00001 /* 00002 ** 2004 May 22 00003 ** 00004 ** The author disclaims copyright to this source code. In place of 00005 ** a legal notice, here is a blessing: 00006 ** 00007 ** May you do good and not evil. 00008 ** May you find forgiveness for yourself and forgive others. 00009 ** May you share freely, never taking more than you give. 00010 ** 00011 ****************************************************************************** 00012 ** 00013 ** This file contains code that is specific to Unix systems. 00014 ** 00015 ** $Id: os_unix.c,v 1.209 2008/11/11 18:34:35 danielk1977 Exp $ 00016 */ 00017 #include "sqliteInt.h" 00018 #if SQLITE_OS_UNIX /* This file is used on unix only */ 00019 00020 /* 00021 ** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several 00022 ** alternative locking implementations are provided: 00023 ** 00024 ** * POSIX locking (the default), 00025 ** * No locking, 00026 ** * Dot-file locking, 00027 ** * flock() locking, 00028 ** * AFP locking (OSX only). 00029 ** 00030 ** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by 00031 ** default on a Mac and disabled on all other posix platforms. 00032 */ 00033 #if !defined(SQLITE_ENABLE_LOCKING_STYLE) 00034 # if defined(__DARWIN__) 00035 # define SQLITE_ENABLE_LOCKING_STYLE 1 00036 # else 00037 # define SQLITE_ENABLE_LOCKING_STYLE 0 00038 # endif 00039 #endif 00040 00041 /* 00042 ** These #defines should enable >2GB file support on Posix if the 00043 ** underlying operating system supports it. If the OS lacks 00044 ** large file support, these should be no-ops. 00045 ** 00046 ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch 00047 ** on the compiler command line. This is necessary if you are compiling 00048 ** on a recent machine (ex: RedHat 7.2) but you want your code to work 00049 ** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2 00050 ** without this option, LFS is enable. 
But LFS does not exist in the kernel 00051 ** in RedHat 6.0, so the code won't work. Hence, for maximum binary 00052 ** portability you should omit LFS. 00053 */ 00054 #ifndef SQLITE_DISABLE_LFS 00055 # define _LARGE_FILE 1 00056 # ifndef _FILE_OFFSET_BITS 00057 # define _FILE_OFFSET_BITS 64 00058 # endif 00059 # define _LARGEFILE_SOURCE 1 00060 #endif 00061 00062 /* 00063 ** standard include files. 00064 */ 00065 #include <sys/types.h> 00066 #include <sys/stat.h> 00067 #include <fcntl.h> 00068 #include <unistd.h> 00069 #include <time.h> 00070 #include <sys/time.h> 00071 #include <errno.h> 00072 00073 #if SQLITE_ENABLE_LOCKING_STYLE 00074 #include <sys/ioctl.h> 00075 #include <sys/param.h> 00076 #include <sys/mount.h> 00077 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 00078 00079 /* 00080 ** If we are to be thread-safe, include the pthreads header and define 00081 ** the SQLITE_UNIX_THREADS macro. 00082 */ 00083 #if SQLITE_THREADSAFE 00084 # include <pthread.h> 00085 # define SQLITE_UNIX_THREADS 1 00086 #endif 00087 00088 /* 00089 ** Default permissions when creating a new file 00090 */ 00091 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS 00092 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 00093 #endif 00094 00095 /* 00096 ** Maximum supported path-length. 00097 */ 00098 #define MAX_PATHNAME 512 00099 00100 00101 /* 00102 ** The unixFile structure is subclass of sqlite3_file specific for the unix 00103 ** protability layer. 00104 */ 00105 typedef struct unixFile unixFile; 00106 struct unixFile { 00107 sqlite3_io_methods const *pMethod; /* Always the first entry */ 00108 #ifdef SQLITE_TEST 00109 /* In test mode, increase the size of this structure a bit so that 00110 ** it is larger than the struct CrashFile defined in test6.c. 
00111 */ 00112 char aPadding[32]; 00113 #endif 00114 struct openCnt *pOpen; /* Info about all open fd's on this inode */ 00115 struct lockInfo *pLock; /* Info about locks on this inode */ 00116 #if SQLITE_ENABLE_LOCKING_STYLE 00117 void *lockingContext; /* Locking style specific state */ 00118 #endif 00119 int h; /* The file descriptor */ 00120 unsigned char locktype; /* The type of lock held on this fd */ 00121 int dirfd; /* File descriptor for the directory */ 00122 #if SQLITE_THREADSAFE 00123 pthread_t tid; /* The thread that "owns" this unixFile */ 00124 #endif 00125 int lastErrno; /* The unix errno from the last I/O error */ 00126 }; 00127 00128 /* 00129 ** Include code that is common to all os_*.c files 00130 */ 00131 #include "os_common.h" 00132 00133 /* 00134 ** Define various macros that are missing from some systems. 00135 */ 00136 #ifndef O_LARGEFILE 00137 # define O_LARGEFILE 0 00138 #endif 00139 #ifdef SQLITE_DISABLE_LFS 00140 # undef O_LARGEFILE 00141 # define O_LARGEFILE 0 00142 #endif 00143 #ifndef O_NOFOLLOW 00144 # define O_NOFOLLOW 0 00145 #endif 00146 #ifndef O_BINARY 00147 # define O_BINARY 0 00148 #endif 00149 00150 /* 00151 ** The DJGPP compiler environment looks mostly like Unix, but it 00152 ** lacks the fcntl() system call. So redefine fcntl() to be something 00153 ** that always succeeds. This means that locking does not occur under 00154 ** DJGPP. But it is DOS - what did you expect? 00155 */ 00156 #ifdef __DJGPP__ 00157 # define fcntl(A,B,C) 0 00158 #endif 00159 00160 /* 00161 ** The threadid macro resolves to the thread-id or to 0. Used for 00162 ** testing and debugging only. 00163 */ 00164 #if SQLITE_THREADSAFE 00165 #define threadid pthread_self() 00166 #else 00167 #define threadid 0 00168 #endif 00169 00170 /* 00171 ** Set or check the unixFile.tid field. This field is set when an unixFile 00172 ** is first opened. All subsequent uses of the unixFile verify that the 00173 ** same thread is operating on the unixFile. 
Some operating systems do 00174 ** not allow locks to be overridden by other threads and that restriction 00175 ** means that sqlite3* database handles cannot be moved from one thread 00176 ** to another. This logic makes sure a user does not try to do that 00177 ** by mistake. 00178 ** 00179 ** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to 00180 ** another as long as we are running on a system that supports threads 00181 ** overriding each others locks (which now the most common behavior) 00182 ** or if no locks are held. But the unixFile.pLock field needs to be 00183 ** recomputed because its key includes the thread-id. See the 00184 ** transferOwnership() function below for additional information 00185 */ 00186 #if SQLITE_THREADSAFE 00187 # define SET_THREADID(X) (X)->tid = pthread_self() 00188 # define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \ 00189 !pthread_equal((X)->tid, pthread_self())) 00190 #else 00191 # define SET_THREADID(X) 00192 # define CHECK_THREADID(X) 0 00193 #endif 00194 00195 /* 00196 ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996) 00197 ** section 6.5.2.2 lines 483 through 490 specify that when a process 00198 ** sets or clears a lock, that operation overrides any prior locks set 00199 ** by the same process. It does not explicitly say so, but this implies 00200 ** that it overrides locks set by the same process using a different 00201 ** file descriptor. Consider this test case: 00202 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 00203 ** 00204 ** Suppose ./file1 and ./file2 are really the same file (because 00205 ** one is a hard or symbolic link to the other) then if you set 00206 ** an exclusive lock on fd1, then try to get an exclusive lock 00207 ** on fd2, it works. I would have expected the second lock to 00208 ** fail since there was already a lock on the file due to fd1. 00209 ** But not so. 
Since both locks came from the same process, the 00210 ** second overrides the first, even though they were on different 00211 ** file descriptors opened on different file names. 00212 ** 00213 ** Bummer. If you ask me, this is broken. Badly broken. It means 00214 ** that we cannot use POSIX locks to synchronize file access among 00215 ** competing threads of the same process. POSIX locks will work fine 00216 ** to synchronize access for threads in separate processes, but not 00217 ** threads within the same process. 00218 ** 00219 ** To work around the problem, SQLite has to manage file locks internally 00220 ** on its own. Whenever a new database is opened, we have to find the 00221 ** specific inode of the database file (the inode is determined by the 00222 ** st_dev and st_ino fields of the stat structure that fstat() fills in) 00223 ** and check for locks already existing on that inode. When locks are 00224 ** created or removed, we have to look at our own internal record of the 00225 ** locks to see if another thread has previously set a lock on that same 00226 ** inode. 00227 ** 00228 ** The sqlite3_file structure for POSIX is no longer just an integer file 00229 ** descriptor. It is now a structure that holds the integer file 00230 ** descriptor and a pointer to a structure that describes the internal 00231 ** locks on the corresponding inode. There is one locking structure 00232 ** per inode, so if the same inode is opened twice, both unixFile structures 00233 ** point to the same locking structure. The locking structure keeps 00234 ** a reference count (so we will know when to delete it) and a "cnt" 00235 ** field that tells us its internal lock status. cnt==0 means the 00236 ** file is unlocked. cnt==-1 means the file has an exclusive lock. 00237 ** cnt>0 means there are cnt shared locks on the file. 00238 ** 00239 ** Any attempt to lock or unlock a file first checks the locking 00240 ** structure. 
** The fcntl() system call is only invoked to set a
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** 2004-Jan-11:
** More recent discoveries about POSIX advisory locks.  (The more
** I discover, the more I realize that POSIX advisory locks are
** an abomination.)
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixFile structure contains
** a pointer to an openCnt structure.  There is one openCnt structure
** per open inode, which means that multiple unixFile can point to a single
** openCnt.  When an attempt is made to close an unixFile, if there are
** other unixFile open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The openCnt structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** First, under Linux threads, because each thread has a separate
** process ID, lock operations in one thread do not override locks
** to the same file in other threads.  Linux threads behave like
** separate processes in this respect.  But, if you close a file
** descriptor in linux threads, all locks are cleared, even locks
** on other threads and even though the other threads have different
** process IDs.  Linux threads is inconsistent in this respect.
** (I'm beginning to think that linux threads is an abomination too.)
** The consequence of this all is that the hash table for the lockInfo
** structure has to include the process id as part of its key because
** locks in different threads are treated as distinct.
But the 00272 ** openCnt structure should not include the process id in its 00273 ** key because close() clears lock on all threads, not just the current 00274 ** thread. Were it not for this goofiness in linux threads, we could 00275 ** combine the lockInfo and openCnt structures into a single structure. 00276 ** 00277 ** 2004-Jun-28: 00278 ** On some versions of linux, threads can override each others locks. 00279 ** On others not. Sometimes you can change the behavior on the same 00280 ** system by setting the LD_ASSUME_KERNEL environment variable. The 00281 ** POSIX standard is silent as to which behavior is correct, as far 00282 ** as I can tell, so other versions of unix might show the same 00283 ** inconsistency. There is no little doubt in my mind that posix 00284 ** advisory locks and linux threads are profoundly broken. 00285 ** 00286 ** To work around the inconsistencies, we have to test at runtime 00287 ** whether or not threads can override each others locks. This test 00288 ** is run once, the first time any lock is attempted. A static 00289 ** variable is set to record the results of this test for future 00290 ** use. 00291 */ 00292 00293 /* 00294 ** An instance of the following structure serves as the key used 00295 ** to locate a particular lockInfo structure given its inode. 00296 ** 00297 ** If threads cannot override each others locks, then we set the 00298 ** lockKey.tid field to the thread ID. If threads can override 00299 ** each others locks then tid is always set to zero. tid is omitted 00300 ** if we compile without threading support. 00301 */ 00302 struct lockKey { 00303 dev_t dev; /* Device number */ 00304 ino_t ino; /* Inode number */ 00305 #if SQLITE_THREADSAFE 00306 pthread_t tid; /* Thread ID or zero if threads can override each other */ 00307 #endif 00308 }; 00309 00310 /* 00311 ** An instance of the following structure is allocated for each open 00312 ** inode on each thread with a different process ID. 
(Threads have 00313 ** different process IDs on linux, but not on most other unixes.) 00314 ** 00315 ** A single inode can have multiple file descriptors, so each unixFile 00316 ** structure contains a pointer to an instance of this object and this 00317 ** object keeps a count of the number of unixFile pointing to it. 00318 */ 00319 struct lockInfo { 00320 struct lockKey key; /* The lookup key */ 00321 int cnt; /* Number of SHARED locks held */ 00322 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ 00323 int nRef; /* Number of pointers to this structure */ 00324 struct lockInfo *pNext, *pPrev; /* List of all lockInfo objects */ 00325 }; 00326 00327 /* 00328 ** An instance of the following structure serves as the key used 00329 ** to locate a particular openCnt structure given its inode. This 00330 ** is the same as the lockKey except that the thread ID is omitted. 00331 */ 00332 struct openKey { 00333 dev_t dev; /* Device number */ 00334 ino_t ino; /* Inode number */ 00335 }; 00336 00337 /* 00338 ** An instance of the following structure is allocated for each open 00339 ** inode. This structure keeps track of the number of locks on that 00340 ** inode. If a close is attempted against an inode that is holding 00341 ** locks, the close is deferred until all locks clear by adding the 00342 ** file descriptor to be closed to the pending list. 00343 */ 00344 struct openCnt { 00345 struct openKey key; /* The lookup key */ 00346 int nRef; /* Number of pointers to this structure */ 00347 int nLock; /* Number of outstanding locks */ 00348 int nPending; /* Number of pending close() operations */ 00349 int *aPending; /* Malloced space holding fd's awaiting a close() */ 00350 struct openCnt *pNext, *pPrev; /* List of all openCnt objects */ 00351 }; 00352 00353 /* 00354 ** List of all lockInfo and openCnt objects. This used to be a hash 00355 ** table. But the number of objects is rarely more than a dozen and 00356 ** never exceeds a few thousand. 
And lookup is not on a critical 00357 ** path oo a simple linked list will suffice. 00358 */ 00359 static struct lockInfo *lockList = 0; 00360 static struct openCnt *openList = 0; 00361 00362 /* 00363 ** The locking styles are associated with the different file locking 00364 ** capabilities supported by different file systems. 00365 ** 00366 ** POSIX locking style fully supports shared and exclusive byte-range locks 00367 ** AFP locking only supports exclusive byte-range locks 00368 ** FLOCK only supports a single file-global exclusive lock 00369 ** DOTLOCK isn't a true locking style, it refers to the use of a special 00370 ** file named the same as the database file with a '.lock' extension, this 00371 ** can be used on file systems that do not offer any reliable file locking 00372 ** NO locking means that no locking will be attempted, this is only used for 00373 ** read-only file systems currently 00374 ** UNSUPPORTED means that no locking will be attempted, this is only used for 00375 ** file systems that are known to be unsupported 00376 */ 00377 #define LOCKING_STYLE_POSIX 1 00378 #define LOCKING_STYLE_NONE 2 00379 #define LOCKING_STYLE_DOTFILE 3 00380 #define LOCKING_STYLE_FLOCK 4 00381 #define LOCKING_STYLE_AFP 5 00382 00383 /* 00384 ** Only set the lastErrno if the error code is a real error and not 00385 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 00386 */ 00387 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 00388 00389 /* 00390 ** Helper functions to obtain and relinquish the global mutex. 00391 */ 00392 static void enterMutex(void){ 00393 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); 00394 } 00395 static void leaveMutex(void){ 00396 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); 00397 } 00398 00399 #if SQLITE_THREADSAFE 00400 /* 00401 ** This variable records whether or not threads can override each others 00402 ** locks. 00403 ** 00404 ** 0: No. 
Threads cannot override each others locks. 00405 ** 1: Yes. Threads can override each others locks. 00406 ** -1: We don't know yet. 00407 ** 00408 ** On some systems, we know at compile-time if threads can override each 00409 ** others locks. On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro 00410 ** will be set appropriately. On other systems, we have to check at 00411 ** runtime. On these latter systems, SQLTIE_THREAD_OVERRIDE_LOCK is 00412 ** undefined. 00413 ** 00414 ** This variable normally has file scope only. But during testing, we make 00415 ** it a global so that the test code can change its value in order to verify 00416 ** that the right stuff happens in either case. 00417 */ 00418 #ifndef SQLITE_THREAD_OVERRIDE_LOCK 00419 # define SQLITE_THREAD_OVERRIDE_LOCK -1 00420 #endif 00421 #ifdef SQLITE_TEST 00422 int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK; 00423 #else 00424 static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK; 00425 #endif 00426 00427 /* 00428 ** This structure holds information passed into individual test 00429 ** threads by the testThreadLockingBehavior() routine. 00430 */ 00431 struct threadTestData { 00432 int fd; /* File to be locked */ 00433 struct flock lock; /* The locking operation */ 00434 int result; /* Result of the locking operation */ 00435 }; 00436 00437 #ifdef SQLITE_LOCK_TRACE 00438 /* 00439 ** Print out information about all locking operations. 00440 ** 00441 ** This routine is used for troubleshooting locks on multithreaded 00442 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 00443 ** command-line option on the compiler. This code is normally 00444 ** turned off. 
00445 */ 00446 static int lockTrace(int fd, int op, struct flock *p){ 00447 char *zOpName, *zType; 00448 int s; 00449 int savedErrno; 00450 if( op==F_GETLK ){ 00451 zOpName = "GETLK"; 00452 }else if( op==F_SETLK ){ 00453 zOpName = "SETLK"; 00454 }else{ 00455 s = fcntl(fd, op, p); 00456 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 00457 return s; 00458 } 00459 if( p->l_type==F_RDLCK ){ 00460 zType = "RDLCK"; 00461 }else if( p->l_type==F_WRLCK ){ 00462 zType = "WRLCK"; 00463 }else if( p->l_type==F_UNLCK ){ 00464 zType = "UNLCK"; 00465 }else{ 00466 assert( 0 ); 00467 } 00468 assert( p->l_whence==SEEK_SET ); 00469 s = fcntl(fd, op, p); 00470 savedErrno = errno; 00471 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 00472 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 00473 (int)p->l_pid, s); 00474 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 00475 struct flock l2; 00476 l2 = *p; 00477 fcntl(fd, F_GETLK, &l2); 00478 if( l2.l_type==F_RDLCK ){ 00479 zType = "RDLCK"; 00480 }else if( l2.l_type==F_WRLCK ){ 00481 zType = "WRLCK"; 00482 }else if( l2.l_type==F_UNLCK ){ 00483 zType = "UNLCK"; 00484 }else{ 00485 assert( 0 ); 00486 } 00487 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 00488 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 00489 } 00490 errno = savedErrno; 00491 return s; 00492 } 00493 #define fcntl lockTrace 00494 #endif /* SQLITE_LOCK_TRACE */ 00495 00496 #ifdef __linux__ 00497 /* 00498 ** This function is used as the main routine for a thread launched by 00499 ** testThreadLockingBehavior(). It tests whether the shared-lock obtained 00500 ** by the main thread in testThreadLockingBehavior() conflicts with a 00501 ** hypothetical write-lock obtained by this thread on the same file. 00502 ** 00503 ** The write-lock is not actually acquired, as this is not possible if 00504 ** the file is open in read-only mode (see ticket #3472). 
00505 */ 00506 static void *threadLockingTest(void *pArg){ 00507 struct threadTestData *pData = (struct threadTestData*)pArg; 00508 pData->result = fcntl(pData->fd, F_GETLK, &pData->lock); 00509 return pArg; 00510 } 00511 00512 /* 00513 ** This procedure attempts to determine whether or not threads 00514 ** can override each others locks then sets the 00515 ** threadsOverrideEachOthersLocks variable appropriately. 00516 */ 00517 static void testThreadLockingBehavior(int fd_orig){ 00518 int fd; 00519 int rc; 00520 struct threadTestData d; 00521 struct flock l; 00522 pthread_t t; 00523 00524 fd = dup(fd_orig); 00525 if( fd<0 ) return; 00526 memset(&l, 0, sizeof(l)); 00527 l.l_type = F_RDLCK; 00528 l.l_len = 1; 00529 l.l_start = 0; 00530 l.l_whence = SEEK_SET; 00531 rc = fcntl(fd_orig, F_SETLK, &l); 00532 if( rc!=0 ) return; 00533 memset(&d, 0, sizeof(d)); 00534 d.fd = fd; 00535 d.lock = l; 00536 d.lock.l_type = F_WRLCK; 00537 pthread_create(&t, 0, threadLockingTest, &d); 00538 pthread_join(t, 0); 00539 close(fd); 00540 if( d.result!=0 ) return; 00541 threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK); 00542 } 00543 #else 00544 /* 00545 ** On anything other than linux, assume threads override each others locks. 00546 */ 00547 static void testThreadLockingBehavior(int fd_orig){ 00548 threadsOverrideEachOthersLocks = 1; 00549 } 00550 #endif /* __linux__ */ 00551 00552 #endif /* SQLITE_THREADSAFE */ 00553 00554 /* 00555 ** Release a lockInfo structure previously allocated by findLockInfo(). 
00556 */ 00557 static void releaseLockInfo(struct lockInfo *pLock){ 00558 if( pLock ){ 00559 pLock->nRef--; 00560 if( pLock->nRef==0 ){ 00561 if( pLock->pPrev ){ 00562 assert( pLock->pPrev->pNext==pLock ); 00563 pLock->pPrev->pNext = pLock->pNext; 00564 }else{ 00565 assert( lockList==pLock ); 00566 lockList = pLock->pNext; 00567 } 00568 if( pLock->pNext ){ 00569 assert( pLock->pNext->pPrev==pLock ); 00570 pLock->pNext->pPrev = pLock->pPrev; 00571 } 00572 sqlite3_free(pLock); 00573 } 00574 } 00575 } 00576 00577 /* 00578 ** Release a openCnt structure previously allocated by findLockInfo(). 00579 */ 00580 static void releaseOpenCnt(struct openCnt *pOpen){ 00581 if( pOpen ){ 00582 pOpen->nRef--; 00583 if( pOpen->nRef==0 ){ 00584 if( pOpen->pPrev ){ 00585 assert( pOpen->pPrev->pNext==pOpen ); 00586 pOpen->pPrev->pNext = pOpen->pNext; 00587 }else{ 00588 assert( openList==pOpen ); 00589 openList = pOpen->pNext; 00590 } 00591 if( pOpen->pNext ){ 00592 assert( pOpen->pNext->pPrev==pOpen ); 00593 pOpen->pNext->pPrev = pOpen->pPrev; 00594 } 00595 sqlite3_free(pOpen->aPending); 00596 sqlite3_free(pOpen); 00597 } 00598 } 00599 } 00600 00601 #if SQLITE_ENABLE_LOCKING_STYLE 00602 /* 00603 ** Tests a byte-range locking query to see if byte range locks are 00604 ** supported, if not we fall back to dotlockLockingStyle. 00605 */ 00606 static int testLockingStyle(int fd){ 00607 struct flock lockInfo; 00608 00609 /* Test byte-range lock using fcntl(). If the call succeeds, 00610 ** assume that the file-system supports POSIX style locks. 00611 */ 00612 lockInfo.l_len = 1; 00613 lockInfo.l_start = 0; 00614 lockInfo.l_whence = SEEK_SET; 00615 lockInfo.l_type = F_RDLCK; 00616 if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) { 00617 return LOCKING_STYLE_POSIX; 00618 } 00619 00620 /* Testing for flock() can give false positives. So if if the above 00621 ** test fails, then we fall back to using dot-file style locking. 
00622 */ 00623 return LOCKING_STYLE_DOTFILE; 00624 } 00625 #endif 00626 00627 /* 00628 ** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the 00629 ** f_fstypename entry in the statfs structure as returned by stat() for 00630 ** the file system hosting the database file and selects the appropriate 00631 ** locking style based on its value. These values and assignments are 00632 ** based on Darwin/OSX behavior and have not been thoroughly tested on 00633 ** other systems. 00634 ** 00635 ** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always 00636 ** returns LOCKING_STYLE_POSIX. 00637 */ 00638 static int detectLockingStyle( 00639 sqlite3_vfs *pVfs, 00640 const char *filePath, 00641 int fd 00642 ){ 00643 #if SQLITE_ENABLE_LOCKING_STYLE 00644 struct Mapping { 00645 const char *zFilesystem; 00646 int eLockingStyle; 00647 } aMap[] = { 00648 { "hfs", LOCKING_STYLE_POSIX }, 00649 { "ufs", LOCKING_STYLE_POSIX }, 00650 { "afpfs", LOCKING_STYLE_AFP }, 00651 #ifdef SQLITE_ENABLE_AFP_LOCKING_SMB 00652 { "smbfs", LOCKING_STYLE_AFP }, 00653 #else 00654 { "smbfs", LOCKING_STYLE_FLOCK }, 00655 #endif 00656 { "msdos", LOCKING_STYLE_DOTFILE }, 00657 { "webdav", LOCKING_STYLE_NONE }, 00658 { 0, 0 } 00659 }; 00660 int i; 00661 struct statfs fsInfo; 00662 00663 if( !filePath ){ 00664 return LOCKING_STYLE_NONE; 00665 } 00666 if( pVfs->pAppData ){ 00667 return SQLITE_PTR_TO_INT(pVfs->pAppData); 00668 } 00669 00670 if( statfs(filePath, &fsInfo) != -1 ){ 00671 if( fsInfo.f_flags & MNT_RDONLY ){ 00672 return LOCKING_STYLE_NONE; 00673 } 00674 for(i=0; aMap[i].zFilesystem; i++){ 00675 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 00676 return aMap[i].eLockingStyle; 00677 } 00678 } 00679 } 00680 00681 /* Default case. Handles, amongst others, "nfs". 
*/ 00682 return testLockingStyle(fd); 00683 #endif 00684 return LOCKING_STYLE_POSIX; 00685 } 00686 00687 /* 00688 ** Given a file descriptor, locate lockInfo and openCnt structures that 00689 ** describes that file descriptor. Create new ones if necessary. The 00690 ** return values might be uninitialized if an error occurs. 00691 ** 00692 ** Return an appropriate error code. 00693 */ 00694 static int findLockInfo( 00695 int fd, /* The file descriptor used in the key */ 00696 struct lockInfo **ppLock, /* Return the lockInfo structure here */ 00697 struct openCnt **ppOpen /* Return the openCnt structure here */ 00698 ){ 00699 int rc; 00700 struct lockKey key1; 00701 struct openKey key2; 00702 struct stat statbuf; 00703 struct lockInfo *pLock; 00704 struct openCnt *pOpen; 00705 rc = fstat(fd, &statbuf); 00706 if( rc!=0 ){ 00707 #ifdef EOVERFLOW 00708 if( errno==EOVERFLOW ) return SQLITE_NOLFS; 00709 #endif 00710 return SQLITE_IOERR; 00711 } 00712 00713 /* On OS X on an msdos filesystem, the inode number is reported 00714 ** incorrectly for zero-size files. See ticket #3260. To work 00715 ** around this problem (we consider it a bug in OS X, not SQLite) 00716 ** we always increase the file size to 1 by writing a single byte 00717 ** prior to accessing the inode number. The one byte written is 00718 ** an ASCII 'S' character which also happens to be the first byte 00719 ** in the header of every SQLite database. In this way, if there 00720 ** is a race condition such that another thread has already populated 00721 ** the first page of the database, no damage is done. 
00722 */ 00723 if( statbuf.st_size==0 ){ 00724 write(fd, "S", 1); 00725 rc = fstat(fd, &statbuf); 00726 if( rc!=0 ){ 00727 return SQLITE_IOERR; 00728 } 00729 } 00730 00731 memset(&key1, 0, sizeof(key1)); 00732 key1.dev = statbuf.st_dev; 00733 key1.ino = statbuf.st_ino; 00734 #if SQLITE_THREADSAFE 00735 if( threadsOverrideEachOthersLocks<0 ){ 00736 testThreadLockingBehavior(fd); 00737 } 00738 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self(); 00739 #endif 00740 memset(&key2, 0, sizeof(key2)); 00741 key2.dev = statbuf.st_dev; 00742 key2.ino = statbuf.st_ino; 00743 pLock = lockList; 00744 while( pLock && memcmp(&key1, &pLock->key, sizeof(key1)) ){ 00745 pLock = pLock->pNext; 00746 } 00747 if( pLock==0 ){ 00748 pLock = sqlite3_malloc( sizeof(*pLock) ); 00749 if( pLock==0 ){ 00750 rc = SQLITE_NOMEM; 00751 goto exit_findlockinfo; 00752 } 00753 pLock->key = key1; 00754 pLock->nRef = 1; 00755 pLock->cnt = 0; 00756 pLock->locktype = 0; 00757 pLock->pNext = lockList; 00758 pLock->pPrev = 0; 00759 if( lockList ) lockList->pPrev = pLock; 00760 lockList = pLock; 00761 }else{ 00762 pLock->nRef++; 00763 } 00764 *ppLock = pLock; 00765 if( ppOpen!=0 ){ 00766 pOpen = openList; 00767 while( pOpen && memcmp(&key2, &pOpen->key, sizeof(key2)) ){ 00768 pOpen = pOpen->pNext; 00769 } 00770 if( pOpen==0 ){ 00771 pOpen = sqlite3_malloc( sizeof(*pOpen) ); 00772 if( pOpen==0 ){ 00773 releaseLockInfo(pLock); 00774 rc = SQLITE_NOMEM; 00775 goto exit_findlockinfo; 00776 } 00777 pOpen->key = key2; 00778 pOpen->nRef = 1; 00779 pOpen->nLock = 0; 00780 pOpen->nPending = 0; 00781 pOpen->aPending = 0; 00782 pOpen->pNext = openList; 00783 pOpen->pPrev = 0; 00784 if( openList ) openList->pPrev = pOpen; 00785 openList = pOpen; 00786 }else{ 00787 pOpen->nRef++; 00788 } 00789 *ppOpen = pOpen; 00790 } 00791 00792 exit_findlockinfo: 00793 return rc; 00794 } 00795 00796 #ifdef SQLITE_DEBUG 00797 /* 00798 ** Helper function for printing out trace information from debugging 00799 ** binaries. 
This returns the string represetation of the supplied 00800 ** integer lock-type. 00801 */ 00802 static const char *locktypeName(int locktype){ 00803 switch( locktype ){ 00804 case NO_LOCK: return "NONE"; 00805 case SHARED_LOCK: return "SHARED"; 00806 case RESERVED_LOCK: return "RESERVED"; 00807 case PENDING_LOCK: return "PENDING"; 00808 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 00809 } 00810 return "ERROR"; 00811 } 00812 #endif 00813 00814 /* 00815 ** If we are currently in a different thread than the thread that the 00816 ** unixFile argument belongs to, then transfer ownership of the unixFile 00817 ** over to the current thread. 00818 ** 00819 ** A unixFile is only owned by a thread on systems where one thread is 00820 ** unable to override locks created by a different thread. RedHat9 is 00821 ** an example of such a system. 00822 ** 00823 ** Ownership transfer is only allowed if the unixFile is currently unlocked. 00824 ** If the unixFile is locked and an ownership is wrong, then return 00825 ** SQLITE_MISUSE. SQLITE_OK is returned if everything works. 00826 */ 00827 #if SQLITE_THREADSAFE 00828 static int transferOwnership(unixFile *pFile){ 00829 int rc; 00830 pthread_t hSelf; 00831 if( threadsOverrideEachOthersLocks ){ 00832 /* Ownership transfers not needed on this system */ 00833 return SQLITE_OK; 00834 } 00835 hSelf = pthread_self(); 00836 if( pthread_equal(pFile->tid, hSelf) ){ 00837 /* We are still in the same thread */ 00838 OSTRACE1("No-transfer, same thread\n"); 00839 return SQLITE_OK; 00840 } 00841 if( pFile->locktype!=NO_LOCK ){ 00842 /* We cannot change ownership while we are holding a lock! 
*/ 00843 return SQLITE_MISUSE; 00844 } 00845 OSTRACE4("Transfer ownership of %d from %d to %d\n", 00846 pFile->h, pFile->tid, hSelf); 00847 pFile->tid = hSelf; 00848 if (pFile->pLock != NULL) { 00849 releaseLockInfo(pFile->pLock); 00850 rc = findLockInfo(pFile->h, &pFile->pLock, 0); 00851 OSTRACE5("LOCK %d is now %s(%s,%d)\n", pFile->h, 00852 locktypeName(pFile->locktype), 00853 locktypeName(pFile->pLock->locktype), pFile->pLock->cnt); 00854 return rc; 00855 } else { 00856 return SQLITE_OK; 00857 } 00858 } 00859 #else 00860 /* On single-threaded builds, ownership transfer is a no-op */ 00861 # define transferOwnership(X) SQLITE_OK 00862 #endif 00863 00864 /* 00865 ** Seek to the offset passed as the second argument, then read cnt 00866 ** bytes into pBuf. Return the number of bytes actually read. 00867 ** 00868 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 00869 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 00870 ** one system to another. Since SQLite does not define USE_PREAD 00871 ** any any form by default, we will not attempt to define _XOPEN_SOURCE. 00872 ** See tickets #2741 and #2681. 00873 */ 00874 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 00875 int got; 00876 i64 newOffset; 00877 TIMER_START; 00878 #if defined(USE_PREAD) 00879 got = pread(id->h, pBuf, cnt, offset); 00880 SimulateIOError( got = -1 ); 00881 #elif defined(USE_PREAD64) 00882 got = pread64(id->h, pBuf, cnt, offset); 00883 SimulateIOError( got = -1 ); 00884 #else 00885 newOffset = lseek(id->h, offset, SEEK_SET); 00886 SimulateIOError( newOffset-- ); 00887 if( newOffset!=offset ){ 00888 return -1; 00889 } 00890 got = read(id->h, pBuf, cnt); 00891 #endif 00892 TIMER_END; 00893 OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED); 00894 return got; 00895 } 00896 00897 /* 00898 ** Read data from a file into a buffer. 
Return SQLITE_OK if all 00899 ** bytes were read successfully and SQLITE_IOERR if anything goes 00900 ** wrong. 00901 */ 00902 static int unixRead( 00903 sqlite3_file *id, 00904 void *pBuf, 00905 int amt, 00906 sqlite3_int64 offset 00907 ){ 00908 int got; 00909 assert( id ); 00910 got = seekAndRead((unixFile*)id, offset, pBuf, amt); 00911 if( got==amt ){ 00912 return SQLITE_OK; 00913 }else if( got<0 ){ 00914 return SQLITE_IOERR_READ; 00915 }else{ 00916 /* Unread parts of the buffer must be zero-filled */ 00917 memset(&((char*)pBuf)[got], 0, amt-got); 00918 return SQLITE_IOERR_SHORT_READ; 00919 } 00920 } 00921 00922 /* 00923 ** Seek to the offset in id->offset then read cnt bytes into pBuf. 00924 ** Return the number of bytes actually read. Update the offset. 00925 */ 00926 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 00927 int got; 00928 i64 newOffset; 00929 TIMER_START; 00930 #if defined(USE_PREAD) 00931 got = pwrite(id->h, pBuf, cnt, offset); 00932 #elif defined(USE_PREAD64) 00933 got = pwrite64(id->h, pBuf, cnt, offset); 00934 #else 00935 newOffset = lseek(id->h, offset, SEEK_SET); 00936 if( newOffset!=offset ){ 00937 return -1; 00938 } 00939 got = write(id->h, pBuf, cnt); 00940 #endif 00941 TIMER_END; 00942 OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED); 00943 return got; 00944 } 00945 00946 00947 /* 00948 ** Write data from a buffer into a file. Return SQLITE_OK on success 00949 ** or some other error code on failure. 
00950 */ 00951 static int unixWrite( 00952 sqlite3_file *id, 00953 const void *pBuf, 00954 int amt, 00955 sqlite3_int64 offset 00956 ){ 00957 int wrote = 0; 00958 assert( id ); 00959 assert( amt>0 ); 00960 while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){ 00961 amt -= wrote; 00962 offset += wrote; 00963 pBuf = &((char*)pBuf)[wrote]; 00964 } 00965 SimulateIOError(( wrote=(-1), amt=1 )); 00966 SimulateDiskfullError(( wrote=0, amt=1 )); 00967 if( amt>0 ){ 00968 if( wrote<0 ){ 00969 return SQLITE_IOERR_WRITE; 00970 }else{ 00971 return SQLITE_FULL; 00972 } 00973 } 00974 return SQLITE_OK; 00975 } 00976 00977 #ifdef SQLITE_TEST 00978 /* 00979 ** Count the number of fullsyncs and normal syncs. This is used to test 00980 ** that syncs and fullsyncs are occuring at the right times. 00981 */ 00982 int sqlite3_sync_count = 0; 00983 int sqlite3_fullsync_count = 0; 00984 #endif 00985 00986 /* 00987 ** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined. 00988 ** Otherwise use fsync() in its place. 00989 */ 00990 #ifndef HAVE_FDATASYNC 00991 # define fdatasync fsync 00992 #endif 00993 00994 /* 00995 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 00996 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 00997 ** only available on Mac OS X. But that could change. 00998 */ 00999 #ifdef F_FULLFSYNC 01000 # define HAVE_FULLFSYNC 1 01001 #else 01002 # define HAVE_FULLFSYNC 0 01003 #endif 01004 01005 01006 /* 01007 ** The fsync() system call does not work as advertised on many 01008 ** unix systems. The following procedure is an attempt to make 01009 ** it work better. 01010 ** 01011 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 01012 ** for testing when we want to run through the test suite quickly. 
01013 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 01014 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 01015 ** or power failure will likely corrupt the database file. 01016 */ 01017 static int full_fsync(int fd, int fullSync, int dataOnly){ 01018 int rc; 01019 01020 /* Record the number of times that we do a normal fsync() and 01021 ** FULLSYNC. This is used during testing to verify that this procedure 01022 ** gets called with the correct arguments. 01023 */ 01024 #ifdef SQLITE_TEST 01025 if( fullSync ) sqlite3_fullsync_count++; 01026 sqlite3_sync_count++; 01027 #endif 01028 01029 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 01030 ** no-op 01031 */ 01032 #ifdef SQLITE_NO_SYNC 01033 rc = SQLITE_OK; 01034 #else 01035 01036 #if HAVE_FULLFSYNC 01037 if( fullSync ){ 01038 rc = fcntl(fd, F_FULLFSYNC, 0); 01039 }else{ 01040 rc = 1; 01041 } 01042 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 01043 * It shouldn't be possible for fullfsync to fail on the local 01044 * file system (on OSX), so failure indicates that FULLFSYNC 01045 * isn't supported for this file system. So, attempt an fsync 01046 * and (for now) ignore the overhead of a superfluous fcntl call. 01047 * It'd be better to detect fullfsync support once and avoid 01048 * the fcntl call every time sync is called. 01049 */ 01050 if( rc ) rc = fsync(fd); 01051 01052 #else 01053 if( dataOnly ){ 01054 rc = fdatasync(fd); 01055 }else{ 01056 rc = fsync(fd); 01057 } 01058 #endif /* HAVE_FULLFSYNC */ 01059 #endif /* defined(SQLITE_NO_SYNC) */ 01060 01061 return rc; 01062 } 01063 01064 /* 01065 ** Make sure all writes to a particular file are committed to disk. 01066 ** 01067 ** If dataOnly==0 then both the file itself and its metadata (file 01068 ** size, access time, etc) are synced. If dataOnly!=0 then only the 01069 ** file data is synced. 
01070 ** 01071 ** Under Unix, also make sure that the directory entry for the file 01072 ** has been created by fsync-ing the directory that contains the file. 01073 ** If we do not do this and we encounter a power failure, the directory 01074 ** entry for the journal might not exist after we reboot. The next 01075 ** SQLite to access the file will not know that the journal exists (because 01076 ** the directory entry for the journal was never created) and the transaction 01077 ** will not roll back - possibly leading to database corruption. 01078 */ 01079 static int unixSync(sqlite3_file *id, int flags){ 01080 int rc; 01081 unixFile *pFile = (unixFile*)id; 01082 01083 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 01084 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 01085 01086 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 01087 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 01088 || (flags&0x0F)==SQLITE_SYNC_FULL 01089 ); 01090 01091 /* Unix cannot, but some systems may return SQLITE_FULL from here. This 01092 ** line is to test that doing so does not cause any problems. 01093 */ 01094 SimulateDiskfullError( return SQLITE_FULL ); 01095 01096 assert( pFile ); 01097 OSTRACE2("SYNC %-3d\n", pFile->h); 01098 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 01099 SimulateIOError( rc=1 ); 01100 if( rc ){ 01101 return SQLITE_IOERR_FSYNC; 01102 } 01103 if( pFile->dirfd>=0 ){ 01104 OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd, 01105 HAVE_FULLFSYNC, isFullsync); 01106 #ifndef SQLITE_DISABLE_DIRSYNC 01107 /* The directory sync is only attempted if full_fsync is 01108 ** turned off or unavailable. If a full_fsync occurred above, 01109 ** then the directory sync is superfluous. 01110 */ 01111 if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){ 01112 /* 01113 ** We have received multiple reports of fsync() returning 01114 ** errors when applied to directories on certain file systems. 
01115 ** A failed directory sync is not a big deal. So it seems 01116 ** better to ignore the error. Ticket #1657 01117 */ 01118 /* return SQLITE_IOERR; */ 01119 } 01120 #endif 01121 close(pFile->dirfd); /* Only need to sync once, so close the directory */ 01122 pFile->dirfd = -1; /* when we are done. */ 01123 } 01124 return SQLITE_OK; 01125 } 01126 01127 /* 01128 ** Truncate an open file to a specified size 01129 */ 01130 static int unixTruncate(sqlite3_file *id, i64 nByte){ 01131 int rc; 01132 assert( id ); 01133 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 01134 rc = ftruncate(((unixFile*)id)->h, (off_t)nByte); 01135 if( rc ){ 01136 return SQLITE_IOERR_TRUNCATE; 01137 }else{ 01138 return SQLITE_OK; 01139 } 01140 } 01141 01142 /* 01143 ** Determine the current size of a file in bytes 01144 */ 01145 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 01146 int rc; 01147 struct stat buf; 01148 assert( id ); 01149 rc = fstat(((unixFile*)id)->h, &buf); 01150 SimulateIOError( rc=1 ); 01151 if( rc!=0 ){ 01152 return SQLITE_IOERR_FSTAT; 01153 } 01154 *pSize = buf.st_size; 01155 01156 /* When opening a zero-size database, the findLockInfo() procedure 01157 ** writes a single byte into that file in order to work around a bug 01158 ** in the OS-X msdos filesystem. In order to avoid problems with upper 01159 ** layers, we need to report this file size as zero even though it is 01160 ** really 1. Ticket #3260. 01161 */ 01162 if( *pSize==1 ) *pSize = 0; 01163 01164 01165 return SQLITE_OK; 01166 } 01167 01168 /* 01169 ** This routine translates a standard POSIX errno code into something 01170 ** useful to the clients of the sqlite3 functions. 
Specifically, it is 01171 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 01172 ** and a variety of "please close the file descriptor NOW" errors into 01173 ** SQLITE_IOERR 01174 ** 01175 ** Errors during initialization of locks, or file system support for locks, 01176 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 01177 */ 01178 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 01179 switch (posixError) { 01180 case 0: 01181 return SQLITE_OK; 01182 01183 case EAGAIN: 01184 case ETIMEDOUT: 01185 case EBUSY: 01186 case EINTR: 01187 case ENOLCK: 01188 /* random NFS retry error, unless during file system support 01189 * introspection, in which it actually means what it says */ 01190 return SQLITE_BUSY; 01191 01192 case EACCES: 01193 /* EACCES is like EAGAIN during locking operations, but not any other time*/ 01194 if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 01195 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 01196 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 01197 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ 01198 return SQLITE_BUSY; 01199 } 01200 /* else fall through */ 01201 case EPERM: 01202 return SQLITE_PERM; 01203 01204 case EDEADLK: 01205 return SQLITE_IOERR_BLOCKED; 01206 01207 #if EOPNOTSUPP!=ENOTSUP 01208 case EOPNOTSUPP: 01209 /* something went terribly awry, unless during file system support 01210 * introspection, in which it actually means what it says */ 01211 #endif 01212 #ifdef ENOTSUP 01213 case ENOTSUP: 01214 /* invalid fd, unless during file system support introspection, in which 01215 * it actually means what it says */ 01216 #endif 01217 case EIO: 01218 case EBADF: 01219 case EINVAL: 01220 case ENOTCONN: 01221 case ENODEV: 01222 case ENXIO: 01223 case ENOENT: 01224 case ESTALE: 01225 case ENOSYS: 01226 /* these should force the client to close the file and reconnect */ 01227 01228 default: 01229 return sqliteIOErr; 01230 } 01231 } 01232 01233 /* 01234 ** This routine checks if there is a RESERVED 
lock held on the specified 01235 ** file by this or any other process. If such a lock is held, set *pResOut 01236 ** to a non-zero value otherwise *pResOut is set to zero. The return value 01237 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 01238 */ 01239 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 01240 int rc = SQLITE_OK; 01241 int reserved = 0; 01242 unixFile *pFile = (unixFile*)id; 01243 01244 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 01245 01246 assert( pFile ); 01247 enterMutex(); /* Because pFile->pLock is shared across threads */ 01248 01249 /* Check if a thread in this process holds such a lock */ 01250 if( pFile->pLock->locktype>SHARED_LOCK ){ 01251 reserved = 1; 01252 } 01253 01254 /* Otherwise see if some other process holds it. 01255 */ 01256 if( !reserved ){ 01257 struct flock lock; 01258 lock.l_whence = SEEK_SET; 01259 lock.l_start = RESERVED_BYTE; 01260 lock.l_len = 1; 01261 lock.l_type = F_WRLCK; 01262 if (-1 == fcntl(pFile->h, F_GETLK, &lock)) { 01263 int tErrno = errno; 01264 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 01265 pFile->lastErrno = tErrno; 01266 } else if( lock.l_type!=F_UNLCK ){ 01267 reserved = 1; 01268 } 01269 } 01270 01271 leaveMutex(); 01272 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved); 01273 01274 *pResOut = reserved; 01275 return rc; 01276 } 01277 01278 /* 01279 ** Lock the file with the lock specified by parameter locktype - one 01280 ** of the following: 01281 ** 01282 ** (1) SHARED_LOCK 01283 ** (2) RESERVED_LOCK 01284 ** (3) PENDING_LOCK 01285 ** (4) EXCLUSIVE_LOCK 01286 ** 01287 ** Sometimes when requesting one lock state, additional lock states 01288 ** are inserted in between. The locking might fail on one of the later 01289 ** transitions leaving the lock state different from what it started but 01290 ** still short of its goal. 
The following chart shows the allowed 01291 ** transitions and the inserted intermediate states: 01292 ** 01293 ** UNLOCKED -> SHARED 01294 ** SHARED -> RESERVED 01295 ** SHARED -> (PENDING) -> EXCLUSIVE 01296 ** RESERVED -> (PENDING) -> EXCLUSIVE 01297 ** PENDING -> EXCLUSIVE 01298 ** 01299 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 01300 ** routine to lower a locking level. 01301 */ 01302 static int unixLock(sqlite3_file *id, int locktype){ 01303 /* The following describes the implementation of the various locks and 01304 ** lock transitions in terms of the POSIX advisory shared and exclusive 01305 ** lock primitives (called read-locks and write-locks below, to avoid 01306 ** confusion with SQLite lock names). The algorithms are complicated 01307 ** slightly in order to be compatible with windows systems simultaneously 01308 ** accessing the same database file, in case that is ever required. 01309 ** 01310 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved 01311 ** byte', each single bytes at well known offsets, and the 'shared byte 01312 ** range', a range of 510 bytes at a well known offset. 01313 ** 01314 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending 01315 ** byte'. If this is successful, a random byte from the 'shared byte 01316 ** range' is read-locked and the lock on the 'pending byte' released. 01317 ** 01318 ** A process may only obtain a RESERVED lock after it has a SHARED lock. 01319 ** A RESERVED lock is implemented by grabbing a write-lock on the 01320 ** 'reserved byte'. 01321 ** 01322 ** A process may only obtain a PENDING lock after it has obtained a 01323 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock 01324 ** on the 'pending byte'. This ensures that no new SHARED locks can be 01325 ** obtained, but existing SHARED locks are allowed to persist. A process 01326 ** does not have to obtain a RESERVED lock on the way to a PENDING lock. 
01327 ** This property is used by the algorithm for rolling back a journal file 01328 ** after a crash. 01329 ** 01330 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is 01331 ** implemented by obtaining a write-lock on the entire 'shared byte 01332 ** range'. Since all other locks require a read-lock on one of the bytes 01333 ** within this range, this ensures that no other locks are held on the 01334 ** database. 01335 ** 01336 ** The reason a single byte cannot be used instead of the 'shared byte 01337 ** range' is that some versions of windows do not support read-locks. By 01338 ** locking a random byte from a range, concurrent SHARED locks may exist 01339 ** even if the locking primitive used is always a write-lock. 01340 */ 01341 int rc = SQLITE_OK; 01342 unixFile *pFile = (unixFile*)id; 01343 struct lockInfo *pLock = pFile->pLock; 01344 struct flock lock; 01345 int s; 01346 01347 assert( pFile ); 01348 OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h, 01349 locktypeName(locktype), locktypeName(pFile->locktype), 01350 locktypeName(pLock->locktype), pLock->cnt , getpid()); 01351 01352 /* If there is already a lock of this type or more restrictive on the 01353 ** unixFile, do nothing. Don't use the end_lock: exit path, as 01354 ** enterMutex() hasn't been called yet. 01355 */ 01356 if( pFile->locktype>=locktype ){ 01357 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h, 01358 locktypeName(locktype)); 01359 return SQLITE_OK; 01360 } 01361 01362 /* Make sure the locking sequence is correct 01363 */ 01364 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); 01365 assert( locktype!=PENDING_LOCK ); 01366 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK ); 01367 01368 /* This mutex is needed because pFile->pLock is shared across threads 01369 */ 01370 enterMutex(); 01371 01372 /* Make sure the current thread owns the pFile. 
01373 */ 01374 rc = transferOwnership(pFile); 01375 if( rc!=SQLITE_OK ){ 01376 leaveMutex(); 01377 return rc; 01378 } 01379 pLock = pFile->pLock; 01380 01381 /* If some thread using this PID has a lock via a different unixFile* 01382 ** handle that precludes the requested lock, return BUSY. 01383 */ 01384 if( (pFile->locktype!=pLock->locktype && 01385 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK)) 01386 ){ 01387 rc = SQLITE_BUSY; 01388 goto end_lock; 01389 } 01390 01391 /* If a SHARED lock is requested, and some thread using this PID already 01392 ** has a SHARED or RESERVED lock, then increment reference counts and 01393 ** return SQLITE_OK. 01394 */ 01395 if( locktype==SHARED_LOCK && 01396 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){ 01397 assert( locktype==SHARED_LOCK ); 01398 assert( pFile->locktype==0 ); 01399 assert( pLock->cnt>0 ); 01400 pFile->locktype = SHARED_LOCK; 01401 pLock->cnt++; 01402 pFile->pOpen->nLock++; 01403 goto end_lock; 01404 } 01405 01406 lock.l_len = 1L; 01407 01408 lock.l_whence = SEEK_SET; 01409 01410 /* A PENDING lock is needed before acquiring a SHARED lock and before 01411 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 01412 ** be released. 01413 */ 01414 if( locktype==SHARED_LOCK 01415 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK) 01416 ){ 01417 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK); 01418 lock.l_start = PENDING_BYTE; 01419 s = fcntl(pFile->h, F_SETLK, &lock); 01420 if( s==(-1) ){ 01421 int tErrno = errno; 01422 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 01423 if( IS_LOCK_ERROR(rc) ){ 01424 pFile->lastErrno = tErrno; 01425 } 01426 goto end_lock; 01427 } 01428 } 01429 01430 01431 /* If control gets to this point, then actually go ahead and make 01432 ** operating system calls for the specified lock. 
01433 */ 01434 if( locktype==SHARED_LOCK ){ 01435 int tErrno = 0; 01436 assert( pLock->cnt==0 ); 01437 assert( pLock->locktype==0 ); 01438 01439 /* Now get the read-lock */ 01440 lock.l_start = SHARED_FIRST; 01441 lock.l_len = SHARED_SIZE; 01442 if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){ 01443 tErrno = errno; 01444 } 01445 /* Drop the temporary PENDING lock */ 01446 lock.l_start = PENDING_BYTE; 01447 lock.l_len = 1L; 01448 lock.l_type = F_UNLCK; 01449 if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){ 01450 if( s != -1 ){ 01451 /* This could happen with a network mount */ 01452 tErrno = errno; 01453 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 01454 if( IS_LOCK_ERROR(rc) ){ 01455 pFile->lastErrno = tErrno; 01456 } 01457 goto end_lock; 01458 } 01459 } 01460 if( s==(-1) ){ 01461 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 01462 if( IS_LOCK_ERROR(rc) ){ 01463 pFile->lastErrno = tErrno; 01464 } 01465 }else{ 01466 pFile->locktype = SHARED_LOCK; 01467 pFile->pOpen->nLock++; 01468 pLock->cnt = 1; 01469 } 01470 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){ 01471 /* We are trying for an exclusive lock but another thread in this 01472 ** same process is still holding a shared lock. */ 01473 rc = SQLITE_BUSY; 01474 }else{ 01475 /* The request was for a RESERVED or EXCLUSIVE lock. It is 01476 ** assumed that there is a SHARED or greater lock on the file 01477 ** already. 
01478 */ 01479 assert( 0!=pFile->locktype ); 01480 lock.l_type = F_WRLCK; 01481 switch( locktype ){ 01482 case RESERVED_LOCK: 01483 lock.l_start = RESERVED_BYTE; 01484 break; 01485 case EXCLUSIVE_LOCK: 01486 lock.l_start = SHARED_FIRST; 01487 lock.l_len = SHARED_SIZE; 01488 break; 01489 default: 01490 assert(0); 01491 } 01492 s = fcntl(pFile->h, F_SETLK, &lock); 01493 if( s==(-1) ){ 01494 int tErrno = errno; 01495 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 01496 if( IS_LOCK_ERROR(rc) ){ 01497 pFile->lastErrno = tErrno; 01498 } 01499 } 01500 } 01501 01502 if( rc==SQLITE_OK ){ 01503 pFile->locktype = locktype; 01504 pLock->locktype = locktype; 01505 }else if( locktype==EXCLUSIVE_LOCK ){ 01506 pFile->locktype = PENDING_LOCK; 01507 pLock->locktype = PENDING_LOCK; 01508 } 01509 01510 end_lock: 01511 leaveMutex(); 01512 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype), 01513 rc==SQLITE_OK ? "ok" : "failed"); 01514 return rc; 01515 } 01516 01517 /* 01518 ** Lower the locking level on file descriptor pFile to locktype. locktype 01519 ** must be either NO_LOCK or SHARED_LOCK. 01520 ** 01521 ** If the locking level of the file descriptor is already at or below 01522 ** the requested locking level, this routine is a no-op. 
01523 */ 01524 static int unixUnlock(sqlite3_file *id, int locktype){ 01525 struct lockInfo *pLock; 01526 struct flock lock; 01527 int rc = SQLITE_OK; 01528 unixFile *pFile = (unixFile*)id; 01529 int h; 01530 01531 assert( pFile ); 01532 OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype, 01533 pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid()); 01534 01535 assert( locktype<=SHARED_LOCK ); 01536 if( pFile->locktype<=locktype ){ 01537 return SQLITE_OK; 01538 } 01539 if( CHECK_THREADID(pFile) ){ 01540 return SQLITE_MISUSE; 01541 } 01542 enterMutex(); 01543 h = pFile->h; 01544 pLock = pFile->pLock; 01545 assert( pLock->cnt!=0 ); 01546 if( pFile->locktype>SHARED_LOCK ){ 01547 assert( pLock->locktype==pFile->locktype ); 01548 SimulateIOErrorBenign(1); 01549 SimulateIOError( h=(-1) ) 01550 SimulateIOErrorBenign(0); 01551 if( locktype==SHARED_LOCK ){ 01552 lock.l_type = F_RDLCK; 01553 lock.l_whence = SEEK_SET; 01554 lock.l_start = SHARED_FIRST; 01555 lock.l_len = SHARED_SIZE; 01556 if( fcntl(h, F_SETLK, &lock)==(-1) ){ 01557 int tErrno = errno; 01558 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); 01559 if( IS_LOCK_ERROR(rc) ){ 01560 pFile->lastErrno = tErrno; 01561 } 01562 goto end_unlock; 01563 } 01564 } 01565 lock.l_type = F_UNLCK; 01566 lock.l_whence = SEEK_SET; 01567 lock.l_start = PENDING_BYTE; 01568 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); 01569 if( fcntl(h, F_SETLK, &lock)!=(-1) ){ 01570 pLock->locktype = SHARED_LOCK; 01571 }else{ 01572 int tErrno = errno; 01573 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 01574 if( IS_LOCK_ERROR(rc) ){ 01575 pFile->lastErrno = tErrno; 01576 } 01577 goto end_unlock; 01578 } 01579 } 01580 if( locktype==NO_LOCK ){ 01581 struct openCnt *pOpen; 01582 01583 /* Decrement the shared lock counter. Release the lock using an 01584 ** OS call only when all threads in this same process have released 01585 ** the lock. 
01586 */ 01587 pLock->cnt--; 01588 if( pLock->cnt==0 ){ 01589 lock.l_type = F_UNLCK; 01590 lock.l_whence = SEEK_SET; 01591 lock.l_start = lock.l_len = 0L; 01592 SimulateIOErrorBenign(1); 01593 SimulateIOError( h=(-1) ) 01594 SimulateIOErrorBenign(0); 01595 if( fcntl(h, F_SETLK, &lock)!=(-1) ){ 01596 pLock->locktype = NO_LOCK; 01597 }else{ 01598 int tErrno = errno; 01599 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 01600 if( IS_LOCK_ERROR(rc) ){ 01601 pFile->lastErrno = tErrno; 01602 } 01603 pLock->cnt = 1; 01604 goto end_unlock; 01605 } 01606 } 01607 01608 /* Decrement the count of locks against this same file. When the 01609 ** count reaches zero, close any other file descriptors whose close 01610 ** was deferred because of outstanding locks. 01611 */ 01612 if( rc==SQLITE_OK ){ 01613 pOpen = pFile->pOpen; 01614 pOpen->nLock--; 01615 assert( pOpen->nLock>=0 ); 01616 if( pOpen->nLock==0 && pOpen->nPending>0 ){ 01617 int i; 01618 for(i=0; i<pOpen->nPending; i++){ 01619 close(pOpen->aPending[i]); 01620 } 01621 sqlite3_free(pOpen->aPending); 01622 pOpen->nPending = 0; 01623 pOpen->aPending = 0; 01624 } 01625 } 01626 } 01627 01628 end_unlock: 01629 leaveMutex(); 01630 if( rc==SQLITE_OK ) pFile->locktype = locktype; 01631 return rc; 01632 } 01633 01634 /* 01635 ** This function performs the parts of the "close file" operation 01636 ** common to all locking schemes. It closes the directory and file 01637 ** handles, if they are valid, and sets all fields of the unixFile 01638 ** structure to 0. 01639 */ 01640 static int closeUnixFile(sqlite3_file *id){ 01641 unixFile *pFile = (unixFile*)id; 01642 if( pFile ){ 01643 if( pFile->dirfd>=0 ){ 01644 close(pFile->dirfd); 01645 } 01646 if( pFile->h>=0 ){ 01647 close(pFile->h); 01648 } 01649 OSTRACE2("CLOSE %-3d\n", pFile->h); 01650 OpenCounter(-1); 01651 memset(pFile, 0, sizeof(unixFile)); 01652 } 01653 return SQLITE_OK; 01654 } 01655 01656 /* 01657 ** Close a file. 
01658 */ 01659 static int unixClose(sqlite3_file *id){ 01660 if( id ){ 01661 unixFile *pFile = (unixFile *)id; 01662 unixUnlock(id, NO_LOCK); 01663 enterMutex(); 01664 if( pFile->pOpen && pFile->pOpen->nLock ){ 01665 /* If there are outstanding locks, do not actually close the file just 01666 ** yet because that would clear those locks. Instead, add the file 01667 ** descriptor to pOpen->aPending. It will be automatically closed when 01668 ** the last lock is cleared. 01669 */ 01670 int *aNew; 01671 struct openCnt *pOpen = pFile->pOpen; 01672 aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) ); 01673 if( aNew==0 ){ 01674 /* If a malloc fails, just leak the file descriptor */ 01675 }else{ 01676 pOpen->aPending = aNew; 01677 pOpen->aPending[pOpen->nPending] = pFile->h; 01678 pOpen->nPending++; 01679 pFile->h = -1; 01680 } 01681 } 01682 releaseLockInfo(pFile->pLock); 01683 releaseOpenCnt(pFile->pOpen); 01684 closeUnixFile(id); 01685 leaveMutex(); 01686 } 01687 return SQLITE_OK; 01688 } 01689 01690 01691 #if SQLITE_ENABLE_LOCKING_STYLE 01692 #pragma mark AFP Support 01693 01694 /* 01695 ** The afpLockingContext structure contains all afp lock specific state 01696 */ 01697 typedef struct afpLockingContext afpLockingContext; 01698 struct afpLockingContext { 01699 unsigned long long sharedLockByte; 01700 const char *filePath; 01701 }; 01702 01703 struct ByteRangeLockPB2 01704 { 01705 unsigned long long offset; /* offset to first byte to lock */ 01706 unsigned long long length; /* nbr of bytes to lock */ 01707 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 01708 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 01709 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 01710 int fd; /* file desc to assoc this lock with */ 01711 }; 01712 01713 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 01714 01715 /* 01716 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 
01717 */ 01718 static int _AFPFSSetLock( 01719 const char *path, 01720 unixFile *pFile, 01721 unsigned long long offset, 01722 unsigned long long length, 01723 int setLockFlag 01724 ){ 01725 struct ByteRangeLockPB2 pb; 01726 int err; 01727 01728 pb.unLockFlag = setLockFlag ? 0 : 1; 01729 pb.startEndFlag = 0; 01730 pb.offset = offset; 01731 pb.length = length; 01732 pb.fd = pFile->h; 01733 OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n", 01734 (setLockFlag?"ON":"OFF"), pFile->h, offset, length); 01735 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 01736 if ( err==-1 ) { 01737 int rc; 01738 int tErrno = errno; 01739 OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno)); 01740 rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); /* error */ 01741 if( IS_LOCK_ERROR(rc) ){ 01742 pFile->lastErrno = tErrno; 01743 } 01744 return rc; 01745 } else { 01746 return SQLITE_OK; 01747 } 01748 } 01749 01750 /* AFP-style reserved lock checking following the behavior of 01751 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */ 01752 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 01753 int rc = SQLITE_OK; 01754 int reserved = 0; 01755 unixFile *pFile = (unixFile*)id; 01756 01757 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 01758 01759 assert( pFile ); 01760 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 01761 01762 /* Check if a thread in this process holds such a lock */ 01763 if( pFile->locktype>SHARED_LOCK ){ 01764 reserved = 1; 01765 } 01766 01767 /* Otherwise see if some other process holds it. 
01768 */ 01769 if( !reserved ){ 01770 /* lock the RESERVED byte */ 01771 int lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1); 01772 if( SQLITE_OK==lrc ){ 01773 /* if we succeeded in taking the reserved lock, unlock it to restore 01774 ** the original state */ 01775 lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1, 0); 01776 } else { 01777 /* if we failed to get the lock then someone else must have it */ 01778 reserved = 1; 01779 } 01780 if( IS_LOCK_ERROR(lrc) ){ 01781 rc=lrc; 01782 } 01783 } 01784 01785 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved); 01786 01787 *pResOut = reserved; 01788 return rc; 01789 } 01790 01791 /* AFP-style locking following the behavior of unixLock, see the unixLock 01792 ** function comments for details of lock management. */ 01793 static int afpLock(sqlite3_file *id, int locktype){ 01794 int rc = SQLITE_OK; 01795 unixFile *pFile = (unixFile*)id; 01796 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 01797 01798 assert( pFile ); 01799 OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h, 01800 locktypeName(locktype), locktypeName(pFile->locktype), getpid()); 01801 01802 /* If there is already a lock of this type or more restrictive on the 01803 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 01804 ** enterMutex() hasn't been called yet. 01805 */ 01806 if( pFile->locktype>=locktype ){ 01807 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h, 01808 locktypeName(locktype)); 01809 return SQLITE_OK; 01810 } 01811 01812 /* Make sure the locking sequence is correct 01813 */ 01814 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); 01815 assert( locktype!=PENDING_LOCK ); 01816 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK ); 01817 01818 /* This mutex is needed because pFile->pLock is shared across threads 01819 */ 01820 enterMutex(); 01821 01822 /* Make sure the current thread owns the pFile. 
01823 */ 01824 rc = transferOwnership(pFile); 01825 if( rc!=SQLITE_OK ){ 01826 leaveMutex(); 01827 return rc; 01828 } 01829 01830 /* A PENDING lock is needed before acquiring a SHARED lock and before 01831 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 01832 ** be released. 01833 */ 01834 if( locktype==SHARED_LOCK 01835 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK) 01836 ){ 01837 int failed; 01838 failed = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 1); 01839 if (failed) { 01840 rc = failed; 01841 goto afp_end_lock; 01842 } 01843 } 01844 01845 /* If control gets to this point, then actually go ahead and make 01846 ** operating system calls for the specified lock. 01847 */ 01848 if( locktype==SHARED_LOCK ){ 01849 int lk, lrc1, lrc2, lrc1Errno; 01850 01851 /* Now get the read-lock SHARED_LOCK */ 01852 /* note that the quality of the randomness doesn't matter that much */ 01853 lk = random(); 01854 context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1); 01855 lrc1 = _AFPFSSetLock(context->filePath, pFile, 01856 SHARED_FIRST+context->sharedLockByte, 1, 1); 01857 if( IS_LOCK_ERROR(lrc1) ){ 01858 lrc1Errno = pFile->lastErrno; 01859 } 01860 /* Drop the temporary PENDING lock */ 01861 lrc2 = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 0); 01862 01863 if( IS_LOCK_ERROR(lrc1) ) { 01864 pFile->lastErrno = lrc1Errno; 01865 rc = lrc1; 01866 goto afp_end_lock; 01867 } else if( IS_LOCK_ERROR(lrc2) ){ 01868 rc = lrc2; 01869 goto afp_end_lock; 01870 } else if( lrc1 != SQLITE_OK ) { 01871 rc = lrc1; 01872 } else { 01873 pFile->locktype = SHARED_LOCK; 01874 } 01875 }else{ 01876 /* The request was for a RESERVED or EXCLUSIVE lock. It is 01877 ** assumed that there is a SHARED or greater lock on the file 01878 ** already. 
01879 */ 01880 int failed = 0; 01881 assert( 0!=pFile->locktype ); 01882 if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) { 01883 /* Acquire a RESERVED lock */ 01884 failed = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1); 01885 } 01886 if (!failed && locktype == EXCLUSIVE_LOCK) { 01887 /* Acquire an EXCLUSIVE lock */ 01888 01889 /* Remove the shared lock before trying the range. we'll need to 01890 ** reestablish the shared lock if we can't get the afpUnlock 01891 */ 01892 if (!(failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST + 01893 context->sharedLockByte, 1, 0))) { 01894 /* now attemmpt to get the exclusive lock range */ 01895 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST, 01896 SHARED_SIZE, 1); 01897 if (failed && (failed = _AFPFSSetLock(context->filePath, pFile, 01898 SHARED_FIRST + context->sharedLockByte, 1, 1))) { 01899 rc = failed; 01900 } 01901 } else { 01902 rc = failed; 01903 } 01904 } 01905 if( failed ){ 01906 rc = failed; 01907 } 01908 } 01909 01910 if( rc==SQLITE_OK ){ 01911 pFile->locktype = locktype; 01912 }else if( locktype==EXCLUSIVE_LOCK ){ 01913 pFile->locktype = PENDING_LOCK; 01914 } 01915 01916 afp_end_lock: 01917 leaveMutex(); 01918 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype), 01919 rc==SQLITE_OK ? "ok" : "failed"); 01920 return rc; 01921 } 01922 01923 /* 01924 ** Lower the locking level on file descriptor pFile to locktype. locktype 01925 ** must be either NO_LOCK or SHARED_LOCK. 01926 ** 01927 ** If the locking level of the file descriptor is already at or below 01928 ** the requested locking level, this routine is a no-op. 
01929 */ 01930 static int afpUnlock(sqlite3_file *id, int locktype) { 01931 int rc = SQLITE_OK; 01932 unixFile *pFile = (unixFile*)id; 01933 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 01934 01935 assert( pFile ); 01936 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype, 01937 pFile->locktype, getpid()); 01938 01939 assert( locktype<=SHARED_LOCK ); 01940 if( pFile->locktype<=locktype ){ 01941 return SQLITE_OK; 01942 } 01943 if( CHECK_THREADID(pFile) ){ 01944 return SQLITE_MISUSE; 01945 } 01946 enterMutex(); 01947 int failed = SQLITE_OK; 01948 if( pFile->locktype>SHARED_LOCK ){ 01949 if( locktype==SHARED_LOCK ){ 01950 01951 /* unlock the exclusive range - then re-establish the shared lock */ 01952 if (pFile->locktype==EXCLUSIVE_LOCK) { 01953 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST, 01954 SHARED_SIZE, 0); 01955 if (!failed) { 01956 /* successfully removed the exclusive lock */ 01957 if ((failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST+ 01958 context->sharedLockByte, 1, 1))) { 01959 /* failed to re-establish our shared lock */ 01960 rc = failed; 01961 } 01962 } else { 01963 rc = failed; 01964 } 01965 } 01966 } 01967 if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) { 01968 if ((failed = _AFPFSSetLock(context->filePath, pFile, 01969 PENDING_BYTE, 1, 0))){ 01970 /* failed to release the pending lock */ 01971 rc = failed; 01972 } 01973 } 01974 if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) { 01975 if ((failed = _AFPFSSetLock(context->filePath, pFile, 01976 RESERVED_BYTE, 1, 0))) { 01977 /* failed to release the reserved lock */ 01978 rc = failed; 01979 } 01980 } 01981 } 01982 if( locktype==NO_LOCK ){ 01983 int failed = _AFPFSSetLock(context->filePath, pFile, 01984 SHARED_FIRST + context->sharedLockByte, 1, 0); 01985 if (failed) { 01986 rc = failed; 01987 } 01988 } 01989 if (rc == SQLITE_OK) 01990 pFile->locktype = locktype; 01991 leaveMutex(); 01992 return rc; 01993 } 01994 01995 /* 
01996 ** Close a file & cleanup AFP specific locking context 01997 */ 01998 static int afpClose(sqlite3_file *id) { 01999 if( id ){ 02000 unixFile *pFile = (unixFile*)id; 02001 afpUnlock(id, NO_LOCK); 02002 sqlite3_free(pFile->lockingContext); 02003 } 02004 return closeUnixFile(id); 02005 } 02006 02007 02008 #pragma mark flock() style locking 02009 02010 /* 02011 ** The flockLockingContext is not used 02012 */ 02013 typedef void flockLockingContext; 02014 02015 /* flock-style reserved lock checking following the behavior of 02016 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */ 02017 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 02018 int rc = SQLITE_OK; 02019 int reserved = 0; 02020 unixFile *pFile = (unixFile*)id; 02021 02022 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 02023 02024 assert( pFile ); 02025 02026 /* Check if a thread in this process holds such a lock */ 02027 if( pFile->locktype>SHARED_LOCK ){ 02028 reserved = 1; 02029 } 02030 02031 /* Otherwise see if some other process holds it. 
*/ 02032 if( !reserved ){ 02033 /* attempt to get the lock */ 02034 int lrc = flock(pFile->h, LOCK_EX | LOCK_NB); 02035 if( !lrc ){ 02036 /* got the lock, unlock it */ 02037 lrc = flock(pFile->h, LOCK_UN); 02038 if ( lrc ) { 02039 int tErrno = errno; 02040 /* unlock failed with an error */ 02041 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 02042 if( IS_LOCK_ERROR(lrc) ){ 02043 pFile->lastErrno = tErrno; 02044 rc = lrc; 02045 } 02046 } 02047 } else { 02048 int tErrno = errno; 02049 reserved = 1; 02050 /* someone else might have it reserved */ 02051 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 02052 if( IS_LOCK_ERROR(lrc) ){ 02053 pFile->lastErrno = tErrno; 02054 rc = lrc; 02055 } 02056 } 02057 } 02058 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved); 02059 02060 *pResOut = reserved; 02061 return rc; 02062 } 02063 02064 static int flockLock(sqlite3_file *id, int locktype) { 02065 int rc = SQLITE_OK; 02066 unixFile *pFile = (unixFile*)id; 02067 02068 assert( pFile ); 02069 02070 /* if we already have a lock, it is exclusive. 02071 ** Just adjust level and punt on outta here. */ 02072 if (pFile->locktype > NO_LOCK) { 02073 pFile->locktype = locktype; 02074 return SQLITE_OK; 02075 } 02076 02077 /* grab an exclusive lock */ 02078 02079 if (flock(pFile->h, LOCK_EX | LOCK_NB)) { 02080 int tErrno = errno; 02081 /* didn't get, must be busy */ 02082 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 02083 if( IS_LOCK_ERROR(rc) ){ 02084 pFile->lastErrno = tErrno; 02085 } 02086 } else { 02087 /* got it, set the type and return ok */ 02088 pFile->locktype = locktype; 02089 } 02090 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype), 02091 rc==SQLITE_OK ? 
"ok" : "failed"); 02092 return rc; 02093 } 02094 02095 static int flockUnlock(sqlite3_file *id, int locktype) { 02096 unixFile *pFile = (unixFile*)id; 02097 02098 assert( pFile ); 02099 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype, 02100 pFile->locktype, getpid()); 02101 assert( locktype<=SHARED_LOCK ); 02102 02103 /* no-op if possible */ 02104 if( pFile->locktype==locktype ){ 02105 return SQLITE_OK; 02106 } 02107 02108 /* shared can just be set because we always have an exclusive */ 02109 if (locktype==SHARED_LOCK) { 02110 pFile->locktype = locktype; 02111 return SQLITE_OK; 02112 } 02113 02114 /* no, really, unlock. */ 02115 int rc = flock(pFile->h, LOCK_UN); 02116 if (rc) { 02117 int r, tErrno = errno; 02118 r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 02119 if( IS_LOCK_ERROR(r) ){ 02120 pFile->lastErrno = tErrno; 02121 } 02122 return r; 02123 } else { 02124 pFile->locktype = NO_LOCK; 02125 return SQLITE_OK; 02126 } 02127 } 02128 02129 /* 02130 ** Close a file. 02131 */ 02132 static int flockClose(sqlite3_file *id) { 02133 if( id ){ 02134 flockUnlock(id, NO_LOCK); 02135 } 02136 return closeUnixFile(id); 02137 } 02138 02139 #pragma mark Old-School .lock file based locking 02140 02141 /* Dotlock-style reserved lock checking following the behavior of 02142 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */ 02143 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 02144 int rc = SQLITE_OK; 02145 int reserved = 0; 02146 unixFile *pFile = (unixFile*)id; 02147 02148 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 02149 02150 assert( pFile ); 02151 02152 /* Check if a thread in this process holds such a lock */ 02153 if( pFile->locktype>SHARED_LOCK ){ 02154 reserved = 1; 02155 } 02156 02157 /* Otherwise see if some other process holds it. 
*/ 02158 if( !reserved ){ 02159 char *zLockFile = (char *)pFile->lockingContext; 02160 struct stat statBuf; 02161 02162 if( lstat(zLockFile, &statBuf)==0 ){ 02163 /* file exists, someone else has the lock */ 02164 reserved = 1; 02165 }else{ 02166 /* file does not exist, we could have it if we want it */ 02167 int tErrno = errno; 02168 if( ENOENT != tErrno ){ 02169 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 02170 pFile->lastErrno = tErrno; 02171 } 02172 } 02173 } 02174 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved); 02175 02176 *pResOut = reserved; 02177 return rc; 02178 } 02179 02180 static int dotlockLock(sqlite3_file *id, int locktype) { 02181 unixFile *pFile = (unixFile*)id; 02182 int fd; 02183 char *zLockFile = (char *)pFile->lockingContext; 02184 int rc=SQLITE_OK; 02185 02186 /* if we already have a lock, it is exclusive. 02187 ** Just adjust level and punt on outta here. */ 02188 if (pFile->locktype > NO_LOCK) { 02189 pFile->locktype = locktype; 02190 02191 /* Always update the timestamp on the old file */ 02192 utimes(zLockFile, NULL); 02193 rc = SQLITE_OK; 02194 goto dotlock_end_lock; 02195 } 02196 02197 /* check to see if lock file already exists */ 02198 struct stat statBuf; 02199 if (lstat(zLockFile,&statBuf) == 0){ 02200 rc = SQLITE_BUSY; /* it does, busy */ 02201 goto dotlock_end_lock; 02202 } 02203 02204 /* grab an exclusive lock */ 02205 fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600); 02206 if( fd<0 ){ 02207 /* failed to open/create the file, someone else may have stolen the lock */ 02208 int tErrno = errno; 02209 if( EEXIST == tErrno ){ 02210 rc = SQLITE_BUSY; 02211 } else { 02212 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 02213 if( IS_LOCK_ERROR(rc) ){ 02214 pFile->lastErrno = tErrno; 02215 } 02216 } 02217 goto dotlock_end_lock; 02218 } 02219 close(fd); 02220 02221 /* got it, set the type and return ok */ 02222 pFile->locktype = locktype; 02223 02224 dotlock_end_lock: 02225 return rc; 
02226 } 02227 02228 static int dotlockUnlock(sqlite3_file *id, int locktype) { 02229 unixFile *pFile = (unixFile*)id; 02230 char *zLockFile = (char *)pFile->lockingContext; 02231 02232 assert( pFile ); 02233 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype, 02234 pFile->locktype, getpid()); 02235 assert( locktype<=SHARED_LOCK ); 02236 02237 /* no-op if possible */ 02238 if( pFile->locktype==locktype ){ 02239 return SQLITE_OK; 02240 } 02241 02242 /* shared can just be set because we always have an exclusive */ 02243 if (locktype==SHARED_LOCK) { 02244 pFile->locktype = locktype; 02245 return SQLITE_OK; 02246 } 02247 02248 /* no, really, unlock. */ 02249 if (unlink(zLockFile) ) { 02250 int rc, tErrno = errno; 02251 if( ENOENT != tErrno ){ 02252 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 02253 } 02254 if( IS_LOCK_ERROR(rc) ){ 02255 pFile->lastErrno = tErrno; 02256 } 02257 return rc; 02258 } 02259 pFile->locktype = NO_LOCK; 02260 return SQLITE_OK; 02261 } 02262 02263 /* 02264 ** Close a file. 02265 */ 02266 static int dotlockClose(sqlite3_file *id) { 02267 if( id ){ 02268 unixFile *pFile = (unixFile*)id; 02269 dotlockUnlock(id, NO_LOCK); 02270 sqlite3_free(pFile->lockingContext); 02271 } 02272 return closeUnixFile(id); 02273 } 02274 02275 02276 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 02277 02278 /* 02279 ** The nolockLockingContext is void 02280 */ 02281 typedef void nolockLockingContext; 02282 02283 static int nolockCheckReservedLock(sqlite3_file *id, int *pResOut) { 02284 *pResOut = 0; 02285 return SQLITE_OK; 02286 } 02287 02288 static int nolockLock(sqlite3_file *id, int locktype) { 02289 return SQLITE_OK; 02290 } 02291 02292 static int nolockUnlock(sqlite3_file *id, int locktype) { 02293 return SQLITE_OK; 02294 } 02295 02296 /* 02297 ** Close a file. 02298 */ 02299 static int nolockClose(sqlite3_file *id) { 02300 return closeUnixFile(id); 02301 } 02302 02303 02304 /* 02305 ** Information and control of an open file handle. 
02306 */ 02307 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 02308 switch( op ){ 02309 case SQLITE_FCNTL_LOCKSTATE: { 02310 *(int*)pArg = ((unixFile*)id)->locktype; 02311 return SQLITE_OK; 02312 } 02313 } 02314 return SQLITE_ERROR; 02315 } 02316 02317 /* 02318 ** Return the sector size in bytes of the underlying block device for 02319 ** the specified file. This is almost always 512 bytes, but may be 02320 ** larger for some devices. 02321 ** 02322 ** SQLite code assumes this function cannot fail. It also assumes that 02323 ** if two files are created in the same file-system directory (i.e. 02324 ** a database and its journal file) that the sector size will be the 02325 ** same for both. 02326 */ 02327 static int unixSectorSize(sqlite3_file *id){ 02328 return SQLITE_DEFAULT_SECTOR_SIZE; 02329 } 02330 02331 /* 02332 ** Return the device characteristics for the file. This is always 0. 02333 */ 02334 static int unixDeviceCharacteristics(sqlite3_file *id){ 02335 return 0; 02336 } 02337 02338 /* 02339 ** Initialize the contents of the unixFile structure pointed to by pId. 02340 ** 02341 ** When locking extensions are enabled, the filepath and locking style 02342 ** are needed to determine the unixFile pMethod to use for locking operations. 02343 ** The locking-style specific lockingContext data structure is created 02344 ** and assigned here also. 02345 */ 02346 static int fillInUnixFile( 02347 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 02348 int h, /* Open file descriptor of file being opened */ 02349 int dirfd, /* Directory file descriptor */ 02350 sqlite3_file *pId, /* Write to the unixFile structure here */ 02351 const char *zFilename, /* Name of the file being opened */ 02352 int noLock /* Omit locking if true */ 02353 ){ 02354 int eLockingStyle; 02355 unixFile *pNew = (unixFile *)pId; 02356 int rc = SQLITE_OK; 02357 02358 /* Macro to define the static contents of an sqlite3_io_methods 02359 ** structure for a unix backend file. 
Different locking methods 02360 ** require different functions for the xClose, xLock, xUnlock and 02361 ** xCheckReservedLock methods. 02362 */ 02363 #define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) { \ 02364 1, /* iVersion */ \ 02365 xClose, /* xClose */ \ 02366 unixRead, /* xRead */ \ 02367 unixWrite, /* xWrite */ \ 02368 unixTruncate, /* xTruncate */ \ 02369 unixSync, /* xSync */ \ 02370 unixFileSize, /* xFileSize */ \ 02371 xLock, /* xLock */ \ 02372 xUnlock, /* xUnlock */ \ 02373 xCheckReservedLock, /* xCheckReservedLock */ \ 02374 unixFileControl, /* xFileControl */ \ 02375 unixSectorSize, /* xSectorSize */ \ 02376 unixDeviceCharacteristics /* xDeviceCapabilities */ \ 02377 } 02378 static sqlite3_io_methods aIoMethod[] = { 02379 IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock) 02380 ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock) 02381 #if SQLITE_ENABLE_LOCKING_STYLE 02382 ,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock) 02383 ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock) 02384 ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock) 02385 #endif 02386 }; 02387 /* The order of the IOMETHODS macros above is important. 
It must be the 02388 ** same order as the LOCKING_STYLE numbers 02389 */ 02390 assert(LOCKING_STYLE_POSIX==1); 02391 assert(LOCKING_STYLE_NONE==2); 02392 assert(LOCKING_STYLE_DOTFILE==3); 02393 assert(LOCKING_STYLE_FLOCK==4); 02394 assert(LOCKING_STYLE_AFP==5); 02395 02396 assert( pNew->pLock==NULL ); 02397 assert( pNew->pOpen==NULL ); 02398 02399 OSTRACE3("OPEN %-3d %s\n", h, zFilename); 02400 pNew->h = h; 02401 pNew->dirfd = dirfd; 02402 SET_THREADID(pNew); 02403 02404 if( noLock ){ 02405 eLockingStyle = LOCKING_STYLE_NONE; 02406 }else{ 02407 eLockingStyle = detectLockingStyle(pVfs, zFilename, h); 02408 } 02409 02410 switch( eLockingStyle ){ 02411 02412 case LOCKING_STYLE_POSIX: { 02413 enterMutex(); 02414 rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen); 02415 leaveMutex(); 02416 break; 02417 } 02418 02419 #if SQLITE_ENABLE_LOCKING_STYLE 02420 case LOCKING_STYLE_AFP: { 02421 /* AFP locking uses the file path so it needs to be included in 02422 ** the afpLockingContext. 02423 */ 02424 afpLockingContext *pCtx; 02425 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) ); 02426 if( pCtx==0 ){ 02427 rc = SQLITE_NOMEM; 02428 }else{ 02429 /* NB: zFilename exists and remains valid until the file is closed 02430 ** according to requirement F11141. So we do not need to make a 02431 ** copy of the filename. 
*/ 02432 pCtx->filePath = zFilename; 02433 srandomdev(); 02434 } 02435 break; 02436 } 02437 02438 case LOCKING_STYLE_DOTFILE: { 02439 /* Dotfile locking uses the file path so it needs to be included in 02440 ** the dotlockLockingContext 02441 */ 02442 char *zLockFile; 02443 int nFilename; 02444 nFilename = strlen(zFilename) + 6; 02445 zLockFile = (char *)sqlite3_malloc(nFilename); 02446 if( zLockFile==0 ){ 02447 rc = SQLITE_NOMEM; 02448 }else{ 02449 sqlite3_snprintf(nFilename, zLockFile, "%s.lock", zFilename); 02450 } 02451 pNew->lockingContext = zLockFile; 02452 break; 02453 } 02454 02455 case LOCKING_STYLE_FLOCK: 02456 case LOCKING_STYLE_NONE: 02457 break; 02458 #endif 02459 } 02460 02461 pNew->lastErrno = 0; 02462 if( rc!=SQLITE_OK ){ 02463 if( dirfd>=0 ) close(dirfd); 02464 close(h); 02465 }else{ 02466 pNew->pMethod = &aIoMethod[eLockingStyle-1]; 02467 OpenCounter(+1); 02468 } 02469 return rc; 02470 } 02471 02472 /* 02473 ** Open a file descriptor to the directory containing file zFilename. 02474 ** If successful, *pFd is set to the opened file descriptor and 02475 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 02476 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 02477 ** value. 02478 ** 02479 ** If SQLITE_OK is returned, the caller is responsible for closing 02480 ** the file descriptor *pFd using close(). 
02481 */ 02482 static int openDirectory(const char *zFilename, int *pFd){ 02483 int ii; 02484 int fd = -1; 02485 char zDirname[MAX_PATHNAME+1]; 02486 02487 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 02488 for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--); 02489 if( ii>0 ){ 02490 zDirname[ii] = '\0'; 02491 fd = open(zDirname, O_RDONLY|O_BINARY, 0); 02492 if( fd>=0 ){ 02493 #ifdef FD_CLOEXEC 02494 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 02495 #endif 02496 OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname); 02497 } 02498 } 02499 *pFd = fd; 02500 return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN); 02501 } 02502 02503 /* 02504 ** Create a temporary file name in zBuf. zBuf must be allocated 02505 ** by the calling process and must be big enough to hold at least 02506 ** pVfs->mxPathname bytes. 02507 */ 02508 static int getTempname(int nBuf, char *zBuf){ 02509 static const char *azDirs[] = { 02510 0, 02511 "/var/tmp", 02512 "/usr/tmp", 02513 "/tmp", 02514 ".", 02515 }; 02516 static const unsigned char zChars[] = 02517 "abcdefghijklmnopqrstuvwxyz" 02518 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 02519 "0123456789"; 02520 int i, j; 02521 struct stat buf; 02522 const char *zDir = "."; 02523 02524 /* It's odd to simulate an io-error here, but really this is just 02525 ** using the io-error infrastructure to test that SQLite handles this 02526 ** function failing. 02527 */ 02528 SimulateIOError( return SQLITE_IOERR ); 02529 02530 azDirs[0] = sqlite3_temp_directory; 02531 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){ 02532 if( azDirs[i]==0 ) continue; 02533 if( stat(azDirs[i], &buf) ) continue; 02534 if( !S_ISDIR(buf.st_mode) ) continue; 02535 if( access(azDirs[i], 07) ) continue; 02536 zDir = azDirs[i]; 02537 break; 02538 } 02539 02540 /* Check that the output buffer is large enough for the temporary file 02541 ** name. If it is not, return SQLITE_ERROR. 
02542 */ 02543 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= nBuf ){ 02544 return SQLITE_ERROR; 02545 } 02546 02547 do{ 02548 sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); 02549 j = strlen(zBuf); 02550 sqlite3_randomness(15, &zBuf[j]); 02551 for(i=0; i<15; i++, j++){ 02552 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; 02553 } 02554 zBuf[j] = 0; 02555 }while( access(zBuf,0)==0 ); 02556 return SQLITE_OK; 02557 } 02558 02559 02560 /* 02561 ** Open the file zPath. 02562 ** 02563 ** Previously, the SQLite OS layer used three functions in place of this 02564 ** one: 02565 ** 02566 ** sqlite3OsOpenReadWrite(); 02567 ** sqlite3OsOpenReadOnly(); 02568 ** sqlite3OsOpenExclusive(); 02569 ** 02570 ** These calls correspond to the following combinations of flags: 02571 ** 02572 ** ReadWrite() -> (READWRITE | CREATE) 02573 ** ReadOnly() -> (READONLY) 02574 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 02575 ** 02576 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 02577 ** true, the file was configured to be automatically deleted when the 02578 ** file handle closed. To achieve the same effect using this new 02579 ** interface, add the DELETEONCLOSE flag to those specified above for 02580 ** OpenExclusive(). 
02581 */ 02582 static int unixOpen( 02583 sqlite3_vfs *pVfs, 02584 const char *zPath, 02585 sqlite3_file *pFile, 02586 int flags, 02587 int *pOutFlags 02588 ){ 02589 int fd = 0; /* File descriptor returned by open() */ 02590 int dirfd = -1; /* Directory file descriptor */ 02591 int oflags = 0; /* Flags to pass to open() */ 02592 int eType = flags&0xFFFFFF00; /* Type of file to open */ 02593 int noLock; /* True to omit locking primitives */ 02594 02595 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 02596 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 02597 int isCreate = (flags & SQLITE_OPEN_CREATE); 02598 int isReadonly = (flags & SQLITE_OPEN_READONLY); 02599 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 02600 02601 /* If creating a master or main-file journal, this function will open 02602 ** a file-descriptor on the directory too. The first time unixSync() 02603 ** is called the directory file descriptor will be fsync()ed and close()d. 02604 */ 02605 int isOpenDirectory = (isCreate && 02606 (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL) 02607 ); 02608 02609 /* If argument zPath is a NULL pointer, this function is required to open 02610 ** a temporary file. Use this buffer to store the file name in. 02611 */ 02612 char zTmpname[MAX_PATHNAME+1]; 02613 const char *zName = zPath; 02614 02615 /* Check the following statements are true: 02616 ** 02617 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 02618 ** (b) if CREATE is set, then READWRITE must also be set, and 02619 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 02620 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 
02621 */ 02622 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 02623 assert(isCreate==0 || isReadWrite); 02624 assert(isExclusive==0 || isCreate); 02625 assert(isDelete==0 || isCreate); 02626 02627 /* The main DB, main journal, and master journal are never automatically 02628 ** deleted 02629 */ 02630 assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete ); 02631 assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete ); 02632 assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete ); 02633 02634 /* Assert that the upper layer has set one of the "file-type" flags. */ 02635 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 02636 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 02637 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL 02638 || eType==SQLITE_OPEN_TRANSIENT_DB 02639 ); 02640 02641 memset(pFile, 0, sizeof(unixFile)); 02642 02643 if( !zName ){ 02644 int rc; 02645 assert(isDelete && !isOpenDirectory); 02646 rc = getTempname(MAX_PATHNAME+1, zTmpname); 02647 if( rc!=SQLITE_OK ){ 02648 return rc; 02649 } 02650 zName = zTmpname; 02651 } 02652 02653 if( isReadonly ) oflags |= O_RDONLY; 02654 if( isReadWrite ) oflags |= O_RDWR; 02655 if( isCreate ) oflags |= O_CREAT; 02656 if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW); 02657 oflags |= (O_LARGEFILE|O_BINARY); 02658 02659 fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS); 02660 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){ 02661 /* Failed to open the file for read/write access. Try read-only. 
*/ 02662 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 02663 flags |= SQLITE_OPEN_READONLY; 02664 return unixOpen(pVfs, zPath, pFile, flags, pOutFlags); 02665 } 02666 if( fd<0 ){ 02667 return SQLITE_CANTOPEN; 02668 } 02669 if( isDelete ){ 02670 unlink(zName); 02671 } 02672 if( pOutFlags ){ 02673 *pOutFlags = flags; 02674 } 02675 02676 assert(fd!=0); 02677 if( isOpenDirectory ){ 02678 int rc = openDirectory(zPath, &dirfd); 02679 if( rc!=SQLITE_OK ){ 02680 close(fd); 02681 return rc; 02682 } 02683 } 02684 02685 #ifdef FD_CLOEXEC 02686 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 02687 #endif 02688 02689 noLock = eType!=SQLITE_OPEN_MAIN_DB; 02690 return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock); 02691 } 02692 02693 /* 02694 ** Delete the file at zPath. If the dirSync argument is true, fsync() 02695 ** the directory after deleting the file. 02696 */ 02697 static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ 02698 int rc = SQLITE_OK; 02699 SimulateIOError(return SQLITE_IOERR_DELETE); 02700 unlink(zPath); 02701 #ifndef SQLITE_DISABLE_DIRSYNC 02702 if( dirSync ){ 02703 int fd; 02704 rc = openDirectory(zPath, &fd); 02705 if( rc==SQLITE_OK ){ 02706 if( fsync(fd) ){ 02707 rc = SQLITE_IOERR_DIR_FSYNC; 02708 } 02709 close(fd); 02710 } 02711 } 02712 #endif 02713 return rc; 02714 } 02715 02716 /* 02717 ** Test the existance of or access permissions of file zPath. The 02718 ** test performed depends on the value of flags: 02719 ** 02720 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 02721 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 02722 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 02723 ** 02724 ** Otherwise return 0. 
02725 */ 02726 static int unixAccess( 02727 sqlite3_vfs *pVfs, 02728 const char *zPath, 02729 int flags, 02730 int *pResOut 02731 ){ 02732 int amode = 0; 02733 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 02734 switch( flags ){ 02735 case SQLITE_ACCESS_EXISTS: 02736 amode = F_OK; 02737 break; 02738 case SQLITE_ACCESS_READWRITE: 02739 amode = W_OK|R_OK; 02740 break; 02741 case SQLITE_ACCESS_READ: 02742 amode = R_OK; 02743 break; 02744 02745 default: 02746 assert(!"Invalid flags argument"); 02747 } 02748 *pResOut = (access(zPath, amode)==0); 02749 return SQLITE_OK; 02750 } 02751 02752 02753 /* 02754 ** Turn a relative pathname into a full pathname. The relative path 02755 ** is stored as a nul-terminated string in the buffer pointed to by 02756 ** zPath. 02757 ** 02758 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 02759 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 02760 ** this buffer before returning. 02761 */ 02762 static int unixFullPathname( 02763 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 02764 const char *zPath, /* Possibly relative input path */ 02765 int nOut, /* Size of output buffer in bytes */ 02766 char *zOut /* Output buffer */ 02767 ){ 02768 02769 /* It's odd to simulate an io-error here, but really this is just 02770 ** using the io-error infrastructure to test that SQLite handles this 02771 ** function failing. This function could fail if, for example, the 02772 ** current working directly has been unlinked. 
02773 */ 02774 SimulateIOError( return SQLITE_ERROR ); 02775 02776 assert( pVfs->mxPathname==MAX_PATHNAME ); 02777 zOut[nOut-1] = '\0'; 02778 if( zPath[0]=='/' ){ 02779 sqlite3_snprintf(nOut, zOut, "%s", zPath); 02780 }else{ 02781 int nCwd; 02782 if( getcwd(zOut, nOut-1)==0 ){ 02783 return SQLITE_CANTOPEN; 02784 } 02785 nCwd = strlen(zOut); 02786 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath); 02787 } 02788 return SQLITE_OK; 02789 02790 #if 0 02791 /* 02792 ** Remove "/./" path elements and convert "/A/./" path elements 02793 ** to just "/". 02794 */ 02795 if( zFull ){ 02796 int i, j; 02797 for(i=j=0; zFull[i]; i++){ 02798 if( zFull[i]=='/' ){ 02799 if( zFull[i+1]=='/' ) continue; 02800 if( zFull[i+1]=='.' && zFull[i+2]=='/' ){ 02801 i += 1; 02802 continue; 02803 } 02804 if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){ 02805 while( j>0 && zFull[j-1]!='/' ){ j--; } 02806 i += 3; 02807 continue; 02808 } 02809 } 02810 zFull[j++] = zFull[i]; 02811 } 02812 zFull[j] = 0; 02813 } 02814 #endif 02815 } 02816 02817 02818 #ifndef SQLITE_OMIT_LOAD_EXTENSION 02819 /* 02820 ** Interfaces for opening a shared library, finding entry points 02821 ** within the shared library, and closing the shared library. 02822 */ 02823 #include <dlfcn.h> 02824 static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){ 02825 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 02826 } 02827 02828 /* 02829 ** SQLite calls this function immediately after a call to unixDlSym() or 02830 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 02831 ** message is available, it is written to zBufOut. If no error message 02832 ** is available, zBufOut is left unmodified and SQLite uses a default 02833 ** error message. 
02834 */ 02835 static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){ 02836 char *zErr; 02837 enterMutex(); 02838 zErr = dlerror(); 02839 if( zErr ){ 02840 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 02841 } 02842 leaveMutex(); 02843 } 02844 static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){ 02845 return dlsym(pHandle, zSymbol); 02846 } 02847 static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){ 02848 dlclose(pHandle); 02849 } 02850 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 02851 #define unixDlOpen 0 02852 #define unixDlError 0 02853 #define unixDlSym 0 02854 #define unixDlClose 0 02855 #endif 02856 02857 /* 02858 ** Write nBuf bytes of random data to the supplied buffer zBuf. 02859 */ 02860 static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ 02861 02862 assert(nBuf>=(sizeof(time_t)+sizeof(int))); 02863 02864 /* We have to initialize zBuf to prevent valgrind from reporting 02865 ** errors. The reports issued by valgrind are incorrect - we would 02866 ** prefer that the randomness be increased by making use of the 02867 ** uninitialized space in zBuf - but valgrind errors tend to worry 02868 ** some users. Rather than argue, it seems easier just to initialize 02869 ** the whole array and silence valgrind, even if that means less randomness 02870 ** in the random seed. 02871 ** 02872 ** When testing, initializing zBuf[] to zero is all we do. That means 02873 ** that we always use the same random number sequence. This makes the 02874 ** tests repeatable. 
02875 */ 02876 memset(zBuf, 0, nBuf); 02877 #if !defined(SQLITE_TEST) 02878 { 02879 int pid, fd; 02880 fd = open("/dev/urandom", O_RDONLY); 02881 if( fd<0 ){ 02882 time_t t; 02883 time(&t); 02884 memcpy(zBuf, &t, sizeof(t)); 02885 pid = getpid(); 02886 memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid)); 02887 assert( sizeof(t)+sizeof(pid)<=nBuf ); 02888 nBuf = sizeof(t) + sizeof(pid); 02889 }else{ 02890 nBuf = read(fd, zBuf, nBuf); 02891 close(fd); 02892 } 02893 } 02894 #endif 02895 return nBuf; 02896 } 02897 02898 02899 /* 02900 ** Sleep for a little while. Return the amount of time slept. 02901 ** The argument is the number of microseconds we want to sleep. 02902 ** The return value is the number of microseconds of sleep actually 02903 ** requested from the underlying operating system, a number which 02904 ** might be greater than or equal to the argument, but not less 02905 ** than the argument. 02906 */ 02907 static int unixSleep(sqlite3_vfs *pVfs, int microseconds){ 02908 #if defined(HAVE_USLEEP) && HAVE_USLEEP 02909 usleep(microseconds); 02910 return microseconds; 02911 #else 02912 int seconds = (microseconds+999999)/1000000; 02913 sleep(seconds); 02914 return seconds*1000000; 02915 #endif 02916 } 02917 02918 /* 02919 ** The following variable, if set to a non-zero value, becomes the result 02920 ** returned from sqlite3OsCurrentTime(). This is used for testing. 02921 */ 02922 #ifdef SQLITE_TEST 02923 int sqlite3_current_time = 0; 02924 #endif 02925 02926 /* 02927 ** Find the current time (in Universal Coordinated Time). Write the 02928 ** current time and date as a Julian Day number into *prNow and 02929 ** return 0. Return 1 if the time and date cannot be found. 
02930 */ 02931 static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){ 02932 #ifdef NO_GETTOD 02933 time_t t; 02934 time(&t); 02935 *prNow = t/86400.0 + 2440587.5; 02936 #else 02937 struct timeval sNow; 02938 gettimeofday(&sNow, 0); 02939 *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0; 02940 #endif 02941 #ifdef SQLITE_TEST 02942 if( sqlite3_current_time ){ 02943 *prNow = sqlite3_current_time/86400.0 + 2440587.5; 02944 } 02945 #endif 02946 return 0; 02947 } 02948 02949 static int unixGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ 02950 return 0; 02951 } 02952 02953 /* 02954 ** Initialize the operating system interface. 02955 */ 02956 int sqlite3_os_init(void){ 02957 /* Macro to define the static contents of an sqlite3_vfs structure for 02958 ** the unix backend. The two parameters are the values to use for 02959 ** the sqlite3_vfs.zName and sqlite3_vfs.pAppData fields, respectively. 02960 ** 02961 */ 02962 #define UNIXVFS(zVfsName, pVfsAppData) { \ 02963 1, /* iVersion */ \ 02964 sizeof(unixFile), /* szOsFile */ \ 02965 MAX_PATHNAME, /* mxPathname */ \ 02966 0, /* pNext */ \ 02967 zVfsName, /* zName */ \ 02968 (void *)pVfsAppData, /* pAppData */ \ 02969 unixOpen, /* xOpen */ \ 02970 unixDelete, /* xDelete */ \ 02971 unixAccess, /* xAccess */ \ 02972 unixFullPathname, /* xFullPathname */ \ 02973 unixDlOpen, /* xDlOpen */ \ 02974 unixDlError, /* xDlError */ \ 02975 unixDlSym, /* xDlSym */ \ 02976 unixDlClose, /* xDlClose */ \ 02977 unixRandomness, /* xRandomness */ \ 02978 unixSleep, /* xSleep */ \ 02979 unixCurrentTime, /* xCurrentTime */ \ 02980 unixGetLastError /* xGetLastError */ \ 02981 } 02982 02983 static sqlite3_vfs unixVfs = UNIXVFS("unix", 0); 02984 #if SQLITE_ENABLE_LOCKING_STYLE 02985 int i; 02986 static sqlite3_vfs aVfs[] = { 02987 UNIXVFS("unix-posix", LOCKING_STYLE_POSIX), 02988 UNIXVFS("unix-afp", LOCKING_STYLE_AFP), 02989 UNIXVFS("unix-flock", LOCKING_STYLE_FLOCK), 02990 UNIXVFS("unix-dotfile", 
LOCKING_STYLE_DOTFILE), 02991 UNIXVFS("unix-none", LOCKING_STYLE_NONE) 02992 }; 02993 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 02994 sqlite3_vfs_register(&aVfs[i], 0); 02995 } 02996 #endif 02997 sqlite3_vfs_register(&unixVfs, 1); 02998 return SQLITE_OK; 02999 } 03000 03001 /* 03002 ** Shutdown the operating system interface. This is a no-op for unix. 03003 */ 03004 int sqlite3_os_end(void){ 03005 return SQLITE_OK; 03006 } 03007 03008 #endif /* SQLITE_OS_UNIX */
ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:55 2011 by Doxygen 1.6.1