os_unix.c

Go to the documentation of this file.
00001 /*
00002 ** 2004 May 22
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 ******************************************************************************
00012 **
00013 ** This file contains code that is specific to Unix systems.
00014 **
00015 ** $Id: os_unix.c,v 1.209 2008/11/11 18:34:35 danielk1977 Exp $
00016 */
00017 #include "sqliteInt.h"
00018 #if SQLITE_OS_UNIX              /* This file is used on unix only */
00019 
00020 /*
00021 ** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several
00022 ** alternative locking implementations are provided:
00023 **
00024 **   * POSIX locking (the default),
00025 **   * No locking,
00026 **   * Dot-file locking,
00027 **   * flock() locking,
00028 **   * AFP locking (OSX only).
00029 **
00030 ** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by
00031 ** default on a Mac and disabled on all other posix platforms.
00032 */
00033 #if !defined(SQLITE_ENABLE_LOCKING_STYLE)
00034 #  if defined(__DARWIN__)
00035 #    define SQLITE_ENABLE_LOCKING_STYLE 1
00036 #  else
00037 #    define SQLITE_ENABLE_LOCKING_STYLE 0
00038 #  endif
00039 #endif
00040 
00041 /*
00042 ** These #defines should enable >2GB file support on Posix if the
00043 ** underlying operating system supports it.  If the OS lacks
00044 ** large file support, these should be no-ops.
00045 **
00046 ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
00047 ** on the compiler command line.  This is necessary if you are compiling
00048 ** on a recent machine (ex: RedHat 7.2) but you want your code to work
00049 ** on an older machine (ex: RedHat 6.0).  If you compile on RedHat 7.2
00050 ** without this option, LFS is enabled.  But LFS does not exist in the kernel
00051 ** in RedHat 6.0, so the code won't work.  Hence, for maximum binary
00052 ** portability you should omit LFS.
00053 */
00054 #ifndef SQLITE_DISABLE_LFS
00055 # define _LARGE_FILE       1
00056 # ifndef _FILE_OFFSET_BITS
00057 #   define _FILE_OFFSET_BITS 64
00058 # endif
00059 # define _LARGEFILE_SOURCE 1
00060 #endif
00061 
00062 /*
00063 ** standard include files.
00064 */
00065 #include <sys/types.h>
00066 #include <sys/stat.h>
00067 #include <fcntl.h>
00068 #include <unistd.h>
00069 #include <time.h>
00070 #include <sys/time.h>
00071 #include <errno.h>
00072 
00073 #if SQLITE_ENABLE_LOCKING_STYLE
00074 #include <sys/ioctl.h>
00075 #include <sys/param.h>
00076 #include <sys/mount.h>
00077 #endif /* SQLITE_ENABLE_LOCKING_STYLE */
00078 
00079 /*
00080 ** If we are to be thread-safe, include the pthreads header and define
00081 ** the SQLITE_UNIX_THREADS macro.
00082 */
00083 #if SQLITE_THREADSAFE
00084 # include <pthread.h>
00085 # define SQLITE_UNIX_THREADS 1
00086 #endif
00087 
00088 /*
00089 ** Default permissions when creating a new file
00090 */
00091 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
00092 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
00093 #endif
00094 
00095 /*
00096 ** Maximum supported path-length.
00097 */
00098 #define MAX_PATHNAME 512
00099 
00100 
00101 /*
00102 ** The unixFile structure is subclass of sqlite3_file specific for the unix
00103 ** protability layer.
00104 */
00105 typedef struct unixFile unixFile;
00106 struct unixFile {
00107   sqlite3_io_methods const *pMethod;  /* Always the first entry */
00108 #ifdef SQLITE_TEST
00109   /* In test mode, increase the size of this structure a bit so that 
00110   ** it is larger than the struct CrashFile defined in test6.c.
00111   */
00112   char aPadding[32];
00113 #endif
00114   struct openCnt *pOpen;    /* Info about all open fd's on this inode */
00115   struct lockInfo *pLock;   /* Info about locks on this inode */
00116 #if SQLITE_ENABLE_LOCKING_STYLE
00117   void *lockingContext;     /* Locking style specific state */
00118 #endif
00119   int h;                    /* The file descriptor */
00120   unsigned char locktype;   /* The type of lock held on this fd */
00121   int dirfd;                /* File descriptor for the directory */
00122 #if SQLITE_THREADSAFE
00123   pthread_t tid;            /* The thread that "owns" this unixFile */
00124 #endif
00125   int lastErrno;            /* The unix errno from the last I/O error */
00126 };
00127 
00128 /*
00129 ** Include code that is common to all os_*.c files
00130 */
00131 #include "os_common.h"
00132 
00133 /*
00134 ** Define various macros that are missing from some systems.
00135 */
00136 #ifndef O_LARGEFILE
00137 # define O_LARGEFILE 0
00138 #endif
00139 #ifdef SQLITE_DISABLE_LFS
00140 # undef O_LARGEFILE
00141 # define O_LARGEFILE 0
00142 #endif
00143 #ifndef O_NOFOLLOW
00144 # define O_NOFOLLOW 0
00145 #endif
00146 #ifndef O_BINARY
00147 # define O_BINARY 0
00148 #endif
00149 
00150 /*
00151 ** The DJGPP compiler environment looks mostly like Unix, but it
00152 ** lacks the fcntl() system call.  So redefine fcntl() to be something
00153 ** that always succeeds.  This means that locking does not occur under
00154 ** DJGPP.  But it is DOS - what did you expect?
00155 */
00156 #ifdef __DJGPP__
00157 # define fcntl(A,B,C) 0
00158 #endif
00159 
00160 /*
00161 ** The threadid macro resolves to the thread-id or to 0.  Used for
00162 ** testing and debugging only.
00163 */
00164 #if SQLITE_THREADSAFE
00165 #define threadid pthread_self()
00166 #else
00167 #define threadid 0
00168 #endif
00169 
00170 /*
00171 ** Set or check the unixFile.tid field.  This field is set when an unixFile
00172 ** is first opened.  All subsequent uses of the unixFile verify that the
00173 ** same thread is operating on the unixFile.  Some operating systems do
00174 ** not allow locks to be overridden by other threads and that restriction
00175 ** means that sqlite3* database handles cannot be moved from one thread
00176 ** to another.  This logic makes sure a user does not try to do that
00177 ** by mistake.
00178 **
00179 ** Version 3.3.1 (2006-01-15):  unixFile can be moved from one thread to
00180 ** another as long as we are running on a system that supports threads
00181 ** overriding each others locks (which is now the most common behavior)
00182 ** or if no locks are held.  But the unixFile.pLock field needs to be
00183 ** recomputed because its key includes the thread-id.  See the 
00184 ** transferOwnership() function below for additional information
00185 */
00186 #if SQLITE_THREADSAFE
00187 # define SET_THREADID(X)   (X)->tid = pthread_self()
00188 # define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
00189                             !pthread_equal((X)->tid, pthread_self()))
00190 #else
00191 # define SET_THREADID(X)
00192 # define CHECK_THREADID(X) 0
00193 #endif
00194 
00195 /*
00196 ** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
00197 ** section 6.5.2.2 lines 483 through 490 specify that when a process
00198 ** sets or clears a lock, that operation overrides any prior locks set
00199 ** by the same process.  It does not explicitly say so, but this implies
00200 ** that it overrides locks set by the same process using a different
00201 ** file descriptor.  Consider this test case (fd1 is opened on ./file1):
00202 **       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
00203 **
00204 ** Suppose ./file1 and ./file2 are really the same file (because
00205 ** one is a hard or symbolic link to the other) then if you set
00206 ** an exclusive lock on fd1, then try to get an exclusive lock
00207 ** on fd2, it works.  I would have expected the second lock to
00208 ** fail since there was already a lock on the file due to fd1.
00209 ** But not so.  Since both locks came from the same process, the
00210 ** second overrides the first, even though they were on different
00211 ** file descriptors opened on different file names.
00212 **
00213 ** Bummer.  If you ask me, this is broken.  Badly broken.  It means
00214 ** that we cannot use POSIX locks to synchronize file access among
00215 ** competing threads of the same process.  POSIX locks will work fine
00216 ** to synchronize access for threads in separate processes, but not
00217 ** threads within the same process.
00218 **
00219 ** To work around the problem, SQLite has to manage file locks internally
00220 ** on its own.  Whenever a new database is opened, we have to find the
00221 ** specific inode of the database file (the inode is determined by the
00222 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
00223 ** and check for locks already existing on that inode.  When locks are
00224 ** created or removed, we have to look at our own internal record of the
00225 ** locks to see if another thread has previously set a lock on that same
00226 ** inode.
00227 **
00228 ** The sqlite3_file structure for POSIX is no longer just an integer file
00229 ** descriptor.  It is now a structure that holds the integer file
00230 ** descriptor and a pointer to a structure that describes the internal
00231 ** locks on the corresponding inode.  There is one locking structure
00232 ** per inode, so if the same inode is opened twice, both unixFile structures
00233 ** point to the same locking structure.  The locking structure keeps
00234 ** a reference count (so we will know when to delete it) and a "cnt"
00235 ** field that tells us its internal lock status.  cnt==0 means the
00236 ** file is unlocked.  cnt==-1 means the file has an exclusive lock.
00237 ** cnt>0 means there are cnt shared locks on the file.
00238 **
00239 ** Any attempt to lock or unlock a file first checks the locking
00240 ** structure.  The fcntl() system call is only invoked to set a 
00241 ** POSIX lock if the internal lock structure transitions between
00242 ** a locked and an unlocked state.
00243 **
00244 ** 2004-Jan-11:
00245 ** More recent discoveries about POSIX advisory locks.  (The more
00246 ** I discover, the more I realize that POSIX advisory locks are
00247 ** an abomination.)
00248 **
00249 ** If you close a file descriptor that points to a file that has locks,
00250 ** all locks on that file that are owned by the current process are
00251 ** released.  To work around this problem, each unixFile structure contains
00252 ** a pointer to an openCnt structure.  There is one openCnt structure
00253 ** per open inode, which means that multiple unixFile can point to a single
00254 ** openCnt.  When an attempt is made to close an unixFile, if there are
00255 ** other unixFile open on the same inode that are holding locks, the call
00256 ** to close() the file descriptor is deferred until all of the locks clear.
00257 ** The openCnt structure keeps a list of file descriptors that need to
00258 ** be closed and that list is walked (and cleared) when the last lock
00259 ** clears.
00260 **
00261 ** First, under Linux threads, because each thread has a separate
00262 ** process ID, lock operations in one thread do not override locks
00263 ** to the same file in other threads.  Linux threads behave like
00264 ** separate processes in this respect.  But, if you close a file
00265 ** descriptor in linux threads, all locks are cleared, even locks
00266 ** on other threads and even though the other threads have different
00267 ** process IDs.  Linux threads is inconsistent in this respect.
00268 ** (I'm beginning to think that linux threads is an abomination too.)
00269 ** The consequence of this all is that the hash table for the lockInfo
00270 ** structure has to include the process id as part of its key because
00271 ** locks in different threads are treated as distinct.  But the 
00272 ** openCnt structure should not include the process id in its
00273 ** key because close() clears lock on all threads, not just the current
00274 ** thread.  Were it not for this goofiness in linux threads, we could
00275 ** combine the lockInfo and openCnt structures into a single structure.
00276 **
00277 ** 2004-Jun-28:
00278 ** On some versions of linux, threads can override each others locks.
00279 ** On others not.  Sometimes you can change the behavior on the same
00280 ** system by setting the LD_ASSUME_KERNEL environment variable.  The
00281 ** POSIX standard is silent as to which behavior is correct, as far
00282 ** as I can tell, so other versions of unix might show the same
00283 ** inconsistency.  There is little doubt in my mind that posix
00284 ** advisory locks and linux threads are profoundly broken.
00285 **
00286 ** To work around the inconsistencies, we have to test at runtime 
00287 ** whether or not threads can override each others locks.  This test
00288 ** is run once, the first time any lock is attempted.  A static 
00289 ** variable is set to record the results of this test for future
00290 ** use.
00291 */
00292 
/*
** Lookup key for lockInfo objects.  A lockInfo is located by the
** device and inode number of the file it describes.
**
** On systems where threads cannot override each others locks, the
** tid member holds the thread ID.  Where threads can override each
** others locks, tid is always zero.  Builds without threading
** support omit tid entirely.
*/
struct lockKey {
  dev_t dev;       /* Device number */
  ino_t ino;       /* Inode number */
#if SQLITE_THREADSAFE
  pthread_t tid;   /* Thread ID, or zero if threads can override each other */
#endif
};
00309 
00310 /*
00311 ** An instance of the following structure is allocated for each open
00312 ** inode on each thread with a different process ID.  (Threads have
00313 ** different process IDs on linux, but not on most other unixes.)
00314 **
00315 ** A single inode can have multiple file descriptors, so each unixFile
00316 ** structure contains a pointer to an instance of this object and this
00317 ** object keeps a count of the number of unixFile pointing to it.
00318 */
00319 struct lockInfo {
00320   struct lockKey key;  /* The lookup key */
00321   int cnt;             /* Number of SHARED locks held */
00322   int locktype;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
00323   int nRef;            /* Number of pointers to this structure */
00324   struct lockInfo *pNext, *pPrev;   /* List of all lockInfo objects */
00325 };
00326 
/*
** Lookup key for openCnt objects.  It identifies an inode by device
** and inode number, exactly like lockKey but without the thread ID.
*/
struct openKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
};
00336 
00337 /*
00338 ** An instance of the following structure is allocated for each open
00339 ** inode.  This structure keeps track of the number of locks on that
00340 ** inode.  If a close is attempted against an inode that is holding
00341 ** locks, the close is deferred until all locks clear by adding the
00342 ** file descriptor to be closed to the pending list.
00343 */
00344 struct openCnt {
00345   struct openKey key;   /* The lookup key */
00346   int nRef;             /* Number of pointers to this structure */
00347   int nLock;            /* Number of outstanding locks */
00348   int nPending;         /* Number of pending close() operations */
00349   int *aPending;        /* Malloced space holding fd's awaiting a close() */
00350   struct openCnt *pNext, *pPrev;   /* List of all openCnt objects */
00351 };
00352 
00353 /*
00354 ** List of all lockInfo and openCnt objects.  This used to be a hash
00355 ** table.  But the number of objects is rarely more than a dozen and
00356 ** never exceeds a few thousand.  And lookup is not on a critical
00357 ** path so a simple linked list will suffice.
00358 */
00359 static struct lockInfo *lockList = 0;
00360 static struct openCnt *openList = 0;
00361 
00362 /*
00363 ** The locking styles are associated with the different file locking
00364 ** capabilities supported by different file systems.  
00365 **
00366 ** POSIX locking style fully supports shared and exclusive byte-range locks 
00367 ** AFP locking only supports exclusive byte-range locks
00368 ** FLOCK only supports a single file-global exclusive lock
00369 ** DOTLOCK isn't a true locking style, it refers to the use of a special
00370 **   file named the same as the database file with a '.lock' extension, this
00371 **   can be used on file systems that do not offer any reliable file locking
00372 ** NO locking means that no locking will be attempted, this is only used for
00373 **   read-only file systems currently
00374 ** UNSUPPORTED means that no locking will be attempted, this is only used for
00375 **   file systems that are known to be unsupported
00376 */
00377 #define LOCKING_STYLE_POSIX        1
00378 #define LOCKING_STYLE_NONE         2
00379 #define LOCKING_STYLE_DOTFILE      3
00380 #define LOCKING_STYLE_FLOCK        4
00381 #define LOCKING_STYLE_AFP          5
00382 
/*
** Only set the lastErrno if the error code is a real error and not 
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
**
** The argument is parenthesized so that an expression such as (a | b)
** is compared as a whole.  Note that the argument is evaluated twice,
** so it must not have side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
00388 
00389 /*
00390 ** Helper functions to obtain and relinquish the global mutex.
00391 */
00392 static void enterMutex(void){
00393   sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
00394 }
00395 static void leaveMutex(void){
00396   sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
00397 }
00398 
00399 #if SQLITE_THREADSAFE
00400 /*
00401 ** This variable records whether or not threads can override each others
00402 ** locks.
00403 **
00404 **    0:  No.  Threads cannot override each others locks.
00405 **    1:  Yes.  Threads can override each others locks.
00406 **   -1:  We don't know yet.
00407 **
00408 ** On some systems, we know at compile-time if threads can override each
00409 ** others locks.  On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
00410 ** will be set appropriately.  On other systems, we have to check at
00411 ** runtime.  On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
00412 ** undefined.
00413 **
00414 ** This variable normally has file scope only.  But during testing, we make
00415 ** it a global so that the test code can change its value in order to verify
00416 ** that the right stuff happens in either case.
00417 */
00418 #ifndef SQLITE_THREAD_OVERRIDE_LOCK
00419 # define SQLITE_THREAD_OVERRIDE_LOCK -1
00420 #endif
00421 #ifdef SQLITE_TEST
00422 int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
00423 #else
00424 static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
00425 #endif
00426 
/*
** The testThreadLockingBehavior() routine hands an instance of this
** structure to each test thread that it launches.
*/
struct threadTestData {
  int fd;                /* File to be locked */
  struct flock lock;     /* The locking operation */
  int result;            /* Result of the locking operation */
};
00436 
00437 #ifdef SQLITE_LOCK_TRACE
00438 /*
00439 ** Print out information about all locking operations.
00440 **
00441 ** This routine is used for troubleshooting locks on multithreaded
00442 ** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
00443 ** command-line option on the compiler.  This code is normally
00444 ** turned off.
00445 */
00446 static int lockTrace(int fd, int op, struct flock *p){
00447   char *zOpName, *zType;
00448   int s;
00449   int savedErrno;
00450   if( op==F_GETLK ){
00451     zOpName = "GETLK";
00452   }else if( op==F_SETLK ){
00453     zOpName = "SETLK";
00454   }else{
00455     s = fcntl(fd, op, p);
00456     sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
00457     return s;
00458   }
00459   if( p->l_type==F_RDLCK ){
00460     zType = "RDLCK";
00461   }else if( p->l_type==F_WRLCK ){
00462     zType = "WRLCK";
00463   }else if( p->l_type==F_UNLCK ){
00464     zType = "UNLCK";
00465   }else{
00466     assert( 0 );
00467   }
00468   assert( p->l_whence==SEEK_SET );
00469   s = fcntl(fd, op, p);
00470   savedErrno = errno;
00471   sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
00472      threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
00473      (int)p->l_pid, s);
00474   if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
00475     struct flock l2;
00476     l2 = *p;
00477     fcntl(fd, F_GETLK, &l2);
00478     if( l2.l_type==F_RDLCK ){
00479       zType = "RDLCK";
00480     }else if( l2.l_type==F_WRLCK ){
00481       zType = "WRLCK";
00482     }else if( l2.l_type==F_UNLCK ){
00483       zType = "UNLCK";
00484     }else{
00485       assert( 0 );
00486     }
00487     sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
00488        zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
00489   }
00490   errno = savedErrno;
00491   return s;
00492 }
00493 #define fcntl lockTrace
00494 #endif /* SQLITE_LOCK_TRACE */
00495 
00496 #ifdef __linux__
00497 /*
00498 ** This function is used as the main routine for a thread launched by
00499 ** testThreadLockingBehavior(). It tests whether the shared-lock obtained
00500 ** by the main thread in testThreadLockingBehavior() conflicts with a
00501 ** hypothetical write-lock obtained by this thread on the same file.
00502 **
00503 ** The write-lock is not actually acquired, as this is not possible if 
00504 ** the file is open in read-only mode (see ticket #3472).
00505 */ 
00506 static void *threadLockingTest(void *pArg){
00507   struct threadTestData *pData = (struct threadTestData*)pArg;
00508   pData->result = fcntl(pData->fd, F_GETLK, &pData->lock);
00509   return pArg;
00510 }
00511 
00512 /*
00513 ** This procedure attempts to determine whether or not threads
00514 ** can override each others locks then sets the 
00515 ** threadsOverrideEachOthersLocks variable appropriately.
00516 */
00517 static void testThreadLockingBehavior(int fd_orig){
00518   int fd;
00519   int rc;
00520   struct threadTestData d;
00521   struct flock l;
00522   pthread_t t;
00523 
00524   fd = dup(fd_orig);
00525   if( fd<0 ) return;
00526   memset(&l, 0, sizeof(l));
00527   l.l_type = F_RDLCK;
00528   l.l_len = 1;
00529   l.l_start = 0;
00530   l.l_whence = SEEK_SET;
00531   rc = fcntl(fd_orig, F_SETLK, &l);
00532   if( rc!=0 ) return;
00533   memset(&d, 0, sizeof(d));
00534   d.fd = fd;
00535   d.lock = l;
00536   d.lock.l_type = F_WRLCK;
00537   pthread_create(&t, 0, threadLockingTest, &d);
00538   pthread_join(t, 0);
00539   close(fd);
00540   if( d.result!=0 ) return;
00541   threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
00542 }
00543 #else
00544 /*
00545 ** On anything other than linux, assume threads override each others locks.
00546 */
00547 static void testThreadLockingBehavior(int fd_orig){
00548   threadsOverrideEachOthersLocks = 1;
00549 }
00550 #endif /* __linux__ */
00551 
00552 #endif /* SQLITE_THREADSAFE */
00553 
00554 /*
00555 ** Release a lockInfo structure previously allocated by findLockInfo().
00556 */
00557 static void releaseLockInfo(struct lockInfo *pLock){
00558   if( pLock ){
00559     pLock->nRef--;
00560     if( pLock->nRef==0 ){
00561       if( pLock->pPrev ){
00562         assert( pLock->pPrev->pNext==pLock );
00563         pLock->pPrev->pNext = pLock->pNext;
00564       }else{
00565         assert( lockList==pLock );
00566         lockList = pLock->pNext;
00567       }
00568       if( pLock->pNext ){
00569         assert( pLock->pNext->pPrev==pLock );
00570         pLock->pNext->pPrev = pLock->pPrev;
00571       }
00572       sqlite3_free(pLock);
00573     }
00574   }
00575 }
00576 
00577 /*
00578 ** Release a openCnt structure previously allocated by findLockInfo().
00579 */
00580 static void releaseOpenCnt(struct openCnt *pOpen){
00581   if( pOpen ){
00582     pOpen->nRef--;
00583     if( pOpen->nRef==0 ){
00584       if( pOpen->pPrev ){
00585         assert( pOpen->pPrev->pNext==pOpen );
00586         pOpen->pPrev->pNext = pOpen->pNext;
00587       }else{
00588         assert( openList==pOpen );
00589         openList = pOpen->pNext;
00590       }
00591       if( pOpen->pNext ){
00592         assert( pOpen->pNext->pPrev==pOpen );
00593         pOpen->pNext->pPrev = pOpen->pPrev;
00594       }
00595       sqlite3_free(pOpen->aPending);
00596       sqlite3_free(pOpen);
00597     }
00598   }
00599 }
00600 
00601 #if SQLITE_ENABLE_LOCKING_STYLE
00602 /*
00603 ** Tests a byte-range locking query to see if byte range locks are 
00604 ** supported, if not we fall back to dotlockLockingStyle.
00605 */
00606 static int testLockingStyle(int fd){
00607   struct flock lockInfo;
00608 
00609   /* Test byte-range lock using fcntl(). If the call succeeds, 
00610   ** assume that the file-system supports POSIX style locks. 
00611   */
00612   lockInfo.l_len = 1;
00613   lockInfo.l_start = 0;
00614   lockInfo.l_whence = SEEK_SET;
00615   lockInfo.l_type = F_RDLCK;
00616   if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
00617     return LOCKING_STYLE_POSIX;
00618   }
00619   
00620   /* Testing for flock() can give false positives.  So if if the above 
00621   ** test fails, then we fall back to using dot-file style locking.
00622   */  
00623   return LOCKING_STYLE_DOTFILE;
00624 }
00625 #endif
00626 
00627 /* 
00628 ** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the 
00629 ** f_fstypename entry in the statfs structure as returned by stat() for 
00630 ** the file system hosting the database file and selects  the appropriate
00631 ** locking style based on its value.  These values and assignments are 
00632 ** based on Darwin/OSX behavior and have not been thoroughly tested on 
00633 ** other systems.
00634 **
00635 ** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
00636 ** returns LOCKING_STYLE_POSIX.
00637 */
00638 static int detectLockingStyle(
00639   sqlite3_vfs *pVfs,
00640   const char *filePath, 
00641   int fd
00642 ){
00643 #if SQLITE_ENABLE_LOCKING_STYLE
00644   struct Mapping {
00645     const char *zFilesystem;
00646     int eLockingStyle;
00647   } aMap[] = {
00648     { "hfs",    LOCKING_STYLE_POSIX },
00649     { "ufs",    LOCKING_STYLE_POSIX },
00650     { "afpfs",  LOCKING_STYLE_AFP },
00651 #ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
00652     { "smbfs",  LOCKING_STYLE_AFP },
00653 #else
00654     { "smbfs",  LOCKING_STYLE_FLOCK },
00655 #endif
00656     { "msdos",  LOCKING_STYLE_DOTFILE },
00657     { "webdav", LOCKING_STYLE_NONE },
00658     { 0, 0 }
00659   };
00660   int i;
00661   struct statfs fsInfo;
00662 
00663   if( !filePath ){
00664     return LOCKING_STYLE_NONE;
00665   }
00666   if( pVfs->pAppData ){
00667     return SQLITE_PTR_TO_INT(pVfs->pAppData);
00668   }
00669 
00670   if( statfs(filePath, &fsInfo) != -1 ){
00671     if( fsInfo.f_flags & MNT_RDONLY ){
00672       return LOCKING_STYLE_NONE;
00673     }
00674     for(i=0; aMap[i].zFilesystem; i++){
00675       if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
00676         return aMap[i].eLockingStyle;
00677       }
00678     }
00679   }
00680 
00681   /* Default case. Handles, amongst others, "nfs". */
00682   return testLockingStyle(fd);  
00683 #endif
00684   return LOCKING_STYLE_POSIX;
00685 }
00686 
00687 /*
00688 ** Given a file descriptor, locate lockInfo and openCnt structures that
00689 ** describes that file descriptor.  Create new ones if necessary.  The
00690 ** return values might be uninitialized if an error occurs.
00691 **
00692 ** Return an appropriate error code.
00693 */
00694 static int findLockInfo(
00695   int fd,                      /* The file descriptor used in the key */
00696   struct lockInfo **ppLock,    /* Return the lockInfo structure here */
00697   struct openCnt **ppOpen      /* Return the openCnt structure here */
00698 ){
00699   int rc;
00700   struct lockKey key1;
00701   struct openKey key2;
00702   struct stat statbuf;
00703   struct lockInfo *pLock;
00704   struct openCnt *pOpen;
00705   rc = fstat(fd, &statbuf);
00706   if( rc!=0 ){
00707 #ifdef EOVERFLOW
00708     if( errno==EOVERFLOW ) return SQLITE_NOLFS;
00709 #endif
00710     return SQLITE_IOERR;
00711   }
00712 
00713   /* On OS X on an msdos filesystem, the inode number is reported
00714   ** incorrectly for zero-size files.  See ticket #3260.  To work
00715   ** around this problem (we consider it a bug in OS X, not SQLite)
00716   ** we always increase the file size to 1 by writing a single byte
00717   ** prior to accessing the inode number.  The one byte written is
00718   ** an ASCII 'S' character which also happens to be the first byte
00719   ** in the header of every SQLite database.  In this way, if there
00720   ** is a race condition such that another thread has already populated
00721   ** the first page of the database, no damage is done.
00722   */
00723   if( statbuf.st_size==0 ){
00724     write(fd, "S", 1);
00725     rc = fstat(fd, &statbuf);
00726     if( rc!=0 ){
00727       return SQLITE_IOERR;
00728     }
00729   }
00730 
00731   memset(&key1, 0, sizeof(key1));
00732   key1.dev = statbuf.st_dev;
00733   key1.ino = statbuf.st_ino;
00734 #if SQLITE_THREADSAFE
00735   if( threadsOverrideEachOthersLocks<0 ){
00736     testThreadLockingBehavior(fd);
00737   }
00738   key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
00739 #endif
00740   memset(&key2, 0, sizeof(key2));
00741   key2.dev = statbuf.st_dev;
00742   key2.ino = statbuf.st_ino;
00743   pLock = lockList;
00744   while( pLock && memcmp(&key1, &pLock->key, sizeof(key1)) ){
00745     pLock = pLock->pNext;
00746   }
00747   if( pLock==0 ){
00748     pLock = sqlite3_malloc( sizeof(*pLock) );
00749     if( pLock==0 ){
00750       rc = SQLITE_NOMEM;
00751       goto exit_findlockinfo;
00752     }
00753     pLock->key = key1;
00754     pLock->nRef = 1;
00755     pLock->cnt = 0;
00756     pLock->locktype = 0;
00757     pLock->pNext = lockList;
00758     pLock->pPrev = 0;
00759     if( lockList ) lockList->pPrev = pLock;
00760     lockList = pLock;
00761   }else{
00762     pLock->nRef++;
00763   }
00764   *ppLock = pLock;
00765   if( ppOpen!=0 ){
00766     pOpen = openList;
00767     while( pOpen && memcmp(&key2, &pOpen->key, sizeof(key2)) ){
00768       pOpen = pOpen->pNext;
00769     }
00770     if( pOpen==0 ){
00771       pOpen = sqlite3_malloc( sizeof(*pOpen) );
00772       if( pOpen==0 ){
00773         releaseLockInfo(pLock);
00774         rc = SQLITE_NOMEM;
00775         goto exit_findlockinfo;
00776       }
00777       pOpen->key = key2;
00778       pOpen->nRef = 1;
00779       pOpen->nLock = 0;
00780       pOpen->nPending = 0;
00781       pOpen->aPending = 0;
00782       pOpen->pNext = openList;
00783       pOpen->pPrev = 0;
00784       if( openList ) openList->pPrev = pOpen;
00785       openList = pOpen;
00786     }else{
00787       pOpen->nRef++;
00788     }
00789     *ppOpen = pOpen;
00790   }
00791 
00792 exit_findlockinfo:
00793   return rc;
00794 }
00795 
00796 #ifdef SQLITE_DEBUG
00797 /*
00798 ** Helper function for printing out trace information from debugging
00799 ** binaries. This returns the string represetation of the supplied
00800 ** integer lock-type.
00801 */
00802 static const char *locktypeName(int locktype){
00803   switch( locktype ){
00804   case NO_LOCK: return "NONE";
00805   case SHARED_LOCK: return "SHARED";
00806   case RESERVED_LOCK: return "RESERVED";
00807   case PENDING_LOCK: return "PENDING";
00808   case EXCLUSIVE_LOCK: return "EXCLUSIVE";
00809   }
00810   return "ERROR";
00811 }
00812 #endif
00813 
/*
** If we are currently in a different thread than the thread that the
** unixFile argument belongs to, then transfer ownership of the unixFile
** over to the current thread.
**
** A unixFile is only owned by a thread on systems where one thread is
** unable to override locks created by a different thread.  RedHat9 is
** an example of such a system.
**
** Ownership transfer is only allowed if the unixFile is currently unlocked.
** If the unixFile is locked and an ownership is wrong, then return
** SQLITE_MISUSE.  SQLITE_OK is returned if everything works.
**
** The lockInfo for the new thread is obtained before the old one is
** released.  If findLockInfo() fails, its error code is returned and
** pFile->tid and pFile->pLock are left untouched, so the unixFile never
** ends up pointing at a lockInfo that has already been released.
*/
#if SQLITE_THREADSAFE
static int transferOwnership(unixFile *pFile){
  int rc;
  pthread_t hSelf;
  struct lockInfo *pNew = 0;
  if( threadsOverrideEachOthersLocks ){
    /* Ownership transfers not needed on this system */
    return SQLITE_OK;
  }
  hSelf = pthread_self();
  if( pthread_equal(pFile->tid, hSelf) ){
    /* We are still in the same thread */
    OSTRACE1("No-transfer, same thread\n");
    return SQLITE_OK;
  }
  if( pFile->locktype!=NO_LOCK ){
    /* We cannot change ownership while we are holding a lock! */
    return SQLITE_MISUSE;
  }
  OSTRACE4("Transfer ownership of %d from %d to %d\n",
            pFile->h, pFile->tid, hSelf);
  if( pFile->pLock==0 ){
    /* No lockInfo to recompute.  Just record the new owner. */
    pFile->tid = hSelf;
    return SQLITE_OK;
  }

  /* The lockInfo key includes the thread-id, so look up the lockInfo
  ** that belongs to the current thread.  Install it only when the
  ** lookup reports SQLITE_OK. */
  rc = findLockInfo(pFile->h, &pNew, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  releaseLockInfo(pFile->pLock);
  pFile->pLock = pNew;
  pFile->tid = hSelf;
  OSTRACE5("LOCK    %d is now %s(%s,%d)\n", pFile->h,
         locktypeName(pFile->locktype),
         locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
  return SQLITE_OK;
}
#else
  /* On single-threaded builds, ownership transfer is a no-op */
# define transferOwnership(X) SQLITE_OK
#endif
00863 
00864 /*
00865 ** Seek to the offset passed as the second argument, then read cnt 
00866 ** bytes into pBuf. Return the number of bytes actually read.
00867 **
00868 ** NB:  If you define USE_PREAD or USE_PREAD64, then it might also
00869 ** be necessary to define _XOPEN_SOURCE to be 500.  This varies from
00870 ** one system to another.  Since SQLite does not define USE_PREAD
00871 ** any any form by default, we will not attempt to define _XOPEN_SOURCE.
00872 ** See tickets #2741 and #2681.
00873 */
00874 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
00875   int got;
00876   i64 newOffset;
00877   TIMER_START;
00878 #if defined(USE_PREAD)
00879   got = pread(id->h, pBuf, cnt, offset);
00880   SimulateIOError( got = -1 );
00881 #elif defined(USE_PREAD64)
00882   got = pread64(id->h, pBuf, cnt, offset);
00883   SimulateIOError( got = -1 );
00884 #else
00885   newOffset = lseek(id->h, offset, SEEK_SET);
00886   SimulateIOError( newOffset-- );
00887   if( newOffset!=offset ){
00888     return -1;
00889   }
00890   got = read(id->h, pBuf, cnt);
00891 #endif
00892   TIMER_END;
00893   OSTRACE5("READ    %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
00894   return got;
00895 }
00896 
00897 /*
00898 ** Read data from a file into a buffer.  Return SQLITE_OK if all
00899 ** bytes were read successfully and SQLITE_IOERR if anything goes
00900 ** wrong.
00901 */
00902 static int unixRead(
00903   sqlite3_file *id, 
00904   void *pBuf, 
00905   int amt,
00906   sqlite3_int64 offset
00907 ){
00908   int got;
00909   assert( id );
00910   got = seekAndRead((unixFile*)id, offset, pBuf, amt);
00911   if( got==amt ){
00912     return SQLITE_OK;
00913   }else if( got<0 ){
00914     return SQLITE_IOERR_READ;
00915   }else{
00916     /* Unread parts of the buffer must be zero-filled */
00917     memset(&((char*)pBuf)[got], 0, amt-got);
00918     return SQLITE_IOERR_SHORT_READ;
00919   }
00920 }
00921 
00922 /*
00923 ** Seek to the offset in id->offset then read cnt bytes into pBuf.
00924 ** Return the number of bytes actually read.  Update the offset.
00925 */
00926 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
00927   int got;
00928   i64 newOffset;
00929   TIMER_START;
00930 #if defined(USE_PREAD)
00931   got = pwrite(id->h, pBuf, cnt, offset);
00932 #elif defined(USE_PREAD64)
00933   got = pwrite64(id->h, pBuf, cnt, offset);
00934 #else
00935   newOffset = lseek(id->h, offset, SEEK_SET);
00936   if( newOffset!=offset ){
00937     return -1;
00938   }
00939   got = write(id->h, pBuf, cnt);
00940 #endif
00941   TIMER_END;
00942   OSTRACE5("WRITE   %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
00943   return got;
00944 }
00945 
00946 
00947 /*
00948 ** Write data from a buffer into a file.  Return SQLITE_OK on success
00949 ** or some other error code on failure.
00950 */
00951 static int unixWrite(
00952   sqlite3_file *id, 
00953   const void *pBuf, 
00954   int amt,
00955   sqlite3_int64 offset 
00956 ){
00957   int wrote = 0;
00958   assert( id );
00959   assert( amt>0 );
00960   while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
00961     amt -= wrote;
00962     offset += wrote;
00963     pBuf = &((char*)pBuf)[wrote];
00964   }
00965   SimulateIOError(( wrote=(-1), amt=1 ));
00966   SimulateDiskfullError(( wrote=0, amt=1 ));
00967   if( amt>0 ){
00968     if( wrote<0 ){
00969       return SQLITE_IOERR_WRITE;
00970     }else{
00971       return SQLITE_FULL;
00972     }
00973   }
00974   return SQLITE_OK;
00975 }
00976 
00977 #ifdef SQLITE_TEST
00978 /*
00979 ** Count the number of fullsyncs and normal syncs.  This is used to test
00980 ** that syncs and fullsyncs are occurring at the right times.
00981 */
00982 int sqlite3_sync_count = 0;
00983 int sqlite3_fullsync_count = 0;
00984 #endif
00985 
00986 /*
00987 ** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
00988 ** Otherwise use fsync() in its place.
00989 */
00990 #ifndef HAVE_FDATASYNC
00991 # define fdatasync fsync
00992 #endif
00993 
00994 /*
00995 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
00996 ** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
00997 ** only available on Mac OS X.  But that could change.
00998 */
00999 #ifdef F_FULLFSYNC
01000 # define HAVE_FULLFSYNC 1
01001 #else
01002 # define HAVE_FULLFSYNC 0
01003 #endif
01004 
01005 
/*
** The fsync() system call does not work as advertised on many
** unix systems.  This routine is an attempt to make it work better.
**
**   fd        File descriptor to flush.
**   fullSync  Non-zero to request F_FULLFSYNC where it is available.
**   dataOnly  Non-zero to use fdatasync() instead of fsync().  Only
**             consulted on builds where HAVE_FULLFSYNC is 0.
**
** The return value is 0 (SQLITE_OK) on success, or the non-zero result
** of the failing system call.
**
** The SQLITE_NO_SYNC macro turns every sync into a no-op.  That is handy
** for racing through the test suite, but you are strongly advised *not*
** to deploy with it: an OS crash or power failure will then likely
** corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* Under SQLITE_TEST, keep a tally of normal syncs and full syncs so
  ** the test suite can verify this routine is called with the expected
  ** arguments.
  */
#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ){
    sqlite3_fullsync_count++;
  }
#endif

#ifdef SQLITE_NO_SYNC
  /* Syncing has been compiled out */
  rc = SQLITE_OK;
#else

#if HAVE_FULLFSYNC
  /* Try F_FULLFSYNC when asked to; otherwise start with rc!=0 so that
  ** the plain fsync() below runs.
  */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;

  /* If the FULLFSYNC failed, fall back to attempting an fsync().
  ** It shouldn't be possible for fullfsync to fail on the local
  ** file system (on OSX), so failure indicates that FULLFSYNC
  ** isn't supported for this file system. So, attempt an fsync
  ** and (for now) ignore the overhead of a superfluous fcntl call.
  ** It'd be better to detect fullfsync support once and avoid
  ** the fcntl call every time sync is called.
  */
  if( rc ){
    rc = fsync(fd);
  }

#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif /* HAVE_FULLFSYNC */
#endif /* defined(SQLITE_NO_SYNC) */

  return rc;
}
01063 
01064 /*
01065 ** Make sure all writes to a particular file are committed to disk.
01066 **
01067 ** If dataOnly==0 then both the file itself and its metadata (file
01068 ** size, access time, etc) are synced.  If dataOnly!=0 then only the
01069 ** file data is synced.
01070 **
01071 ** Under Unix, also make sure that the directory entry for the file
01072 ** has been created by fsync-ing the directory that contains the file.
01073 ** If we do not do this and we encounter a power failure, the directory
01074 ** entry for the journal might not exist after we reboot.  The next
01075 ** SQLite to access the file will not know that the journal exists (because
01076 ** the directory entry for the journal was never created) and the transaction
01077 ** will not roll back - possibly leading to database corruption.
01078 */
01079 static int unixSync(sqlite3_file *id, int flags){
01080   int rc;
01081   unixFile *pFile = (unixFile*)id;
01082 
01083   int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
01084   int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
01085 
01086   /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
01087   assert((flags&0x0F)==SQLITE_SYNC_NORMAL
01088       || (flags&0x0F)==SQLITE_SYNC_FULL
01089   );
01090 
01091   /* Unix cannot, but some systems may return SQLITE_FULL from here. This
01092   ** line is to test that doing so does not cause any problems.
01093   */
01094   SimulateDiskfullError( return SQLITE_FULL );
01095 
01096   assert( pFile );
01097   OSTRACE2("SYNC    %-3d\n", pFile->h);
01098   rc = full_fsync(pFile->h, isFullsync, isDataOnly);
01099   SimulateIOError( rc=1 );
01100   if( rc ){
01101     return SQLITE_IOERR_FSYNC;
01102   }
01103   if( pFile->dirfd>=0 ){
01104     OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
01105             HAVE_FULLFSYNC, isFullsync);
01106 #ifndef SQLITE_DISABLE_DIRSYNC
01107     /* The directory sync is only attempted if full_fsync is
01108     ** turned off or unavailable.  If a full_fsync occurred above,
01109     ** then the directory sync is superfluous.
01110     */
01111     if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
01112        /*
01113        ** We have received multiple reports of fsync() returning
01114        ** errors when applied to directories on certain file systems.
01115        ** A failed directory sync is not a big deal.  So it seems
01116        ** better to ignore the error.  Ticket #1657
01117        */
01118        /* return SQLITE_IOERR; */
01119     }
01120 #endif
01121     close(pFile->dirfd);  /* Only need to sync once, so close the directory */
01122     pFile->dirfd = -1;    /* when we are done. */
01123   }
01124   return SQLITE_OK;
01125 }
01126 
01127 /*
01128 ** Truncate an open file to a specified size
01129 */
01130 static int unixTruncate(sqlite3_file *id, i64 nByte){
01131   int rc;
01132   assert( id );
01133   SimulateIOError( return SQLITE_IOERR_TRUNCATE );
01134   rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
01135   if( rc ){
01136     return SQLITE_IOERR_TRUNCATE;
01137   }else{
01138     return SQLITE_OK;
01139   }
01140 }
01141 
01142 /*
01143 ** Determine the current size of a file in bytes
01144 */
01145 static int unixFileSize(sqlite3_file *id, i64 *pSize){
01146   int rc;
01147   struct stat buf;
01148   assert( id );
01149   rc = fstat(((unixFile*)id)->h, &buf);
01150   SimulateIOError( rc=1 );
01151   if( rc!=0 ){
01152     return SQLITE_IOERR_FSTAT;
01153   }
01154   *pSize = buf.st_size;
01155 
01156   /* When opening a zero-size database, the findLockInfo() procedure
01157   ** writes a single byte into that file in order to work around a bug
01158   ** in the OS-X msdos filesystem.  In order to avoid problems with upper
01159   ** layers, we need to report this file size as zero even though it is
01160   ** really 1.   Ticket #3260.
01161   */
01162   if( *pSize==1 ) *pSize = 0;
01163 
01164 
01165   return SQLITE_OK;
01166 }
01167 
01168 /*
01169 ** This routine translates a standard POSIX errno code into something
01170 ** useful to the clients of the sqlite3 functions.  Specifically, it is
01171 ** intended to translate a variety of "try again" errors into SQLITE_BUSY
01172 ** and a variety of "please close the file descriptor NOW" errors into 
01173 ** SQLITE_IOERR
01174 ** 
01175 ** Errors during initialization of locks, or file system support for locks,
01176 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
01177 */
01178 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
01179   switch (posixError) {
01180   case 0: 
01181     return SQLITE_OK;
01182     
01183   case EAGAIN:
01184   case ETIMEDOUT:
01185   case EBUSY:
01186   case EINTR:
01187   case ENOLCK:  
01188     /* random NFS retry error, unless during file system support 
01189      * introspection, in which it actually means what it says */
01190     return SQLITE_BUSY;
01191     
01192   case EACCES: 
01193     /* EACCES is like EAGAIN during locking operations, but not any other time*/
01194     if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 
01195   (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 
01196   (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
01197   (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
01198       return SQLITE_BUSY;
01199     }
01200     /* else fall through */
01201   case EPERM: 
01202     return SQLITE_PERM;
01203     
01204   case EDEADLK:
01205     return SQLITE_IOERR_BLOCKED;
01206     
01207 #if EOPNOTSUPP!=ENOTSUP
01208   case EOPNOTSUPP: 
01209     /* something went terribly awry, unless during file system support 
01210      * introspection, in which it actually means what it says */
01211 #endif
01212 #ifdef ENOTSUP
01213   case ENOTSUP: 
01214     /* invalid fd, unless during file system support introspection, in which 
01215      * it actually means what it says */
01216 #endif
01217   case EIO:
01218   case EBADF:
01219   case EINVAL:
01220   case ENOTCONN:
01221   case ENODEV:
01222   case ENXIO:
01223   case ENOENT:
01224   case ESTALE:
01225   case ENOSYS:
01226     /* these should force the client to close the file and reconnect */
01227     
01228   default: 
01229     return sqliteIOErr;
01230   }
01231 }
01232 
01233 /*
01234 ** This routine checks if there is a RESERVED lock held on the specified
01235 ** file by this or any other process. If such a lock is held, set *pResOut
01236 ** to a non-zero value otherwise *pResOut is set to zero.  The return value
01237 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
01238 */
01239 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
01240   int rc = SQLITE_OK;
01241   int reserved = 0;
01242   unixFile *pFile = (unixFile*)id;
01243 
01244   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
01245 
01246   assert( pFile );
01247   enterMutex(); /* Because pFile->pLock is shared across threads */
01248 
01249   /* Check if a thread in this process holds such a lock */
01250   if( pFile->pLock->locktype>SHARED_LOCK ){
01251     reserved = 1;
01252   }
01253 
01254   /* Otherwise see if some other process holds it.
01255   */
01256   if( !reserved ){
01257     struct flock lock;
01258     lock.l_whence = SEEK_SET;
01259     lock.l_start = RESERVED_BYTE;
01260     lock.l_len = 1;
01261     lock.l_type = F_WRLCK;
01262     if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
01263       int tErrno = errno;
01264       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
01265       pFile->lastErrno = tErrno;
01266     } else if( lock.l_type!=F_UNLCK ){
01267       reserved = 1;
01268     }
01269   }
01270   
01271   leaveMutex();
01272   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
01273 
01274   *pResOut = reserved;
01275   return rc;
01276 }
01277 
01278 /*
01279 ** Lock the file with the lock specified by parameter locktype - one
01280 ** of the following:
01281 **
01282 **     (1) SHARED_LOCK
01283 **     (2) RESERVED_LOCK
01284 **     (3) PENDING_LOCK
01285 **     (4) EXCLUSIVE_LOCK
01286 **
01287 ** Sometimes when requesting one lock state, additional lock states
01288 ** are inserted in between.  The locking might fail on one of the later
01289 ** transitions leaving the lock state different from what it started but
01290 ** still short of its goal.  The following chart shows the allowed
01291 ** transitions and the inserted intermediate states:
01292 **
01293 **    UNLOCKED -> SHARED
01294 **    SHARED -> RESERVED
01295 **    SHARED -> (PENDING) -> EXCLUSIVE
01296 **    RESERVED -> (PENDING) -> EXCLUSIVE
01297 **    PENDING -> EXCLUSIVE
01298 **
01299 ** This routine will only increase a lock.  Use the sqlite3OsUnlock()
01300 ** routine to lower a locking level.
      **
      ** Return values: SQLITE_OK if the requested lock (or a stronger one) is
      ** held on return; SQLITE_BUSY if a lock held through another unixFile
      ** in this process precludes the request; the error from
      ** transferOwnership() if ownership cannot be moved to this thread;
      ** otherwise the result of passing the failing fcntl() errno through
      ** sqliteErrorFromPosixError().
01301 */
01302 static int unixLock(sqlite3_file *id, int locktype){
01303   /* The following describes the implementation of the various locks and
01304   ** lock transitions in terms of the POSIX advisory shared and exclusive
01305   ** lock primitives (called read-locks and write-locks below, to avoid
01306   ** confusion with SQLite lock names). The algorithms are complicated
01307   ** slightly in order to be compatible with windows systems simultaneously
01308   ** accessing the same database file, in case that is ever required.
01309   **
01310   ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
01311   ** byte', each single bytes at well known offsets, and the 'shared byte
01312   ** range', a range of 510 bytes at a well known offset.
01313   **
01314   ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
01315   ** byte'.  If this is successful, a random byte from the 'shared byte
01316   ** range' is read-locked and the lock on the 'pending byte' released.
01317   **
01318   ** A process may only obtain a RESERVED lock after it has a SHARED lock.
01319   ** A RESERVED lock is implemented by grabbing a write-lock on the
01320   ** 'reserved byte'. 
01321   **
01322   ** A process may only obtain a PENDING lock after it has obtained a
01323   ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
01324   ** on the 'pending byte'. This ensures that no new SHARED locks can be
01325   ** obtained, but existing SHARED locks are allowed to persist. A process
01326   ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
01327   ** This property is used by the algorithm for rolling back a journal file
01328   ** after a crash.
01329   **
01330   ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
01331   ** implemented by obtaining a write-lock on the entire 'shared byte
01332   ** range'. Since all other locks require a read-lock on one of the bytes
01333   ** within this range, this ensures that no other locks are held on the
01334   ** database. 
01335   **
01336   ** The reason a single byte cannot be used instead of the 'shared byte
01337   ** range' is that some versions of windows do not support read-locks. By
01338   ** locking a random byte from a range, concurrent SHARED locks may exist
01339   ** even if the locking primitive used is always a write-lock.
01340   */
01341   int rc = SQLITE_OK;
01342   unixFile *pFile = (unixFile*)id;
01343   struct lockInfo *pLock = pFile->pLock;  /* Re-read after transferOwnership() below */
01344   struct flock lock;                      /* Argument block for every fcntl() call */
01345   int s;                                  /* Result of the most recent lock-acquiring fcntl() */
01346 
01347   assert( pFile );
01348   OSTRACE7("LOCK    %d %s was %s(%s,%d) pid=%d\n", pFile->h,
01349       locktypeName(locktype), locktypeName(pFile->locktype),
01350       locktypeName(pLock->locktype), pLock->cnt , getpid());
01351 
01352   /* If there is already a lock of this type or more restrictive on the
01353   ** unixFile, do nothing. Don't use the end_lock: exit path, as
01354   ** enterMutex() hasn't been called yet.
01355   */
01356   if( pFile->locktype>=locktype ){
01357     OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
01358             locktypeName(locktype));
01359     return SQLITE_OK;
01360   }
01361 
01362   /* Make sure the locking sequence is correct
01363   */
01364   assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
01365   assert( locktype!=PENDING_LOCK );
01366   assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
01367 
01368   /* This mutex is needed because pFile->pLock is shared across threads
01369   */
01370   enterMutex();
01371 
01372   /* Make sure the current thread owns the pFile.
01373   */
01374   rc = transferOwnership(pFile);
01375   if( rc!=SQLITE_OK ){
01376     leaveMutex();
01377     return rc;
01378   }
01379   pLock = pFile->pLock;  /* transferOwnership() may have replaced pFile->pLock */
01380 
01381   /* If some thread using this PID has a lock via a different unixFile*
01382   ** handle that precludes the requested lock, return BUSY.
01383   */
01384   if( (pFile->locktype!=pLock->locktype && 
01385           (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
01386   ){
01387     rc = SQLITE_BUSY;
01388     goto end_lock;
01389   }
01390 
01391   /* If a SHARED lock is requested, and some thread using this PID already
01392   ** has a SHARED or RESERVED lock, then increment reference counts and
01393   ** return SQLITE_OK.
01394   */
01395   if( locktype==SHARED_LOCK && 
01396       (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
01397     assert( locktype==SHARED_LOCK );
01398     assert( pFile->locktype==0 );
01399     assert( pLock->cnt>0 );
01400     pFile->locktype = SHARED_LOCK;
01401     pLock->cnt++;
01402     pFile->pOpen->nLock++;
01403     goto end_lock;
01404   }
01405 
01406   lock.l_len = 1L;  /* Default to a one-byte range; widened to SHARED_SIZE where needed */
01407 
01408   lock.l_whence = SEEK_SET;
01409 
01410   /* A PENDING lock is needed before acquiring a SHARED lock and before
01411   ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
01412   ** be released.
01413   */
01414   if( locktype==SHARED_LOCK 
01415       || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
01416   ){
01417     lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
01418     lock.l_start = PENDING_BYTE;
01419     s = fcntl(pFile->h, F_SETLK, &lock);
01420     if( s==(-1) ){
01421       int tErrno = errno;
01422       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
01423       if( IS_LOCK_ERROR(rc) ){
01424         pFile->lastErrno = tErrno;
01425       }
01426       goto end_lock;
01427     }
01428   }
01429 
01430 
01431   /* If control gets to this point, then actually go ahead and make
01432   ** operating system calls for the specified lock.
01433   */
01434   if( locktype==SHARED_LOCK ){
01435     int tErrno = 0;
01436     assert( pLock->cnt==0 );
01437     assert( pLock->locktype==0 );
01438 
01439     /* Now get the read-lock */
01440     lock.l_start = SHARED_FIRST;
01441     lock.l_len = SHARED_SIZE;
01442     if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
01443       tErrno = errno;  /* Saved now; the unlock call below may overwrite errno */
01444     }
01445     /* Drop the temporary PENDING lock */
01446     lock.l_start = PENDING_BYTE;
01447     lock.l_len = 1L;
01448     lock.l_type = F_UNLCK;
01449     if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
            /* Report the unlock failure only if the read-lock itself succeeded;
            ** otherwise the read-lock error handled below takes precedence. */
01450       if( s != -1 ){
01451         /* This could happen with a network mount */
01452         tErrno = errno; 
01453         rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 
01454         if( IS_LOCK_ERROR(rc) ){
01455           pFile->lastErrno = tErrno;
01456         }
01457         goto end_lock;
01458       }
01459     }
01460     if( s==(-1) ){
01461       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
01462       if( IS_LOCK_ERROR(rc) ){
01463         pFile->lastErrno = tErrno;
01464       }
01465     }else{
01466       pFile->locktype = SHARED_LOCK;
01467       pFile->pOpen->nLock++;
01468       pLock->cnt = 1;
01469     }
01470   }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
01471     /* We are trying for an exclusive lock but another thread in this
01472     ** same process is still holding a shared lock. */
01473     rc = SQLITE_BUSY;
01474   }else{
01475     /* The request was for a RESERVED or EXCLUSIVE lock.  It is
01476     ** assumed that there is a SHARED or greater lock on the file
01477     ** already.
01478     */
01479     assert( 0!=pFile->locktype );
01480     lock.l_type = F_WRLCK;
01481     switch( locktype ){
01482       case RESERVED_LOCK:
01483         lock.l_start = RESERVED_BYTE;
01484         break;
01485       case EXCLUSIVE_LOCK:
01486         lock.l_start = SHARED_FIRST;
01487         lock.l_len = SHARED_SIZE;
01488         break;
01489       default:
01490         assert(0);
01491     }
01492     s = fcntl(pFile->h, F_SETLK, &lock);
01493     if( s==(-1) ){
01494       int tErrno = errno;
01495       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
01496       if( IS_LOCK_ERROR(rc) ){
01497         pFile->lastErrno = tErrno;
01498       }
01499     }
01500   }
01501   
        /* Record the new state.  If an EXCLUSIVE request failed at this point,
        ** the write-lock on the pending byte was either just obtained above or
        ** was already held, so the handle is left in the PENDING state.
        */
01502   if( rc==SQLITE_OK ){
01503     pFile->locktype = locktype;
01504     pLock->locktype = locktype;
01505   }else if( locktype==EXCLUSIVE_LOCK ){
01506     pFile->locktype = PENDING_LOCK;
01507     pLock->locktype = PENDING_LOCK;
01508   }
01509 
01510 end_lock:
01511   leaveMutex();
01512   OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
01513       rc==SQLITE_OK ? "ok" : "failed");
01514   return rc;
01515 }
01516 
01517 /*
01518 ** Lower the locking level on file descriptor pFile to locktype.  locktype
01519 ** must be either NO_LOCK or SHARED_LOCK.
01520 **
01521 ** If the locking level of the file descriptor is already at or below
01522 ** the requested locking level, this routine is a no-op.
      **
      ** Returns SQLITE_OK on success, SQLITE_MISUSE if CHECK_THREADID()
      ** rejects the calling thread, or the result of mapping a failed
      ** fcntl() errno through sqliteErrorFromPosixError().  pFile->locktype
      ** is only updated when SQLITE_OK is returned.
01523 */
01524 static int unixUnlock(sqlite3_file *id, int locktype){
01525   struct lockInfo *pLock;   /* Lock state shared by handles on this file */
01526   struct flock lock;        /* Argument block for fcntl() */
01527   int rc = SQLITE_OK;
01528   unixFile *pFile = (unixFile*)id;
01529   int h;                    /* Descriptor passed to fcntl(); see SimulateIOError below */
01530 
01531   assert( pFile );
01532   OSTRACE7("UNLOCK  %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
01533       pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
01534 
01535   assert( locktype<=SHARED_LOCK );
01536   if( pFile->locktype<=locktype ){
01537     return SQLITE_OK;
01538   }
01539   if( CHECK_THREADID(pFile) ){
01540     return SQLITE_MISUSE;
01541   }
01542   enterMutex();
01543   h = pFile->h;
01544   pLock = pFile->pLock;
01545   assert( pLock->cnt!=0 );
        /* Step 1: if holding more than SHARED, downgrade to SHARED by
        ** re-taking a read-lock on the shared range (only when SHARED is the
        ** target) and releasing the pending and reserved bytes.
        */
01546   if( pFile->locktype>SHARED_LOCK ){
01547     assert( pLock->locktype==pFile->locktype );
01548     SimulateIOErrorBenign(1);
01549     SimulateIOError( h=(-1) )  /* NOTE(review): presumably test-only fault injection; an invalid fd makes fcntl() fail */
01550     SimulateIOErrorBenign(0);
01551     if( locktype==SHARED_LOCK ){
01552       lock.l_type = F_RDLCK;
01553       lock.l_whence = SEEK_SET;
01554       lock.l_start = SHARED_FIRST;
01555       lock.l_len = SHARED_SIZE;
01556       if( fcntl(h, F_SETLK, &lock)==(-1) ){
01557         int tErrno = errno;
01558         rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
01559         if( IS_LOCK_ERROR(rc) ){
01560           pFile->lastErrno = tErrno;
01561         }
01562         goto end_unlock;
01563       }
01564     }
01565     lock.l_type = F_UNLCK;
01566     lock.l_whence = SEEK_SET;
01567     lock.l_start = PENDING_BYTE;
01568     lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );  /* One call unlocks both adjacent bytes */
01569     if( fcntl(h, F_SETLK, &lock)!=(-1) ){
01570       pLock->locktype = SHARED_LOCK;
01571     }else{
01572       int tErrno = errno;
01573       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
01574       if( IS_LOCK_ERROR(rc) ){
01575         pFile->lastErrno = tErrno;
01576       }
01577       goto end_unlock;
01578     }
01579   }
        /* Step 2: if the target is NO_LOCK, give up this handle's share of
        ** the SHARED lock as well.
        */
01580   if( locktype==NO_LOCK ){
01581     struct openCnt *pOpen;
01582 
01583     /* Decrement the shared lock counter.  Release the lock using an
01584     ** OS call only when all threads in this same process have released
01585     ** the lock.
01586     */
01587     pLock->cnt--;
01588     if( pLock->cnt==0 ){
01589       lock.l_type = F_UNLCK;
01590       lock.l_whence = SEEK_SET;
01591       lock.l_start = lock.l_len = 0L;  /* Start 0 with length 0: the range runs to end of file */
01592       SimulateIOErrorBenign(1);
01593       SimulateIOError( h=(-1) )
01594       SimulateIOErrorBenign(0);
01595       if( fcntl(h, F_SETLK, &lock)!=(-1) ){
01596         pLock->locktype = NO_LOCK;
01597       }else{
01598         int tErrno = errno;
01599         rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
01600         if( IS_LOCK_ERROR(rc) ){
01601           pFile->lastErrno = tErrno;
01602         }
01603         pLock->cnt = 1;  /* Unlock failed: undo the decrement above (cnt was 1 before it) */
01604         goto end_unlock;
01605       }
01606     }
01607 
01608     /* Decrement the count of locks against this same file.  When the
01609     ** count reaches zero, close any other file descriptors whose close
01610     ** was deferred because of outstanding locks.
01611     */
01612     if( rc==SQLITE_OK ){
01613       pOpen = pFile->pOpen;
01614       pOpen->nLock--;
01615       assert( pOpen->nLock>=0 );
01616       if( pOpen->nLock==0 && pOpen->nPending>0 ){
01617         int i;
01618         for(i=0; i<pOpen->nPending; i++){
01619           close(pOpen->aPending[i]);
01620         }
01621         sqlite3_free(pOpen->aPending);
01622         pOpen->nPending = 0;
01623         pOpen->aPending = 0;
01624       }
01625     }
01626   }
01627   
01628 end_unlock:
01629   leaveMutex();
01630   if( rc==SQLITE_OK ) pFile->locktype = locktype;  /* On failure the handle keeps its previous level */
01631   return rc;
01632 }
01633 
01634 /*
01635 ** This function performs the parts of the "close file" operation 
01636 ** common to all locking schemes. It closes the directory and file
01637 ** handles, if they are valid, and sets all fields of the unixFile
01638 ** structure to 0.
01639 */
01640 static int closeUnixFile(sqlite3_file *id){
01641   unixFile *pFile = (unixFile*)id;
01642   if( pFile ){
01643     if( pFile->dirfd>=0 ){
01644       close(pFile->dirfd);
01645     }
01646     if( pFile->h>=0 ){
01647       close(pFile->h);
01648     }
01649     OSTRACE2("CLOSE   %-3d\n", pFile->h);
01650     OpenCounter(-1);
01651     memset(pFile, 0, sizeof(unixFile));
01652   }
01653   return SQLITE_OK;
01654 }
01655 
01656 /*
01657 ** Close a file.
01658 */
01659 static int unixClose(sqlite3_file *id){
01660   if( id ){
01661     unixFile *pFile = (unixFile *)id;
01662     unixUnlock(id, NO_LOCK);
01663     enterMutex();
01664     if( pFile->pOpen && pFile->pOpen->nLock ){
01665       /* If there are outstanding locks, do not actually close the file just
01666       ** yet because that would clear those locks.  Instead, add the file
01667       ** descriptor to pOpen->aPending.  It will be automatically closed when
01668       ** the last lock is cleared.
01669       */
01670       int *aNew;
01671       struct openCnt *pOpen = pFile->pOpen;
01672       aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
01673       if( aNew==0 ){
01674         /* If a malloc fails, just leak the file descriptor */
01675       }else{
01676         pOpen->aPending = aNew;
01677         pOpen->aPending[pOpen->nPending] = pFile->h;
01678         pOpen->nPending++;
01679         pFile->h = -1;
01680       }
01681     }
01682     releaseLockInfo(pFile->pLock);
01683     releaseOpenCnt(pFile->pOpen);
01684     closeUnixFile(id);
01685     leaveMutex();
01686   }
01687   return SQLITE_OK;
01688 }
01689 
01690 
01691 #if SQLITE_ENABLE_LOCKING_STYLE
01692 #pragma mark AFP Support
01693 
01694 /*
01695  ** The afpLockingContext structure contains all afp lock specific state
01696  */
01697 typedef struct afpLockingContext afpLockingContext;
01698 struct afpLockingContext {
01699   unsigned long long sharedLockByte;  /* NOTE(review): presumably the byte in the shared range locked by this handle - confirm in afpLock() */
01700   const char *filePath;               /* NOTE(review): presumably the path handed to _AFPFSSetLock() - confirm against callers */
01701 };
01702 
/*
** Parameter block for the AFP byte-range lock request.  _AFPFSSetLock()
** fills one of these in and hands it to fsctl() together with the
** afpfsByteRangeLock2FSCTL selector defined below.
*/
01703 struct ByteRangeLockPB2
01704 {
01705   unsigned long long offset;        /* offset to first byte to lock */
01706   unsigned long long length;        /* nbr of bytes to lock */
01707   unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
01708   unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
01709   unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
01710   int fd;                           /* file desc to assoc this lock with */
01711 };
01712 
      /* fsctl() request code that carries a struct ByteRangeLockPB2 */
01713 #define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
01714 
01715 /* 
01716  ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
01717  */
01718 static int _AFPFSSetLock(
01719   const char *path, 
01720   unixFile *pFile, 
01721   unsigned long long offset, 
01722   unsigned long long length, 
01723   int setLockFlag
01724 ){
01725   struct ByteRangeLockPB2       pb;
01726   int                     err;
01727   
01728   pb.unLockFlag = setLockFlag ? 0 : 1;
01729   pb.startEndFlag = 0;
01730   pb.offset = offset;
01731   pb.length = length; 
01732   pb.fd = pFile->h;
01733   OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n", 
01734     (setLockFlag?"ON":"OFF"), pFile->h, offset, length);
01735   err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
01736   if ( err==-1 ) {
01737     int rc;
01738     int tErrno = errno;
01739     OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno));
01740     rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); /* error */
01741     if( IS_LOCK_ERROR(rc) ){
01742       pFile->lastErrno = tErrno;
01743     }
01744     return rc;
01745   } else {
01746     return SQLITE_OK;
01747   }
01748 }
01749 
01750 /* AFP-style reserved lock checking following the behavior of 
01751 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
01752 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
01753   int rc = SQLITE_OK;
01754   int reserved = 0;
01755   unixFile *pFile = (unixFile*)id;
01756   
01757   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
01758   
01759   assert( pFile );
01760   afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
01761   
01762   /* Check if a thread in this process holds such a lock */
01763   if( pFile->locktype>SHARED_LOCK ){
01764     reserved = 1;
01765   }
01766   
01767   /* Otherwise see if some other process holds it.
01768    */
01769   if( !reserved ){
01770     /* lock the RESERVED byte */
01771     int lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);  
01772     if( SQLITE_OK==lrc ){
01773       /* if we succeeded in taking the reserved lock, unlock it to restore
01774       ** the original state */
01775       lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1, 0);
01776     } else {
01777       /* if we failed to get the lock then someone else must have it */
01778       reserved = 1;
01779     }
01780     if( IS_LOCK_ERROR(lrc) ){
01781       rc=lrc;
01782     }
01783   }
01784   
01785   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
01786   
01787   *pResOut = reserved;
01788   return rc;
01789 }
01790 
01791 /* AFP-style locking following the behavior of unixLock, see the unixLock 
01792 ** function comments for details of lock management. */
01793 static int afpLock(sqlite3_file *id, int locktype){
01794   int rc = SQLITE_OK;
01795   unixFile *pFile = (unixFile*)id;
01796   afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
01797   
01798   assert( pFile );
01799   OSTRACE5("LOCK    %d %s was %s pid=%d\n", pFile->h,
01800          locktypeName(locktype), locktypeName(pFile->locktype), getpid());
01801 
01802   /* If there is already a lock of this type or more restrictive on the
01803   ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
01804   ** enterMutex() hasn't been called yet.
01805   */
01806   if( pFile->locktype>=locktype ){
01807     OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
01808            locktypeName(locktype));
01809     return SQLITE_OK;
01810   }
01811 
01812   /* Make sure the locking sequence is correct
01813   */
01814   assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
01815   assert( locktype!=PENDING_LOCK );
01816   assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
01817   
01818   /* This mutex is needed because pFile->pLock is shared across threads
01819   */
01820   enterMutex();
01821 
01822   /* Make sure the current thread owns the pFile.
01823   */
01824   rc = transferOwnership(pFile);
01825   if( rc!=SQLITE_OK ){
01826     leaveMutex();
01827     return rc;
01828   }
01829     
01830   /* A PENDING lock is needed before acquiring a SHARED lock and before
01831   ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
01832   ** be released.
01833   */
01834   if( locktype==SHARED_LOCK 
01835       || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
01836   ){
01837     int failed;
01838     failed = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 1);
01839     if (failed) {
01840       rc = failed;
01841       goto afp_end_lock;
01842     }
01843   }
01844   
01845   /* If control gets to this point, then actually go ahead and make
01846   ** operating system calls for the specified lock.
01847   */
01848   if( locktype==SHARED_LOCK ){
01849     int lk, lrc1, lrc2, lrc1Errno;
01850     
01851     /* Now get the read-lock SHARED_LOCK */
01852     /* note that the quality of the randomness doesn't matter that much */
01853     lk = random(); 
01854     context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
01855     lrc1 = _AFPFSSetLock(context->filePath, pFile, 
01856           SHARED_FIRST+context->sharedLockByte, 1, 1);
01857     if( IS_LOCK_ERROR(lrc1) ){
01858       lrc1Errno = pFile->lastErrno;
01859     }
01860     /* Drop the temporary PENDING lock */
01861     lrc2 = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 0);
01862     
01863     if( IS_LOCK_ERROR(lrc1) ) {
01864       pFile->lastErrno = lrc1Errno;
01865       rc = lrc1;
01866       goto afp_end_lock;
01867     } else if( IS_LOCK_ERROR(lrc2) ){
01868       rc = lrc2;
01869       goto afp_end_lock;
01870     } else if( lrc1 != SQLITE_OK ) {
01871       rc = lrc1;
01872     } else {
01873       pFile->locktype = SHARED_LOCK;
01874     }
01875   }else{
01876     /* The request was for a RESERVED or EXCLUSIVE lock.  It is
01877     ** assumed that there is a SHARED or greater lock on the file
01878     ** already.
01879     */
01880     int failed = 0;
01881     assert( 0!=pFile->locktype );
01882     if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
01883         /* Acquire a RESERVED lock */
01884         failed = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);
01885     }
01886     if (!failed && locktype == EXCLUSIVE_LOCK) {
01887       /* Acquire an EXCLUSIVE lock */
01888         
01889       /* Remove the shared lock before trying the range.  we'll need to 
01890       ** reestablish the shared lock if we can't get the  afpUnlock
01891       */
01892       if (!(failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST +
01893                          context->sharedLockByte, 1, 0))) {
01894         /* now attemmpt to get the exclusive lock range */
01895         failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST, 
01896                                SHARED_SIZE, 1);
01897         if (failed && (failed = _AFPFSSetLock(context->filePath, pFile, 
01898                        SHARED_FIRST + context->sharedLockByte, 1, 1))) {
01899           rc = failed;
01900         }
01901       } else {
01902         rc = failed; 
01903       }
01904     }
01905     if( failed ){
01906       rc = failed;
01907     }
01908   }
01909   
01910   if( rc==SQLITE_OK ){
01911     pFile->locktype = locktype;
01912   }else if( locktype==EXCLUSIVE_LOCK ){
01913     pFile->locktype = PENDING_LOCK;
01914   }
01915   
01916 afp_end_lock:
01917   leaveMutex();
01918   OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
01919          rc==SQLITE_OK ? "ok" : "failed");
01920   return rc;
01921 }
01922 
01923 /*
01924 ** Lower the locking level on file descriptor pFile to locktype.  locktype
01925 ** must be either NO_LOCK or SHARED_LOCK.
01926 **
01927 ** If the locking level of the file descriptor is already at or below
01928 ** the requested locking level, this routine is a no-op.
01929 */
01930 static int afpUnlock(sqlite3_file *id, int locktype) {
01931   int rc = SQLITE_OK;
01932   unixFile *pFile = (unixFile*)id;
01933   afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
01934 
01935   assert( pFile );
01936   OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
01937          pFile->locktype, getpid());
01938 
01939   assert( locktype<=SHARED_LOCK );
01940   if( pFile->locktype<=locktype ){
01941     return SQLITE_OK;
01942   }
01943   if( CHECK_THREADID(pFile) ){
01944     return SQLITE_MISUSE;
01945   }
01946   enterMutex();
01947   int failed = SQLITE_OK;
01948   if( pFile->locktype>SHARED_LOCK ){
01949     if( locktype==SHARED_LOCK ){
01950 
01951       /* unlock the exclusive range - then re-establish the shared lock */
01952       if (pFile->locktype==EXCLUSIVE_LOCK) {
01953         failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST, 
01954                                  SHARED_SIZE, 0);
01955         if (!failed) {
01956           /* successfully removed the exclusive lock */
01957           if ((failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST+
01958                             context->sharedLockByte, 1, 1))) {
01959             /* failed to re-establish our shared lock */
01960             rc = failed;
01961           }
01962         } else {
01963           rc = failed;
01964         } 
01965       }
01966     }
01967     if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
01968       if ((failed = _AFPFSSetLock(context->filePath, pFile, 
01969                                   PENDING_BYTE, 1, 0))){
01970         /* failed to release the pending lock */
01971         rc = failed; 
01972       }
01973     } 
01974     if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
01975       if ((failed = _AFPFSSetLock(context->filePath, pFile, 
01976                                   RESERVED_BYTE, 1, 0))) {
01977         /* failed to release the reserved lock */
01978         rc = failed;  
01979       }
01980     } 
01981   }
01982   if( locktype==NO_LOCK ){
01983     int failed = _AFPFSSetLock(context->filePath, pFile, 
01984                                SHARED_FIRST + context->sharedLockByte, 1, 0);
01985     if (failed) {
01986       rc = failed;  
01987     }
01988   }
01989   if (rc == SQLITE_OK)
01990     pFile->locktype = locktype;
01991   leaveMutex();
01992   return rc;
01993 }
01994 
01995 /*
01996 ** Close a file & cleanup AFP specific locking context 
01997 */
01998 static int afpClose(sqlite3_file *id) {
01999   if( id ){
02000     unixFile *pFile = (unixFile*)id;
02001     afpUnlock(id, NO_LOCK);
02002     sqlite3_free(pFile->lockingContext);
02003   }
02004   return closeUnixFile(id);
02005 }
02006 
02007 
02008 #pragma mark flock() style locking
02009 
02010 /*
02011 ** The flockLockingContext is not used
02012 */
02013 typedef void flockLockingContext;
02014 
02015 /* flock-style reserved lock checking following the behavior of 
02016  ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
02017 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
02018   int rc = SQLITE_OK;
02019   int reserved = 0;
02020   unixFile *pFile = (unixFile*)id;
02021   
02022   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
02023   
02024   assert( pFile );
02025   
02026   /* Check if a thread in this process holds such a lock */
02027   if( pFile->locktype>SHARED_LOCK ){
02028     reserved = 1;
02029   }
02030   
02031   /* Otherwise see if some other process holds it. */
02032   if( !reserved ){
02033     /* attempt to get the lock */
02034     int lrc = flock(pFile->h, LOCK_EX | LOCK_NB);
02035     if( !lrc ){
02036       /* got the lock, unlock it */
02037       lrc = flock(pFile->h, LOCK_UN);
02038       if ( lrc ) {
02039         int tErrno = errno;
02040         /* unlock failed with an error */
02041         lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 
02042         if( IS_LOCK_ERROR(lrc) ){
02043           pFile->lastErrno = tErrno;
02044           rc = lrc;
02045         }
02046       }
02047     } else {
02048       int tErrno = errno;
02049       reserved = 1;
02050       /* someone else might have it reserved */
02051       lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 
02052       if( IS_LOCK_ERROR(lrc) ){
02053         pFile->lastErrno = tErrno;
02054         rc = lrc;
02055       }
02056     }
02057   }
02058   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
02059 
02060   *pResOut = reserved;
02061   return rc;
02062 }
02063 
02064 static int flockLock(sqlite3_file *id, int locktype) {
02065   int rc = SQLITE_OK;
02066   unixFile *pFile = (unixFile*)id;
02067 
02068   assert( pFile );
02069 
02070   /* if we already have a lock, it is exclusive.  
02071   ** Just adjust level and punt on outta here. */
02072   if (pFile->locktype > NO_LOCK) {
02073     pFile->locktype = locktype;
02074     return SQLITE_OK;
02075   }
02076   
02077   /* grab an exclusive lock */
02078   
02079   if (flock(pFile->h, LOCK_EX | LOCK_NB)) {
02080     int tErrno = errno;
02081     /* didn't get, must be busy */
02082     rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
02083     if( IS_LOCK_ERROR(rc) ){
02084       pFile->lastErrno = tErrno;
02085     }
02086   } else {
02087     /* got it, set the type and return ok */
02088     pFile->locktype = locktype;
02089   }
02090   OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
02091            rc==SQLITE_OK ? "ok" : "failed");
02092   return rc;
02093 }
02094 
02095 static int flockUnlock(sqlite3_file *id, int locktype) {
02096   unixFile *pFile = (unixFile*)id;
02097   
02098   assert( pFile );
02099   OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
02100            pFile->locktype, getpid());
02101   assert( locktype<=SHARED_LOCK );
02102   
02103   /* no-op if possible */
02104   if( pFile->locktype==locktype ){
02105     return SQLITE_OK;
02106   }
02107   
02108   /* shared can just be set because we always have an exclusive */
02109   if (locktype==SHARED_LOCK) {
02110     pFile->locktype = locktype;
02111     return SQLITE_OK;
02112   }
02113   
02114   /* no, really, unlock. */
02115   int rc = flock(pFile->h, LOCK_UN);
02116   if (rc) {
02117     int r, tErrno = errno;
02118     r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
02119     if( IS_LOCK_ERROR(r) ){
02120       pFile->lastErrno = tErrno;
02121     }
02122     return r;
02123   } else {
02124     pFile->locktype = NO_LOCK;
02125     return SQLITE_OK;
02126   }
02127 }
02128 
02129 /*
02130 ** Close a file.
02131 */
02132 static int flockClose(sqlite3_file *id) {
02133   if( id ){
02134     flockUnlock(id, NO_LOCK);
02135   }
02136   return closeUnixFile(id);
02137 }
02138 
02139 #pragma mark Old-School .lock file based locking
02140 
02141 /* Dotlock-style reserved lock checking following the behavior of 
02142 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
02143 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
02144   int rc = SQLITE_OK;
02145   int reserved = 0;
02146   unixFile *pFile = (unixFile*)id;
02147 
02148   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
02149   
02150   assert( pFile );
02151 
02152   /* Check if a thread in this process holds such a lock */
02153   if( pFile->locktype>SHARED_LOCK ){
02154     reserved = 1;
02155   }
02156   
02157   /* Otherwise see if some other process holds it. */
02158   if( !reserved ){
02159     char *zLockFile = (char *)pFile->lockingContext;
02160     struct stat statBuf;
02161     
02162     if( lstat(zLockFile, &statBuf)==0 ){
02163       /* file exists, someone else has the lock */
02164       reserved = 1;
02165     }else{
02166       /* file does not exist, we could have it if we want it */
02167       int tErrno = errno;
02168       if( ENOENT != tErrno ){
02169         rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
02170         pFile->lastErrno = tErrno;
02171       }
02172     }
02173   }
02174   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
02175 
02176   *pResOut = reserved;
02177   return rc;
02178 }
02179 
02180 static int dotlockLock(sqlite3_file *id, int locktype) {
02181   unixFile *pFile = (unixFile*)id;
02182   int fd;
02183   char *zLockFile = (char *)pFile->lockingContext;
02184   int rc=SQLITE_OK;
02185 
02186   /* if we already have a lock, it is exclusive.  
02187   ** Just adjust level and punt on outta here. */
02188   if (pFile->locktype > NO_LOCK) {
02189     pFile->locktype = locktype;
02190     
02191     /* Always update the timestamp on the old file */
02192     utimes(zLockFile, NULL);
02193     rc = SQLITE_OK;
02194     goto dotlock_end_lock;
02195   }
02196   
02197   /* check to see if lock file already exists */
02198   struct stat statBuf;
02199   if (lstat(zLockFile,&statBuf) == 0){
02200     rc = SQLITE_BUSY; /* it does, busy */
02201     goto dotlock_end_lock;
02202   }
02203   
02204   /* grab an exclusive lock */
02205   fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
02206   if( fd<0 ){
02207     /* failed to open/create the file, someone else may have stolen the lock */
02208     int tErrno = errno;
02209     if( EEXIST == tErrno ){
02210       rc = SQLITE_BUSY;
02211     } else {
02212       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
02213       if( IS_LOCK_ERROR(rc) ){
02214   pFile->lastErrno = tErrno;
02215       }
02216     }
02217     goto dotlock_end_lock;
02218   } 
02219   close(fd);
02220   
02221   /* got it, set the type and return ok */
02222   pFile->locktype = locktype;
02223 
02224  dotlock_end_lock:
02225   return rc;
02226 }
02227 
02228 static int dotlockUnlock(sqlite3_file *id, int locktype) {
02229   unixFile *pFile = (unixFile*)id;
02230   char *zLockFile = (char *)pFile->lockingContext;
02231 
02232   assert( pFile );
02233   OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
02234      pFile->locktype, getpid());
02235   assert( locktype<=SHARED_LOCK );
02236   
02237   /* no-op if possible */
02238   if( pFile->locktype==locktype ){
02239     return SQLITE_OK;
02240   }
02241   
02242   /* shared can just be set because we always have an exclusive */
02243   if (locktype==SHARED_LOCK) {
02244     pFile->locktype = locktype;
02245     return SQLITE_OK;
02246   }
02247   
02248   /* no, really, unlock. */
02249   if (unlink(zLockFile) ) {
02250     int rc, tErrno = errno;
02251     if( ENOENT != tErrno ){
02252       rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
02253     }
02254     if( IS_LOCK_ERROR(rc) ){
02255       pFile->lastErrno = tErrno;
02256     }
02257     return rc; 
02258   }
02259   pFile->locktype = NO_LOCK;
02260   return SQLITE_OK;
02261 }
02262 
02263 /*
02264  ** Close a file.
02265  */
02266 static int dotlockClose(sqlite3_file *id) {
02267   if( id ){
02268     unixFile *pFile = (unixFile*)id;
02269     dotlockUnlock(id, NO_LOCK);
02270     sqlite3_free(pFile->lockingContext);
02271   }
02272   return closeUnixFile(id);
02273 }
02274 
02275 
02276 #endif /* SQLITE_ENABLE_LOCKING_STYLE */
02277 
02278 /*
02279 ** The nolockLockingContext is void
02280 */
02281 typedef void nolockLockingContext;
02282 
02283 static int nolockCheckReservedLock(sqlite3_file *id, int *pResOut) {
02284   *pResOut = 0;
02285   return SQLITE_OK;
02286 }
02287 
02288 static int nolockLock(sqlite3_file *id, int locktype) {
02289   return SQLITE_OK;
02290 }
02291 
02292 static int nolockUnlock(sqlite3_file *id, int locktype) {
02293   return SQLITE_OK;
02294 }
02295 
02296 /*
02297 ** Close a file.
02298 */
02299 static int nolockClose(sqlite3_file *id) {
02300   return closeUnixFile(id);
02301 }
02302 
02303 
02304 /*
02305 ** Information and control of an open file handle.
02306 */
02307 static int unixFileControl(sqlite3_file *id, int op, void *pArg){
02308   switch( op ){
02309     case SQLITE_FCNTL_LOCKSTATE: {
02310       *(int*)pArg = ((unixFile*)id)->locktype;
02311       return SQLITE_OK;
02312     }
02313   }
02314   return SQLITE_ERROR;
02315 }
02316 
02317 /*
02318 ** Return the sector size in bytes of the underlying block device for
02319 ** the specified file. This is almost always 512 bytes, but may be
02320 ** larger for some devices.
02321 **
02322 ** SQLite code assumes this function cannot fail. It also assumes that
02323 ** if two files are created in the same file-system directory (i.e.
02324 ** a database and its journal file) that the sector size will be the
02325 ** same for both.
02326 */
02327 static int unixSectorSize(sqlite3_file *id){
02328   return SQLITE_DEFAULT_SECTOR_SIZE;
02329 }
02330 
02331 /*
02332 ** Return the device characteristics for the file. This is always 0.
02333 */
02334 static int unixDeviceCharacteristics(sqlite3_file *id){
02335   return 0;
02336 }
02337 
02338 /*
02339 ** Initialize the contents of the unixFile structure pointed to by pId.
02340 **
02341 ** When locking extensions are enabled, the filepath and locking style 
02342 ** are needed to determine the unixFile pMethod to use for locking operations.
02343 ** The locking-style specific lockingContext data structure is created 
02344 ** and assigned here also.
02345 */
02346 static int fillInUnixFile(
02347   sqlite3_vfs *pVfs,      /* Pointer to vfs object */
02348   int h,                  /* Open file descriptor of file being opened */
02349   int dirfd,              /* Directory file descriptor */
02350   sqlite3_file *pId,      /* Write to the unixFile structure here */
02351   const char *zFilename,  /* Name of the file being opened */
02352   int noLock              /* Omit locking if true */
02353 ){
02354   int eLockingStyle;
02355   unixFile *pNew = (unixFile *)pId;
02356   int rc = SQLITE_OK;
02357 
02358   /* Macro to define the static contents of an sqlite3_io_methods 
02359   ** structure for a unix backend file. Different locking methods
02360   ** require different functions for the xClose, xLock, xUnlock and
02361   ** xCheckReservedLock methods.
02362   */
02363   #define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) {    \
02364     1,                          /* iVersion */                           \
02365     xClose,                     /* xClose */                             \
02366     unixRead,                   /* xRead */                              \
02367     unixWrite,                  /* xWrite */                             \
02368     unixTruncate,               /* xTruncate */                          \
02369     unixSync,                   /* xSync */                              \
02370     unixFileSize,               /* xFileSize */                          \
02371     xLock,                      /* xLock */                              \
02372     xUnlock,                    /* xUnlock */                            \
02373     xCheckReservedLock,         /* xCheckReservedLock */                 \
02374     unixFileControl,            /* xFileControl */                       \
02375     unixSectorSize,             /* xSectorSize */                        \
02376     unixDeviceCharacteristics   /* xDeviceCapabilities */                \
02377   }
02378   static sqlite3_io_methods aIoMethod[] = {
02379     IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock) 
02380    ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
02381 #if SQLITE_ENABLE_LOCKING_STYLE
02382    ,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
02383    ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
02384    ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
02385 #endif
02386   };
02387   /* The order of the IOMETHODS macros above is important.  It must be the
02388   ** same order as the LOCKING_STYLE numbers
02389   */
02390   assert(LOCKING_STYLE_POSIX==1);
02391   assert(LOCKING_STYLE_NONE==2);
02392   assert(LOCKING_STYLE_DOTFILE==3);
02393   assert(LOCKING_STYLE_FLOCK==4);
02394   assert(LOCKING_STYLE_AFP==5);
02395 
02396   assert( pNew->pLock==NULL );
02397   assert( pNew->pOpen==NULL );
02398 
02399   OSTRACE3("OPEN    %-3d %s\n", h, zFilename);    
02400   pNew->h = h;
02401   pNew->dirfd = dirfd;
02402   SET_THREADID(pNew);
02403 
02404   if( noLock ){
02405     eLockingStyle = LOCKING_STYLE_NONE;
02406   }else{
02407     eLockingStyle = detectLockingStyle(pVfs, zFilename, h);
02408   }
02409 
02410   switch( eLockingStyle ){
02411 
02412     case LOCKING_STYLE_POSIX: {
02413       enterMutex();
02414       rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);
02415       leaveMutex();
02416       break;
02417     }
02418 
02419 #if SQLITE_ENABLE_LOCKING_STYLE
02420     case LOCKING_STYLE_AFP: {
02421       /* AFP locking uses the file path so it needs to be included in
02422       ** the afpLockingContext.
02423       */
02424       afpLockingContext *pCtx;
02425       pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
02426       if( pCtx==0 ){
02427         rc = SQLITE_NOMEM;
02428       }else{
02429         /* NB: zFilename exists and remains valid until the file is closed
02430         ** according to requirement F11141.  So we do not need to make a
02431         ** copy of the filename. */
02432         pCtx->filePath = zFilename;
02433         srandomdev();
02434       }
02435       break;
02436     }
02437 
02438     case LOCKING_STYLE_DOTFILE: {
02439       /* Dotfile locking uses the file path so it needs to be included in
02440       ** the dotlockLockingContext 
02441       */
02442       char *zLockFile;
02443       int nFilename;
02444       nFilename = strlen(zFilename) + 6;
02445       zLockFile = (char *)sqlite3_malloc(nFilename);
02446       if( zLockFile==0 ){
02447         rc = SQLITE_NOMEM;
02448       }else{
02449         sqlite3_snprintf(nFilename, zLockFile, "%s.lock", zFilename);
02450       }
02451       pNew->lockingContext = zLockFile;
02452       break;
02453     }
02454 
02455     case LOCKING_STYLE_FLOCK: 
02456     case LOCKING_STYLE_NONE: 
02457       break;
02458 #endif
02459   }
02460   
02461   pNew->lastErrno = 0;
02462   if( rc!=SQLITE_OK ){
02463     if( dirfd>=0 ) close(dirfd);
02464     close(h);
02465   }else{
02466     pNew->pMethod = &aIoMethod[eLockingStyle-1];
02467     OpenCounter(+1);
02468   }
02469   return rc;
02470 }
02471 
02472 /*
02473 ** Open a file descriptor to the directory containing file zFilename.
02474 ** If successful, *pFd is set to the opened file descriptor and
02475 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
02476 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
02477 ** value.
02478 **
02479 ** If SQLITE_OK is returned, the caller is responsible for closing
02480 ** the file descriptor *pFd using close().
02481 */
02482 static int openDirectory(const char *zFilename, int *pFd){
02483   int ii;
02484   int fd = -1;
02485   char zDirname[MAX_PATHNAME+1];
02486 
02487   sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
02488   for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--);
02489   if( ii>0 ){
02490     zDirname[ii] = '\0';
02491     fd = open(zDirname, O_RDONLY|O_BINARY, 0);
02492     if( fd>=0 ){
02493 #ifdef FD_CLOEXEC
02494       fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
02495 #endif
02496       OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
02497     }
02498   }
02499   *pFd = fd;
02500   return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
02501 }
02502 
02503 /*
02504 ** Create a temporary file name in zBuf.  zBuf must be allocated
02505 ** by the calling process and must be big enough to hold at least
02506 ** pVfs->mxPathname bytes.
02507 */
02508 static int getTempname(int nBuf, char *zBuf){
02509   static const char *azDirs[] = {
02510      0,
02511      "/var/tmp",
02512      "/usr/tmp",
02513      "/tmp",
02514      ".",
02515   };
02516   static const unsigned char zChars[] =
02517     "abcdefghijklmnopqrstuvwxyz"
02518     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
02519     "0123456789";
02520   int i, j;
02521   struct stat buf;
02522   const char *zDir = ".";
02523 
02524   /* It's odd to simulate an io-error here, but really this is just
02525   ** using the io-error infrastructure to test that SQLite handles this
02526   ** function failing. 
02527   */
02528   SimulateIOError( return SQLITE_IOERR );
02529 
02530   azDirs[0] = sqlite3_temp_directory;
02531   for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
02532     if( azDirs[i]==0 ) continue;
02533     if( stat(azDirs[i], &buf) ) continue;
02534     if( !S_ISDIR(buf.st_mode) ) continue;
02535     if( access(azDirs[i], 07) ) continue;
02536     zDir = azDirs[i];
02537     break;
02538   }
02539 
02540   /* Check that the output buffer is large enough for the temporary file 
02541   ** name. If it is not, return SQLITE_ERROR.
02542   */
02543   if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= nBuf ){
02544     return SQLITE_ERROR;
02545   }
02546 
02547   do{
02548     sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
02549     j = strlen(zBuf);
02550     sqlite3_randomness(15, &zBuf[j]);
02551     for(i=0; i<15; i++, j++){
02552       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
02553     }
02554     zBuf[j] = 0;
02555   }while( access(zBuf,0)==0 );
02556   return SQLITE_OK;
02557 }
02558 
02559 
02560 /*
02561 ** Open the file zPath.
02562 ** 
02563 ** Previously, the SQLite OS layer used three functions in place of this
02564 ** one:
02565 **
02566 **     sqlite3OsOpenReadWrite();
02567 **     sqlite3OsOpenReadOnly();
02568 **     sqlite3OsOpenExclusive();
02569 **
02570 ** These calls correspond to the following combinations of flags:
02571 **
02572 **     ReadWrite() ->     (READWRITE | CREATE)
02573 **     ReadOnly()  ->     (READONLY) 
02574 **     OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
02575 **
02576 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If
02577 ** true, the file was configured to be automatically deleted when the
02578 ** file handle closed. To achieve the same effect using this new 
02579 ** interface, add the DELETEONCLOSE flag to those specified above for 
02580 ** OpenExclusive().
02581 */
02582 static int unixOpen(
02583   sqlite3_vfs *pVfs, 
02584   const char *zPath, 
02585   sqlite3_file *pFile,
02586   int flags,
02587   int *pOutFlags
02588 ){
02589   int fd = 0;                    /* File descriptor returned by open() */
02590   int dirfd = -1;                /* Directory file descriptor */
02591   int oflags = 0;                /* Flags to pass to open() */
02592   int eType = flags&0xFFFFFF00;  /* Type of file to open */
02593   int noLock;                    /* True to omit locking primitives */
02594 
02595   int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
02596   int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
02597   int isCreate     = (flags & SQLITE_OPEN_CREATE);
02598   int isReadonly   = (flags & SQLITE_OPEN_READONLY);
02599   int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);
02600 
02601   /* If creating a master or main-file journal, this function will open
02602   ** a file-descriptor on the directory too. The first time unixSync()
02603   ** is called the directory file descriptor will be fsync()ed and close()d.
02604   */
02605   int isOpenDirectory = (isCreate && 
02606       (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
02607   );
02608 
02609   /* If argument zPath is a NULL pointer, this function is required to open
02610   ** a temporary file. Use this buffer to store the file name in.
02611   */
02612   char zTmpname[MAX_PATHNAME+1];
02613   const char *zName = zPath;
02614 
02615   /* Check the following statements are true: 
02616   **
02617   **   (a) Exactly one of the READWRITE and READONLY flags must be set, and 
02618   **   (b) if CREATE is set, then READWRITE must also be set, and
02619   **   (c) if EXCLUSIVE is set, then CREATE must also be set.
02620   **   (d) if DELETEONCLOSE is set, then CREATE must also be set.
02621   */
02622   assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
02623   assert(isCreate==0 || isReadWrite);
02624   assert(isExclusive==0 || isCreate);
02625   assert(isDelete==0 || isCreate);
02626 
02627   /* The main DB, main journal, and master journal are never automatically
02628   ** deleted
02629   */
02630   assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
02631   assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
02632   assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
02633 
02634   /* Assert that the upper layer has set one of the "file-type" flags. */
02635   assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
02636        || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 
02637        || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
02638        || eType==SQLITE_OPEN_TRANSIENT_DB
02639   );
02640 
02641   memset(pFile, 0, sizeof(unixFile));
02642 
02643   if( !zName ){
02644     int rc;
02645     assert(isDelete && !isOpenDirectory);
02646     rc = getTempname(MAX_PATHNAME+1, zTmpname);
02647     if( rc!=SQLITE_OK ){
02648       return rc;
02649     }
02650     zName = zTmpname;
02651   }
02652 
02653   if( isReadonly )  oflags |= O_RDONLY;
02654   if( isReadWrite ) oflags |= O_RDWR;
02655   if( isCreate )    oflags |= O_CREAT;
02656   if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
02657   oflags |= (O_LARGEFILE|O_BINARY);
02658 
02659   fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
02660   if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
02661     /* Failed to open the file for read/write access. Try read-only. */
02662     flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
02663     flags |= SQLITE_OPEN_READONLY;
02664     return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
02665   }
02666   if( fd<0 ){
02667     return SQLITE_CANTOPEN;
02668   }
02669   if( isDelete ){
02670     unlink(zName);
02671   }
02672   if( pOutFlags ){
02673     *pOutFlags = flags;
02674   }
02675 
02676   assert(fd!=0);
02677   if( isOpenDirectory ){
02678     int rc = openDirectory(zPath, &dirfd);
02679     if( rc!=SQLITE_OK ){
02680       close(fd);
02681       return rc;
02682     }
02683   }
02684 
02685 #ifdef FD_CLOEXEC
02686   fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
02687 #endif
02688 
02689   noLock = eType!=SQLITE_OPEN_MAIN_DB;
02690   return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock);
02691 }
02692 
02693 /*
02694 ** Delete the file at zPath. If the dirSync argument is true, fsync()
02695 ** the directory after deleting the file.
02696 */
02697 static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
02698   int rc = SQLITE_OK;
02699   SimulateIOError(return SQLITE_IOERR_DELETE);
02700   unlink(zPath);
02701 #ifndef SQLITE_DISABLE_DIRSYNC
02702   if( dirSync ){
02703     int fd;
02704     rc = openDirectory(zPath, &fd);
02705     if( rc==SQLITE_OK ){
02706       if( fsync(fd) ){
02707         rc = SQLITE_IOERR_DIR_FSYNC;
02708       }
02709       close(fd);
02710     }
02711   }
02712 #endif
02713   return rc;
02714 }
02715 
02716 /*
02717 ** Test the existance of or access permissions of file zPath. The
02718 ** test performed depends on the value of flags:
02719 **
02720 **     SQLITE_ACCESS_EXISTS: Return 1 if the file exists
02721 **     SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
02722 **     SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
02723 **
02724 ** Otherwise return 0.
02725 */
02726 static int unixAccess(
02727   sqlite3_vfs *pVfs, 
02728   const char *zPath, 
02729   int flags, 
02730   int *pResOut
02731 ){
02732   int amode = 0;
02733   SimulateIOError( return SQLITE_IOERR_ACCESS; );
02734   switch( flags ){
02735     case SQLITE_ACCESS_EXISTS:
02736       amode = F_OK;
02737       break;
02738     case SQLITE_ACCESS_READWRITE:
02739       amode = W_OK|R_OK;
02740       break;
02741     case SQLITE_ACCESS_READ:
02742       amode = R_OK;
02743       break;
02744 
02745     default:
02746       assert(!"Invalid flags argument");
02747   }
02748   *pResOut = (access(zPath, amode)==0);
02749   return SQLITE_OK;
02750 }
02751 
02752 
02753 /*
02754 ** Turn a relative pathname into a full pathname. The relative path
02755 ** is stored as a nul-terminated string in the buffer pointed to by
02756 ** zPath. 
02757 **
02758 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 
02759 ** (in this case, MAX_PATHNAME bytes). The full-path is written to
02760 ** this buffer before returning.
02761 */
02762 static int unixFullPathname(
02763   sqlite3_vfs *pVfs,            /* Pointer to vfs object */
02764   const char *zPath,            /* Possibly relative input path */
02765   int nOut,                     /* Size of output buffer in bytes */
02766   char *zOut                    /* Output buffer */
02767 ){
02768 
02769   /* It's odd to simulate an io-error here, but really this is just
02770   ** using the io-error infrastructure to test that SQLite handles this
02771   ** function failing. This function could fail if, for example, the
02772   ** current working directly has been unlinked.
02773   */
02774   SimulateIOError( return SQLITE_ERROR );
02775 
02776   assert( pVfs->mxPathname==MAX_PATHNAME );
02777   zOut[nOut-1] = '\0';
02778   if( zPath[0]=='/' ){
02779     sqlite3_snprintf(nOut, zOut, "%s", zPath);
02780   }else{
02781     int nCwd;
02782     if( getcwd(zOut, nOut-1)==0 ){
02783       return SQLITE_CANTOPEN;
02784     }
02785     nCwd = strlen(zOut);
02786     sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
02787   }
02788   return SQLITE_OK;
02789 
02790 #if 0
02791   /*
02792   ** Remove "/./" path elements and convert "/A/./" path elements
02793   ** to just "/".
02794   */
02795   if( zFull ){
02796     int i, j;
02797     for(i=j=0; zFull[i]; i++){
02798       if( zFull[i]=='/' ){
02799         if( zFull[i+1]=='/' ) continue;
02800         if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
02801           i += 1;
02802           continue;
02803         }
02804         if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
02805           while( j>0 && zFull[j-1]!='/' ){ j--; }
02806           i += 3;
02807           continue;
02808         }
02809       }
02810       zFull[j++] = zFull[i];
02811     }
02812     zFull[j] = 0;
02813   }
02814 #endif
02815 }
02816 
02817 
02818 #ifndef SQLITE_OMIT_LOAD_EXTENSION
02819 /*
02820 ** Interfaces for opening a shared library, finding entry points
02821 ** within the shared library, and closing the shared library.
02822 */
02823 #include <dlfcn.h>
02824 static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
02825   return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
02826 }
02827 
02828 /*
02829 ** SQLite calls this function immediately after a call to unixDlSym() or
02830 ** unixDlOpen() fails (returns a null pointer). If a more detailed error
02831 ** message is available, it is written to zBufOut. If no error message
02832 ** is available, zBufOut is left unmodified and SQLite uses a default
02833 ** error message.
02834 */
02835 static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
02836   char *zErr;
02837   enterMutex();
02838   zErr = dlerror();
02839   if( zErr ){
02840     sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
02841   }
02842   leaveMutex();
02843 }
02844 static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){
02845   return dlsym(pHandle, zSymbol);
02846 }
02847 static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){
02848   dlclose(pHandle);
02849 }
02850 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
02851   #define unixDlOpen  0
02852   #define unixDlError 0
02853   #define unixDlSym   0
02854   #define unixDlClose 0
02855 #endif
02856 
02857 /*
02858 ** Write nBuf bytes of random data to the supplied buffer zBuf.
02859 */
02860 static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
02861 
02862   assert(nBuf>=(sizeof(time_t)+sizeof(int)));
02863 
02864   /* We have to initialize zBuf to prevent valgrind from reporting
02865   ** errors.  The reports issued by valgrind are incorrect - we would
02866   ** prefer that the randomness be increased by making use of the
02867   ** uninitialized space in zBuf - but valgrind errors tend to worry
02868   ** some users.  Rather than argue, it seems easier just to initialize
02869   ** the whole array and silence valgrind, even if that means less randomness
02870   ** in the random seed.
02871   **
02872   ** When testing, initializing zBuf[] to zero is all we do.  That means
02873   ** that we always use the same random number sequence.  This makes the
02874   ** tests repeatable.
02875   */
02876   memset(zBuf, 0, nBuf);
02877 #if !defined(SQLITE_TEST)
02878   {
02879     int pid, fd;
02880     fd = open("/dev/urandom", O_RDONLY);
02881     if( fd<0 ){
02882       time_t t;
02883       time(&t);
02884       memcpy(zBuf, &t, sizeof(t));
02885       pid = getpid();
02886       memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
02887       assert( sizeof(t)+sizeof(pid)<=nBuf );
02888       nBuf = sizeof(t) + sizeof(pid);
02889     }else{
02890       nBuf = read(fd, zBuf, nBuf);
02891       close(fd);
02892     }
02893   }
02894 #endif
02895   return nBuf;
02896 }
02897 
02898 
02899 /*
02900 ** Sleep for a little while.  Return the amount of time slept.
02901 ** The argument is the number of microseconds we want to sleep.
02902 ** The return value is the number of microseconds of sleep actually
02903 ** requested from the underlying operating system, a number which
02904 ** might be greater than or equal to the argument, but not less
02905 ** than the argument.
02906 */
02907 static int unixSleep(sqlite3_vfs *pVfs, int microseconds){
02908 #if defined(HAVE_USLEEP) && HAVE_USLEEP
02909   usleep(microseconds);
02910   return microseconds;
02911 #else
02912   int seconds = (microseconds+999999)/1000000;
02913   sleep(seconds);
02914   return seconds*1000000;
02915 #endif
02916 }
02917 
02918 /*
02919 ** The following variable, if set to a non-zero value, becomes the result
02920 ** returned from sqlite3OsCurrentTime().  This is used for testing.
02921 */
02922 #ifdef SQLITE_TEST
02923 int sqlite3_current_time = 0;
02924 #endif
02925 
02926 /*
02927 ** Find the current time (in Universal Coordinated Time).  Write the
02928 ** current time and date as a Julian Day number into *prNow and
02929 ** return 0.  Return 1 if the time and date cannot be found.
02930 */
02931 static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){
02932 #ifdef NO_GETTOD
02933   time_t t;
02934   time(&t);
02935   *prNow = t/86400.0 + 2440587.5;
02936 #else
02937   struct timeval sNow;
02938   gettimeofday(&sNow, 0);
02939   *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
02940 #endif
02941 #ifdef SQLITE_TEST
02942   if( sqlite3_current_time ){
02943     *prNow = sqlite3_current_time/86400.0 + 2440587.5;
02944   }
02945 #endif
02946   return 0;
02947 }
02948 
02949 static int unixGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
02950   return 0;
02951 }
02952 
02953 /*
02954 ** Initialize the operating system interface.
02955 */
02956 int sqlite3_os_init(void){ 
02957   /* Macro to define the static contents of an sqlite3_vfs structure for
02958   ** the unix backend. The two parameters are the values to use for
02959   ** the sqlite3_vfs.zName and sqlite3_vfs.pAppData fields, respectively.
02960   ** 
02961   */
02962   #define UNIXVFS(zVfsName, pVfsAppData) {                  \
02963     1,                    /* iVersion */                    \
02964     sizeof(unixFile),     /* szOsFile */                    \
02965     MAX_PATHNAME,         /* mxPathname */                  \
02966     0,                    /* pNext */                       \
02967     zVfsName,             /* zName */                       \
02968     (void *)pVfsAppData,  /* pAppData */                    \
02969     unixOpen,             /* xOpen */                       \
02970     unixDelete,           /* xDelete */                     \
02971     unixAccess,           /* xAccess */                     \
02972     unixFullPathname,     /* xFullPathname */               \
02973     unixDlOpen,           /* xDlOpen */                     \
02974     unixDlError,          /* xDlError */                    \
02975     unixDlSym,            /* xDlSym */                      \
02976     unixDlClose,          /* xDlClose */                    \
02977     unixRandomness,       /* xRandomness */                 \
02978     unixSleep,            /* xSleep */                      \
02979     unixCurrentTime,      /* xCurrentTime */                \
02980     unixGetLastError      /* xGetLastError */               \
02981   }
02982 
02983   static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
02984 #if SQLITE_ENABLE_LOCKING_STYLE
02985   int i;
02986   static sqlite3_vfs aVfs[] = {
02987     UNIXVFS("unix-posix",   LOCKING_STYLE_POSIX), 
02988     UNIXVFS("unix-afp",     LOCKING_STYLE_AFP), 
02989     UNIXVFS("unix-flock",   LOCKING_STYLE_FLOCK), 
02990     UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE), 
02991     UNIXVFS("unix-none",    LOCKING_STYLE_NONE)
02992   };
02993   for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
02994     sqlite3_vfs_register(&aVfs[i], 0);
02995   }
02996 #endif
02997   sqlite3_vfs_register(&unixVfs, 1);
02998   return SQLITE_OK; 
02999 }
03000 
03001 /*
03002 ** Shutdown the operating system interface. This is a no-op for unix.
03003 */
03004 int sqlite3_os_end(void){ 
03005   return SQLITE_OK; 
03006 }
03007  
03008 #endif /* SQLITE_OS_UNIX */

ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:55 2011 by Doxygen 1.6.1