pager.c

Go to the documentation of this file.
00001 /*
00002 ** 2001 September 15
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** This is the implementation of the page cache subsystem or "pager".
00013 ** 
00014 ** The pager is used to access a database disk file.  It implements
00015 ** atomic commit and rollback through the use of a journal file that
00016 ** is separate from the database file.  The pager also implements file
00017 ** locking to prevent two processes from writing the same database
00018 ** file simultaneously, or one process from reading the database while
00019 ** another is writing.
00020 **
00021 ** @(#) $Id: pager.c,v 1.502 2008/11/07 00:24:54 drh Exp $
00022 */
00023 #ifndef SQLITE_OMIT_DISKIO
00024 #include "sqliteInt.h"
00025 
00026 /*
00027 ** Macros for troubleshooting.  Normally turned off
00028 */
00029 #if 0
00030 #define sqlite3DebugPrintf printf
00031 #define PAGERTRACE1(X)       sqlite3DebugPrintf(X)
00032 #define PAGERTRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
00033 #define PAGERTRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
00034 #define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
00035 #define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
00036 #else
00037 #define PAGERTRACE1(X)
00038 #define PAGERTRACE2(X,Y)
00039 #define PAGERTRACE3(X,Y,Z)
00040 #define PAGERTRACE4(X,Y,Z,W)
00041 #define PAGERTRACE5(X,Y,Z,W,V)
00042 #endif
00043 
/*
** The following two macros are used within the PAGERTRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as its argument and yields
** the pager's database file handle (its "fd" member) cast to int.
** FILEHANDLEID() takes a file handle as its argument and yields it cast
** to int.
**
** Every use of a macro argument is parenthesized so that an argument
** expression such as "a+1" or "cond ? x : y" expands with the intended
** grouping instead of binding to the "->" operator or the cast.
*/
#define PAGERID(p) ((int)((p)->fd))
#define FILEHANDLEID(fd) ((int)(fd))
00054 
00055 /*
00056 ** The page cache as a whole is always in one of the following
00057 ** states:
00058 **
00059 **   PAGER_UNLOCK        The page cache is not currently reading or 
00060 **                       writing the database file.  There is no
00061 **                       data held in memory.  This is the initial
00062 **                       state.
00063 **
00064 **   PAGER_SHARED        The page cache is reading the database.
00065 **                       Writing is not permitted.  There can be
00066 **                       multiple readers accessing the same database
00067 **                       file at the same time.
00068 **
00069 **   PAGER_RESERVED      This process has reserved the database for writing
00070 **                       but has not yet made any changes.  Only one process
00071 **                       at a time can reserve the database.  The original
00072 **                       database file has not been modified so other
00073 **                       processes may still be reading the on-disk
00074 **                       database file.
00075 **
00076 **   PAGER_EXCLUSIVE     The page cache is writing the database.
00077 **                       Access is exclusive.  No other processes or
00078 **                       threads can be reading or writing while one
00079 **                       process is writing.
00080 **
00081 **   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
00082 **                       after all dirty pages have been written to the
00083 **                       database file and the file has been synced to
00084 **                       disk. All that remains to do is to remove or
00085 **                       truncate the journal file and the transaction 
00086 **                       will be committed.
00087 **
00088 ** The page cache comes up in PAGER_UNLOCK.  The first time a
00089 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
00090 ** After all pages have been released using sqlite_page_unref(),
00091 ** the state transitions back to PAGER_UNLOCK.  The first time
00092 ** that sqlite3PagerWrite() is called, the state transitions to
00093 ** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
00094 ** called on an outstanding page which means that the pager must
00095 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
00096 ** PAGER_RESERVED means that there is an open rollback journal.
00097 ** The transition to PAGER_EXCLUSIVE occurs before any changes
00098 ** are made to the database file, though writes to the rollback
00099 ** journal occurs with just PAGER_RESERVED.  After an sqlite3PagerRollback()
00100 ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
00101 ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
00102 */
00103 #define PAGER_UNLOCK      0
00104 #define PAGER_SHARED      1   /* same as SHARED_LOCK */
00105 #define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
00106 #define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
00107 #define PAGER_SYNCED      5
00108 
00109 /*
00110 ** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
00111 ** then failed attempts to get a reserved lock will invoke the busy callback.
00112 ** This is off by default.  To see why, consider the following scenario:
00113 ** 
00114 ** Suppose thread A already has a shared lock and wants a reserved lock.
00115 ** Thread B already has a reserved lock and wants an exclusive lock.  If
00116 ** both threads are using their busy callbacks, it might be a long time
00117 ** before one of the threads gives up and allows the other to proceed.
00118 ** But if the thread trying to get the reserved lock gives up quickly
00119 ** (if it never invokes its busy callback) then the contention will be
00120 ** resolved quickly.
00121 */
00122 #ifndef SQLITE_BUSY_RESERVED_LOCK
00123 # define SQLITE_BUSY_RESERVED_LOCK 0
00124 #endif
00125 
00126 /*
00127 ** This macro rounds values up so that if the value is an address it
00128 ** is guaranteed to be an address that is aligned to an 8-byte boundary.
00129 */
00130 #define FORCE_ALIGNMENT(X)   (((X)+7)&~7)
00131 
/*
** Macros used for invoking the codec if there is one.
**
** CODEC1(P,D,N,X) calls P->xCodec(P->pCodecArg,D,N,X) when a codec is
** installed and discards the result.  CODEC2(P,D,N,X) evaluates to the
** codec's return value, or to D itself when no codec is installed, cast
** to (char*).  When SQLITE_HAS_CODEC is not defined CODEC1 expands to
** nothing and CODEC2 is simply a cast of D.
**
** All macro arguments are parenthesized so that expression arguments
** (for example "buf+1") are cast and dereferenced as a whole.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
# define CODEC2(P,D,N,X) ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)(D))
#endif
00142 
00143 /*
00144 ** A open page cache is an instance of the following structure.
00145 **
00146 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
00147 ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
00148 ** and is returned as the result of every major pager API call.  The
00149 ** SQLITE_FULL return code is slightly different. It persists only until the
00150 ** next successful rollback is performed on the pager cache. Also,
00151 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
00152 ** APIs, they may still be used successfully.
00153 */
00154 struct Pager {
00155   sqlite3_vfs *pVfs;          /* OS functions to use for IO */
00156   u8 journalOpen;             /* True if journal file descriptors is valid */
00157   u8 journalStarted;          /* True if header of journal is synced */
00158   u8 useJournal;              /* Use a rollback journal on this file */
00159   u8 noReadlock;              /* Do not bother to obtain readlocks */
00160   u8 stmtOpen;                /* True if the statement subjournal is open */
00161   u8 stmtInUse;               /* True we are in a statement subtransaction */
00162   u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
00163   u8 noSync;                  /* Do not sync the journal if true */
00164   u8 fullSync;                /* Do extra syncs of the journal for robustness */
00165   u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
00166   u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
00167   u8 tempFile;                /* zFilename is a temporary file */
00168   u8 readOnly;                /* True for a read-only database */
00169   u8 needSync;                /* True if an fsync() is needed on the journal */
00170   u8 dirtyCache;              /* True if cached pages have changed */
00171   u8 alwaysRollback;          /* Disable DontRollback() for all pages */
00172   u8 memDb;                   /* True to inhibit all file I/O */
00173   u8 setMaster;               /* True if a m-j name has been written to jrnl */
00174   u8 doNotSync;               /* Boolean. While true, do not spill the cache */
00175   u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
00176   u8 journalMode;             /* On of the PAGER_JOURNALMODE_* values */
00177   u8 dbModified;              /* True if there are any changes to the Db */
00178   u8 changeCountDone;         /* Set after incrementing the change-counter */
00179   u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
00180   int errCode;                /* One of several kinds of errors */
00181   int dbSize;                 /* Number of pages in the file */
00182   int origDbSize;             /* dbSize before the current change */
00183   int stmtSize;               /* Size of database (in pages) at stmt_begin() */
00184   int nRec;                   /* Number of pages written to the journal */
00185   u32 cksumInit;              /* Quasi-random value added to every checksum */
00186   int stmtNRec;               /* Number of records in stmt subjournal */
00187   int nExtra;                 /* Add this many bytes to each in-memory page */
00188   int pageSize;               /* Number of bytes in a page */
00189   int nPage;                  /* Total number of in-memory pages */
00190   int mxPage;                 /* Maximum number of pages to hold in cache */
00191   Pgno mxPgno;                /* Maximum allowed size of the database */
00192   Bitvec *pInJournal;         /* One bit for each page in the database file */
00193   Bitvec *pInStmt;            /* One bit for each page in the database */
00194   Bitvec *pAlwaysRollback;    /* One bit for each page marked always-rollback */
00195   char *zFilename;            /* Name of the database file */
00196   char *zJournal;             /* Name of the journal file */
00197   char *zDirectory;           /* Directory hold database and journal files */
00198   sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
00199   sqlite3_file *stfd;         /* File descriptor for the statement subjournal*/
00200   BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
00201   i64 journalOff;             /* Current byte offset in the journal file */
00202   i64 journalHdr;             /* Byte offset to previous journal header */
00203   i64 stmtHdrOff;             /* First journal header written this statement */
00204   i64 stmtCksum;              /* cksumInit when statement was started */
00205   i64 stmtJSize;              /* Size of journal at stmt_begin() */
00206   u32 sectorSize;             /* Assumed sector size during rollback */
00207 #ifdef SQLITE_TEST
00208   int nHit, nMiss;            /* Cache hits and missing */
00209   int nRead, nWrite;          /* Database pages read/written */
00210 #endif
00211   void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
00212 #ifdef SQLITE_HAS_CODEC
00213   void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
00214   void *pCodecArg;            /* First argument to xCodec() */
00215 #endif
00216   char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
00217   char dbFileVers[16];        /* Changes whenever database file changes */
00218   i64 journalSizeLimit;       /* Size limit for persistent journal files */
00219   PCache *pPCache;            /* Pointer to page cache object */
00220 };
00221 
00222 /*
00223 ** The following global variables hold counters used for
00224 ** testing purposes only.  These variables do not exist in
00225 ** a non-testing build.  These variables are not thread-safe.
00226 */
00227 #ifdef SQLITE_TEST
00228 int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
00229 int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
00230 int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
00231 # define PAGER_INCR(v)  v++
00232 #else
00233 # define PAGER_INCR(v)
00234 #endif
00235 
00236 
00237 
00238 /*
00239 ** Journal files begin with the following magic string.  The data
00240 ** was obtained from /dev/random.  It is used only as a sanity check.
00241 **
00242 ** Since version 2.8.0, the journal format contains additional sanity
00243 ** checking information.  If the power fails while the journal is being
00244 ** written, semi-random garbage data might appear in the journal
00245 ** file after power is restored.  If an attempt is then made
00246 ** to roll the journal back, the database could be corrupted.  The additional
00247 ** sanity checking data is an attempt to discover the garbage in the
00248 ** journal and ignore it.
00249 **
00250 ** The sanity checking information for the new journal format consists
00251 ** of a 32-bit checksum on each page of data.  The checksum covers both
00252 ** the page number and the pPager->pageSize bytes of data for the page.
00253 ** This cksum is initialized to a 32-bit random value that appears in the
00254 ** journal file right after the header.  The random initializer is important,
00255 ** because garbage data that appears at the end of a journal is likely
00256 ** data that was once in other files that have now been deleted.  If the
00257 ** garbage data came from an obsolete journal file, the checksums might
00258 ** be correct.  But by initializing the checksum to random value which
00259 ** is different for every journal, we minimize that risk.
00260 */
00261 static const unsigned char aJournalMagic[] = {
00262   0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
00263 };
00264 
/*
** The size of each page record in the journal is determined by the
** following macro: the pager's page size plus 8 additional bytes.
** The argument is a pointer to a Pager; it is parenthesized so that
** pointer expressions (e.g. "p+1") expand correctly.
*/
#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)
00270 
/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
**
** The argument is a pointer to a Pager; it is parenthesized so that
** pointer expressions (e.g. "p+1") expand correctly.
*/
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
00277 
00278 /*
00279 ** The macro MEMDB is true if we are dealing with an in-memory database.
00280 ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
00281 ** the value of MEMDB will be a constant and the compiler will optimize
00282 ** out code that would never execute.
00283 */
00284 #ifdef SQLITE_OMIT_MEMORYDB
00285 # define MEMDB 0
00286 #else
00287 # define MEMDB pPager->memDb
00288 #endif
00289 
00290 /*
00291 ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
00292 ** reserved for working around a windows/posix incompatibility). It is
00293 ** used in the journal to signify that the remainder of the journal file 
00294 ** is devoted to storing a master journal name - there are no more pages to
00295 ** roll back. See comments for function writeMasterJournal() for details.
00296 */
00297 /* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
00298 #define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
00299 
00300 /*
00301 ** The maximum legal page number is (2^31 - 1).
00302 */
00303 #define PAGER_MAX_PGNO 2147483647
00304 
00305 /*
00306 ** Return true if page *pPg has already been written to the statement
00307 ** journal (or statement snapshot has been created, if *pPg is part
00308 ** of an in-memory database).
00309 */
00310 static int pageInStatement(PgHdr *pPg){
00311   Pager *pPager = pPg->pPager;
00312   return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
00313 }
00314 
00315 /*
00316 ** Read a 32-bit integer from the given file descriptor.  Store the integer
00317 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
00318 ** error code is something goes wrong.
00319 **
00320 ** All values are stored on disk as big-endian.
00321 */
00322 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
00323   unsigned char ac[4];
00324   int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
00325   if( rc==SQLITE_OK ){
00326     *pRes = sqlite3Get4byte(ac);
00327   }
00328   return rc;
00329 }
00330 
/*
** Write a 32-bit integer B into the buffer at address A in big-endian
** byte order.  Both arguments are parenthesized so that the (u8*) cast
** applies to the whole address expression rather than to its first
** operand only.
*/
#define put32bits(A,B)  sqlite3Put4byte((u8*)(A),(B))
00335 
00336 /*
00337 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
00338 ** on success or an error code is something goes wrong.
00339 */
00340 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
00341   char ac[4];
00342   put32bits(ac, val);
00343   return sqlite3OsWrite(fd, ac, 4, offset);
00344 }
00345 
00346 /*
00347 ** If file pFd is open, call sqlite3OsUnlock() on it.
00348 */
00349 static int osUnlock(sqlite3_file *pFd, int eLock){
00350   if( !pFd->pMethods ){
00351     return SQLITE_OK;
00352   }
00353   return sqlite3OsUnlock(pFd, eLock);
00354 }
00355 
/*
** Decide whether the atomic-write optimization can be used with this
** pager.  For an open database file it can be used if:
**
**  (a) the value returned by OsDeviceCharacteristics() indicates that
**      a database page may be written atomically (either the general
**      SQLITE_IOCAP_ATOMIC bit or the bit matching the page size), and
**  (b) the value returned by OsSectorSize() is less than or equal
**      to the page size.
**
** If the database file is not open the OS layer is not consulted and the
** optimization is treated as available.
**
** Returns 0 if the optimization cannot be used.  Otherwise returns the
** size of the journal file when it contains rollback data for exactly
** one page (one journal header plus one page record).
*/
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
static int jrnlBufferSize(Pager *pPager){
  sqlite3_file *pDbFile = pPager->fd;
  int devChar = 0;         /* Device characteristics */
  int sectorSz = 0;        /* Sector size */
  int pageSz = 0;          /* Page size */

  if( pDbFile->pMethods ){
    devChar = sqlite3OsDeviceCharacteristics(pDbFile);
    sectorSz = sqlite3OsSectorSize(pDbFile);
    pageSz = pPager->pageSize;
  }

  /* The per-size ATOMIC flags are the size in bytes shifted right by 8,
  ** which is what makes the (pageSz>>8) mask below work. */
  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

  if( pDbFile->pMethods ){
    int atomicMask = SQLITE_IOCAP_ATOMIC|(pageSz>>8);
    if( (devChar & atomicMask)==0 || sectorSz>pageSz ){
      return 0;
    }
  }
  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
}
#endif
00392 
00393 /*
00394 ** This function should be called when an error occurs within the pager
00395 ** code. The first argument is a pointer to the pager structure, the
00396 ** second the error-code about to be returned by a pager API function. 
00397 ** The value returned is a copy of the second argument to this function. 
00398 **
00399 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
00400 ** the error becomes persistent. Until the persisten error is cleared,
00401 ** subsequent API calls on this Pager will immediately return the same 
00402 ** error code.
00403 **
00404 ** A persistent error indicates that the contents of the pager-cache 
00405 ** cannot be trusted. This state can be cleared by completely discarding 
00406 ** the contents of the pager-cache. If a transaction was active when
00407 ** the persistent error occured, then the rollback journal may need
00408 ** to be replayed.
00409 */
00410 static void pager_unlock(Pager *pPager);
00411 static int pager_error(Pager *pPager, int rc){
00412   int rc2 = rc & 0xff;
00413   assert(
00414        pPager->errCode==SQLITE_FULL ||
00415        pPager->errCode==SQLITE_OK ||
00416        (pPager->errCode & 0xff)==SQLITE_IOERR
00417   );
00418   if(
00419     rc2==SQLITE_FULL ||
00420     rc2==SQLITE_IOERR ||
00421     rc2==SQLITE_CORRUPT
00422   ){
00423     pPager->errCode = rc;
00424     if( pPager->state==PAGER_UNLOCK 
00425      && sqlite3PcacheRefCount(pPager->pPCache)==0 
00426     ){
00427       /* If the pager is already unlocked, call pager_unlock() now to
00428       ** clear the error state and ensure that the pager-cache is 
00429       ** completely empty.
00430       */
00431       pager_unlock(pPager);
00432     }
00433   }
00434   return rc;
00435 }
00436 
/*
** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
** on the cache using a hash function.  This is used for testing
** and debugging only.
*/
#ifdef SQLITE_CHECK_PAGES
/*
** Return a 32-bit hash of the nByte bytes of data starting at pData.
*/
static u32 pager_datahash(int nByte, unsigned char *pData){
  u32 hash = 0;
  int i;
  for(i=0; i<nByte; i++){
    hash = (hash*1039) + pData[i];
  }
  return hash;
}

/*
** Return a 32-bit hash of the page data for pPage.  The hash covers
** pageSize bytes (taken from the owning pager) of pPage->pData.
*/
static u32 pager_pagehash(PgHdr *pPage){
  return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
}

/*
** Recompute the hash of the page data for pPage, store it in
** pPage->pageHash and return it.
**
** The function is declared to return u32, so it must return a value on
** every path; falling off the end is undefined behaviour if a caller
** ever uses the result.
*/
static u32 pager_set_pagehash(PgHdr *pPage){
  u32 hash = pager_pagehash(pPage);
  pPage->pageHash = hash;
  return hash;
}

/*
** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
** is defined, and NDEBUG is not defined, an assert() statement checks
** that the page is either dirty or still matches the calculated page-hash.
** The check is skipped when no hash has been recorded for the page or when
** the pager has an error code set.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;
  assert( !pPg->pageHash || pPager->errCode
      || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
}

#else
#define pager_datahash(X,Y)  0
#define pager_pagehash(X)  0
#define CHECK_PAGE(x)
#endif  /* SQLITE_CHECK_PAGES */
00478 
00479 /*
00480 ** When this is called the journal file for pager pPager must be open.
00481 ** The master journal file name is read from the end of the file and 
00482 ** written into memory supplied by the caller. 
00483 **
00484 ** zMaster must point to a buffer of at least nMaster bytes allocated by
00485 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
00486 ** enough space to write the master journal name). If the master journal
00487 ** name in the journal is longer than nMaster bytes (including a
00488 ** nul-terminator), then this is handled as if no master journal name
00489 ** were present in the journal.
00490 **
00491 ** If no master journal file name is present zMaster[0] is set to 0 and
00492 ** SQLITE_OK returned.
00493 */
00494 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
00495   int rc;
00496   u32 len;
00497   i64 szJ;
00498   u32 cksum;
00499   u32 u;                   /* Unsigned loop counter */
00500   unsigned char aMagic[8]; /* A buffer to hold the magic header */
00501 
00502   zMaster[0] = '\0';
00503 
00504   rc = sqlite3OsFileSize(pJrnl, &szJ);
00505   if( rc!=SQLITE_OK || szJ<16 ) return rc;
00506 
00507   rc = read32bits(pJrnl, szJ-16, &len);
00508   if( rc!=SQLITE_OK ) return rc;
00509 
00510   if( len>=nMaster ){
00511     return SQLITE_OK;
00512   }
00513 
00514   rc = read32bits(pJrnl, szJ-12, &cksum);
00515   if( rc!=SQLITE_OK ) return rc;
00516 
00517   rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
00518   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
00519 
00520   rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
00521   if( rc!=SQLITE_OK ){
00522     return rc;
00523   }
00524   zMaster[len] = '\0';
00525 
00526   /* See if the checksum matches the master journal name */
00527   for(u=0; u<len; u++){
00528     cksum -= zMaster[u];
00529    }
00530   if( cksum ){
00531     /* If the checksum doesn't add up, then one or more of the disk sectors
00532     ** containing the master journal filename is corrupted. This means
00533     ** definitely roll back, so just return SQLITE_OK and report a (nul)
00534     ** master-journal filename.
00535     */
00536     zMaster[0] = '\0';
00537   }
00538    
00539   return SQLITE_OK;
00540 }
00541 
00542 /*
00543 ** Seek the journal file descriptor to the next sector boundary where a
00544 ** journal header may be read or written. Pager.journalOff is updated with
00545 ** the new seek offset.
00546 **
00547 ** i.e for a sector size of 512:
00548 **
00549 ** Input Offset              Output Offset
00550 ** ---------------------------------------
00551 ** 0                         0
00552 ** 512                       512
00553 ** 100                       512
00554 ** 2000                      2048
00555 ** 
00556 */
00557 static void seekJournalHdr(Pager *pPager){
00558   i64 offset = 0;
00559   i64 c = pPager->journalOff;
00560   if( c ){
00561     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
00562   }
00563   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
00564   assert( offset>=c );
00565   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
00566   pPager->journalOff = offset;
00567 }
00568 
00569 /*
00570 ** Write zeros over the header of the journal file.  This has the
00571 ** effect of invalidating the journal file and committing the
00572 ** transaction.
00573 */
00574 static int zeroJournalHdr(Pager *pPager, int doTruncate){
00575   int rc = SQLITE_OK;
00576   static const char zeroHdr[28] = {0};
00577 
00578   if( pPager->journalOff ){
00579     i64 iLimit = pPager->journalSizeLimit;
00580 
00581     IOTRACE(("JZEROHDR %p\n", pPager))
00582     if( doTruncate || iLimit==0 ){
00583       rc = sqlite3OsTruncate(pPager->jfd, 0);
00584     }else{
00585       rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
00586     }
00587     if( rc==SQLITE_OK && !pPager->noSync ){
00588       rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
00589     }
00590 
00591     /* At this point the transaction is committed but the write lock 
00592     ** is still held on the file. If there is a size limit configured for 
00593     ** the persistent journal and the journal file currently consumes more
00594     ** space than that limit allows for, truncate it now. There is no need
00595     ** to sync the file following this operation.
00596     */
00597     if( rc==SQLITE_OK && iLimit>0 ){
00598       i64 sz;
00599       rc = sqlite3OsFileSize(pPager->jfd, &sz);
00600       if( rc==SQLITE_OK && sz>iLimit ){
00601         rc = sqlite3OsTruncate(pPager->jfd, iLimit);
00602       }
00603     }
00604   }
00605   return rc;
00606 }
00607 
00608 /*
00609 ** The journal file must be open when this routine is called. A journal
00610 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
00611 ** current location.
00612 **
00613 ** The format for the journal header is as follows:
00614 ** - 8 bytes: Magic identifying journal format.
00615 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
00616 ** - 4 bytes: Random number used for page hash.
00617 ** - 4 bytes: Initial database page count.
00618 ** - 4 bytes: Sector size used by the process that wrote this journal.
00619 ** - 4 bytes: Database page size.
00620 ** 
00621 ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
00622 */
00623 static int writeJournalHdr(Pager *pPager){
00624   int rc = SQLITE_OK;
00625   char *zHeader = pPager->pTmpSpace;
00626   int nHeader = pPager->pageSize;
00627   int nWrite;
00628 
00629   if( nHeader>JOURNAL_HDR_SZ(pPager) ){
00630     nHeader = JOURNAL_HDR_SZ(pPager);
00631   }
00632 
00633   if( pPager->stmtHdrOff==0 ){
00634     pPager->stmtHdrOff = pPager->journalOff;
00635   }
00636 
00637   seekJournalHdr(pPager);
00638   pPager->journalHdr = pPager->journalOff;
00639 
00640   memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
00641 
00642   /* 
00643   ** Write the nRec Field - the number of page records that follow this
00644   ** journal header. Normally, zero is written to this value at this time.
00645   ** After the records are added to the journal (and the journal synced, 
00646   ** if in full-sync mode), the zero is overwritten with the true number
00647   ** of records (see syncJournal()).
00648   **
00649   ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
00650   ** reading the journal this value tells SQLite to assume that the
00651   ** rest of the journal file contains valid page records. This assumption
00652   ** is dangerous, as if a failure occured whilst writing to the journal
00653   ** file it may contain some garbage data. There are two scenarios
00654   ** where this risk can be ignored:
00655   **
00656   **   * When the pager is in no-sync mode. Corruption can follow a
00657   **     power failure in this case anyway.
00658   **
00659   **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
00660   **     that garbage data is never appended to the journal file.
00661   */
00662   assert(pPager->fd->pMethods||pPager->noSync);
00663   if( (pPager->noSync) || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
00664    || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
00665   ){
00666     put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
00667   }else{
00668     put32bits(&zHeader[sizeof(aJournalMagic)], 0);
00669   }
00670 
00671   /* The random check-hash initialiser */ 
00672   sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
00673   put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
00674   /* The initial database size */
00675   put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
00676   /* The assumed sector size for this process */
00677   put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
00678   if( pPager->journalHdr==0 ){
00679     /* The page size */
00680     put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
00681   }
00682 
00683   for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
00684     IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
00685     rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
00686     pPager->journalOff += nHeader;
00687   }
00688 
00689   return rc;
00690 }
00691 
00692 /*
00693 ** The journal file must be open when this is called. A journal header file
00694 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
00695 ** file. See comments above function writeJournalHdr() for a description of
00696 ** the journal header format.
00697 **
00698 ** If the header is read successfully, *nRec is set to the number of
00699 ** page records following this header and *dbSize is set to the size of the
00700 ** database before the transaction began, in pages. Also, pPager->cksumInit
00701 ** is set to the value read from the journal header. SQLITE_OK is returned
00702 ** in this case.
00703 **
00704 ** If the journal header file appears to be corrupted, SQLITE_DONE is
00705 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
00706 ** cannot be read from the journal file an error code is returned.
00707 */
00708 static int readJournalHdr(
00709   Pager *pPager, 
00710   i64 journalSize,
00711   u32 *pNRec, 
00712   u32 *pDbSize
00713 ){
00714   int rc;
00715   unsigned char aMagic[8]; /* A buffer to hold the magic header */
00716   i64 jrnlOff;
00717   int iPageSize;
00718 
00719   seekJournalHdr(pPager);
00720   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
00721     return SQLITE_DONE;
00722   }
00723   jrnlOff = pPager->journalOff;
00724 
00725   rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
00726   if( rc ) return rc;
00727   jrnlOff += sizeof(aMagic);
00728 
00729   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
00730     return SQLITE_DONE;
00731   }
00732 
00733   rc = read32bits(pPager->jfd, jrnlOff, pNRec);
00734   if( rc ) return rc;
00735 
00736   rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
00737   if( rc ) return rc;
00738 
00739   rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
00740   if( rc ) return rc;
00741 
00742   rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
00743   if( rc==SQLITE_OK 
00744    && iPageSize>=512 
00745    && iPageSize<=SQLITE_MAX_PAGE_SIZE 
00746    && ((iPageSize-1)&iPageSize)==0 
00747   ){
00748     u16 pagesize = iPageSize;
00749     rc = sqlite3PagerSetPagesize(pPager, &pagesize);
00750   }
00751   if( rc ) return rc;
00752 
00753   /* Update the assumed sector-size to match the value used by 
00754   ** the process that created this journal. If this journal was
00755   ** created by a process other than this one, then this routine
00756   ** is being called from within pager_playback(). The local value
00757   ** of Pager.sectorSize is restored at the end of that routine.
00758   */
00759   rc = read32bits(pPager->jfd, jrnlOff+12, &pPager->sectorSize);
00760   if( rc ) return rc;
00761   if( (pPager->sectorSize & (pPager->sectorSize-1))!=0
00762          || pPager->sectorSize>0x1000000 ){
00763     return SQLITE_DONE;
00764   }
00765 
00766   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
00767   return SQLITE_OK;
00768 }
00769 
00770 
00771 /*
00772 ** Write the supplied master journal name into the journal file for pager
00773 ** pPager at the current location. The master journal name must be the last
00774 ** thing written to a journal file. If the pager is in full-sync mode, the
00775 ** journal file descriptor is advanced to the next sector boundary before
00776 ** anything is written. The format is:
00777 **
00778 ** + 4 bytes: PAGER_MJ_PGNO.
00779 ** + N bytes: length of master journal name.
00780 ** + 4 bytes: N
00781 ** + 4 bytes: Master journal name checksum.
00782 ** + 8 bytes: aJournalMagic[].
00783 **
00784 ** The master journal page checksum is the sum of the bytes in the master
00785 ** journal name.
00786 **
00787 ** If zMaster is a NULL pointer (occurs for a single database transaction), 
00788 ** this call is a no-op.
00789 */
00790 static int writeMasterJournal(Pager *pPager, const char *zMaster){
00791   int rc;
00792   int len; 
00793   int i; 
00794   i64 jrnlOff;
00795   i64 jrnlSize;
00796   u32 cksum = 0;
00797   char zBuf[sizeof(aJournalMagic)+2*4];
00798 
00799   if( !zMaster || pPager->setMaster ) return SQLITE_OK;
00800   if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ) return SQLITE_OK;
00801   pPager->setMaster = 1;
00802 
00803   len = strlen(zMaster);
00804   for(i=0; i<len; i++){
00805     cksum += zMaster[i];
00806   }
00807 
00808   /* If in full-sync mode, advance to the next disk sector before writing
00809   ** the master journal name. This is in case the previous page written to
00810   ** the journal has already been synced.
00811   */
00812   if( pPager->fullSync ){
00813     seekJournalHdr(pPager);
00814   }
00815   jrnlOff = pPager->journalOff;
00816   pPager->journalOff += (len+20);
00817 
00818   rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
00819   if( rc!=SQLITE_OK ) return rc;
00820   jrnlOff += 4;
00821 
00822   rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
00823   if( rc!=SQLITE_OK ) return rc;
00824   jrnlOff += len;
00825 
00826   put32bits(zBuf, len);
00827   put32bits(&zBuf[4], cksum);
00828   memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
00829   rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
00830   jrnlOff += 8+sizeof(aJournalMagic);
00831   pPager->needSync = !pPager->noSync;
00832 
00833   /* If the pager is in peristent-journal mode, then the physical 
00834   ** journal-file may extend past the end of the master-journal name
00835   ** and 8 bytes of magic data just written to the file. This is 
00836   ** dangerous because the code to rollback a hot-journal file
00837   ** will not be able to find the master-journal name to determine 
00838   ** whether or not the journal is hot. 
00839   **
00840   ** Easiest thing to do in this scenario is to truncate the journal 
00841   ** file to the required size.
00842   */ 
00843   if( (rc==SQLITE_OK)
00844    && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
00845    && jrnlSize>jrnlOff
00846   ){
00847     rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
00848   }
00849   return rc;
00850 }
00851 
00852 /*
00853 ** Find a page in the hash table given its page number.  Return
00854 ** a pointer to the page or NULL if not found.
00855 */
00856 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
00857   PgHdr *p;
00858   sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
00859   return p;
00860 }
00861 
00862 /*
00863 ** Clear the in-memory cache.  This routine
00864 ** sets the state of the pager back to what it was when it was first
00865 ** opened.  Any outstanding pages are invalidated and subsequent attempts
00866 ** to access those pages will likely result in a coredump.
00867 */
00868 static void pager_reset(Pager *pPager){
00869   if( pPager->errCode ) return;
00870   sqlite3PcacheClear(pPager->pPCache);
00871 }
00872 
00873 /*
00874 ** Unlock the database file. 
00875 **
00876 ** If the pager is currently in error state, discard the contents of 
00877 ** the cache and reset the Pager structure internal state. If there is
00878 ** an open journal-file, then the next time a shared-lock is obtained
00879 ** on the pager file (by this or any other process), it will be
00880 ** treated as a hot-journal and rolled back.
00881 */
00882 static void pager_unlock(Pager *pPager){
00883   if( !pPager->exclusiveMode ){
00884     int rc = osUnlock(pPager->fd, NO_LOCK);
00885     if( rc ) pPager->errCode = rc;
00886     pPager->dbSize = -1;
00887     IOTRACE(("UNLOCK %p\n", pPager))
00888 
00889     /* Always close the journal file when dropping the database lock.
00890     ** Otherwise, another connection with journal_mode=delete might
00891     ** delete the file out from under us.
00892     */
00893     if( pPager->journalOpen ){
00894       sqlite3OsClose(pPager->jfd);
00895       pPager->journalOpen = 0;
00896       sqlite3BitvecDestroy(pPager->pInJournal);
00897       pPager->pInJournal = 0;
00898       sqlite3BitvecDestroy(pPager->pAlwaysRollback);
00899       pPager->pAlwaysRollback = 0;
00900     }
00901 
00902     /* If Pager.errCode is set, the contents of the pager cache cannot be
00903     ** trusted. Now that the pager file is unlocked, the contents of the
00904     ** cache can be discarded and the error code safely cleared.
00905     */
00906     if( pPager->errCode ){
00907       if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK;
00908       pager_reset(pPager);
00909       if( pPager->stmtOpen ){
00910         sqlite3OsClose(pPager->stfd);
00911         sqlite3BitvecDestroy(pPager->pInStmt);
00912         pPager->pInStmt = 0;
00913       }
00914       pPager->stmtOpen = 0;
00915       pPager->stmtInUse = 0;
00916       pPager->journalOff = 0;
00917       pPager->journalStarted = 0;
00918       pPager->stmtAutoopen = 0;
00919       pPager->origDbSize = 0;
00920     }
00921 
00922     pPager->state = PAGER_UNLOCK;
00923     pPager->changeCountDone = 0;
00924   }
00925 }
00926 
00927 /*
00928 ** Execute a rollback if a transaction is active and unlock the 
00929 ** database file. If the pager has already entered the error state, 
00930 ** do not attempt the rollback.
00931 */
00932 static void pagerUnlockAndRollback(Pager *p){
00933   if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
00934     sqlite3BeginBenignMalloc();
00935     sqlite3PagerRollback(p);
00936     sqlite3EndBenignMalloc();
00937   }
00938   pager_unlock(p);
00939 }
00940 
00941 /*
00942 ** This routine ends a transaction.  A transaction is ended by either
00943 ** a COMMIT or a ROLLBACK.
00944 **
00945 ** When this routine is called, the pager has the journal file open and
00946 ** a RESERVED or EXCLUSIVE lock on the database.  This routine will release
00947 ** the database lock and acquires a SHARED lock in its place if that is
00948 ** the appropriate thing to do.  Release locks usually is appropriate,
00949 ** unless we are in exclusive access mode or unless this is a 
00950 ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
00951 **
00952 ** The journal file is either deleted or truncated.
00953 **
00954 ** TODO: Consider keeping the journal file open for temporary databases.
00955 ** This might give a performance improvement on windows where opening
00956 ** a file is an expensive operation.
00957 */
00958 static int pager_end_transaction(Pager *pPager, int hasMaster){
00959   int rc = SQLITE_OK;
00960   int rc2 = SQLITE_OK;
00961   if( pPager->state<PAGER_RESERVED ){
00962     return SQLITE_OK;
00963   }
00964   sqlite3PagerStmtCommit(pPager);
00965   if( pPager->stmtOpen && !pPager->exclusiveMode ){
00966     sqlite3OsClose(pPager->stfd);
00967     pPager->stmtOpen = 0;
00968   }
00969   if( pPager->journalOpen ){
00970     if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
00971       int isMemoryJournal = sqlite3IsMemJournal(pPager->jfd);
00972       sqlite3OsClose(pPager->jfd);
00973       pPager->journalOpen = 0;
00974       if( !isMemoryJournal ){
00975         rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
00976       }
00977     }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE
00978          && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){
00979       pPager->journalOff = 0;
00980       pPager->journalStarted = 0;
00981     }else if( pPager->exclusiveMode 
00982      || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
00983     ){
00984       rc = zeroJournalHdr(pPager, hasMaster);
00985       pager_error(pPager, rc);
00986       pPager->journalOff = 0;
00987       pPager->journalStarted = 0;
00988     }else{
00989       assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc );
00990       sqlite3OsClose(pPager->jfd);
00991       pPager->journalOpen = 0;
00992       if( rc==SQLITE_OK && !pPager->tempFile ){
00993         rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
00994       }
00995     }
00996     sqlite3BitvecDestroy(pPager->pInJournal);
00997     pPager->pInJournal = 0;
00998     sqlite3BitvecDestroy(pPager->pAlwaysRollback);
00999     pPager->pAlwaysRollback = 0;
01000     sqlite3PcacheCleanAll(pPager->pPCache);
01001 #ifdef SQLITE_CHECK_PAGES
01002     sqlite3PcacheIterate(pPager->pPCache, pager_set_pagehash);
01003 #endif
01004     sqlite3PcacheClearFlags(pPager->pPCache,
01005        PGHDR_IN_JOURNAL | PGHDR_NEED_SYNC
01006     );
01007     pPager->dirtyCache = 0;
01008     pPager->nRec = 0;
01009   }else{
01010     assert( pPager->pInJournal==0 );
01011   }
01012 
01013   if( !pPager->exclusiveMode ){
01014     rc2 = osUnlock(pPager->fd, SHARED_LOCK);
01015     pPager->state = PAGER_SHARED;
01016   }else if( pPager->state==PAGER_SYNCED ){
01017     pPager->state = PAGER_EXCLUSIVE;
01018   }
01019   pPager->origDbSize = 0;
01020   pPager->setMaster = 0;
01021   pPager->needSync = 0;
01022   /* lruListSetFirstSynced(pPager); */
01023   if( !MEMDB ){
01024     pPager->dbSize = -1;
01025   }
01026   pPager->dbModified = 0;
01027 
01028   return (rc==SQLITE_OK?rc2:rc);
01029 }
01030 
01031 /*
01032 ** Compute and return a checksum for the page of data.
01033 **
01034 ** This is not a real checksum.  It is really just the sum of the 
01035 ** random initial value and the page number.  We experimented with
01036 ** a checksum of the entire data, but that was found to be too slow.
01037 **
01038 ** Note that the page number is stored at the beginning of data and
01039 ** the checksum is stored at the end.  This is important.  If journal
01040 ** corruption occurs due to a power failure, the most likely scenario
01041 ** is that one end or the other of the record will be changed.  It is
01042 ** much less likely that the two ends of the journal record will be
01043 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
01044 ** though fast and simple, catches the mostly likely kind of corruption.
01045 **
01046 ** FIX ME:  Consider adding every 200th (or so) byte of the data to the
01047 ** checksum.  That way if a single page spans 3 or more disk sectors and
01048 ** only the middle sector is corrupt, we will still have a reasonable
01049 ** chance of failing the checksum and thus detecting the problem.
01050 */
01051 static u32 pager_cksum(Pager *pPager, const u8 *aData){
01052   u32 cksum = pPager->cksumInit;
01053   int i = pPager->pageSize-200;
01054   while( i>0 ){
01055     cksum += aData[i];
01056     i -= 200;
01057   }
01058   return cksum;
01059 }
01060 
01061 /* Forward declaration */
01062 static void makeClean(PgHdr*);
01063 
01064 /*
01065 ** Read a single page from the journal file opened on file descriptor
01066 ** jfd.  Playback this one page.
01067 **
01068 ** The isMainJrnl flag is true if this is the main rollback journal and
01069 ** false for the statement journal.  The main rollback journal uses
01070 ** checksums - the statement journal does not.
01071 */
01072 static int pager_playback_one_page(
01073   Pager *pPager,       /* The pager being played back */
01074   sqlite3_file *jfd,   /* The file that is the journal being rolled back */
01075   i64 offset,          /* Offset of the page within the journal */
01076   int isMainJrnl       /* True for main rollback journal. False for Stmt jrnl */
01077 ){
01078   int rc;
01079   PgHdr *pPg;                   /* An existing page in the cache */
01080   Pgno pgno;                    /* The page number of a page in journal */
01081   u32 cksum;                    /* Checksum used for sanity checking */
01082   u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
01083 
01084   /* isMainJrnl should be true for the main journal and false for
01085   ** statement journals.  Verify that this is always the case
01086   */
01087   assert( jfd == (isMainJrnl ? pPager->jfd : pPager->stfd) );
01088   assert( aData );
01089 
01090   rc = read32bits(jfd, offset, &pgno);
01091   if( rc!=SQLITE_OK ) return rc;
01092   rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
01093   if( rc!=SQLITE_OK ) return rc;
01094   pPager->journalOff += pPager->pageSize + 4;
01095 
01096   /* Sanity checking on the page.  This is more important that I originally
01097   ** thought.  If a power failure occurs while the journal is being written,
01098   ** it could cause invalid data to be written into the journal.  We need to
01099   ** detect this invalid data (with high probability) and ignore it.
01100   */
01101   if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
01102     return SQLITE_DONE;
01103   }
01104   if( pgno>(unsigned)pPager->dbSize ){
01105     return SQLITE_OK;
01106   }
01107   if( isMainJrnl ){
01108     rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
01109     if( rc ) return rc;
01110     pPager->journalOff += 4;
01111     if( pager_cksum(pPager, aData)!=cksum ){
01112       return SQLITE_DONE;
01113     }
01114   }
01115 
01116   assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
01117 
01118   /* If the pager is in RESERVED state, then there must be a copy of this
01119   ** page in the pager cache. In this case just update the pager cache,
01120   ** not the database file. The page is left marked dirty in this case.
01121   **
01122   ** An exception to the above rule: If the database is in no-sync mode
01123   ** and a page is moved during an incremental vacuum then the page may
01124   ** not be in the pager cache. Later: if a malloc() or IO error occurs
01125   ** during a Movepage() call, then the page may not be in the cache
01126   ** either. So the condition described in the above paragraph is not
01127   ** assert()able.
01128   **
01129   ** If in EXCLUSIVE state, then we update the pager cache if it exists
01130   ** and the main file. The page is then marked not dirty.
01131   **
01132   ** Ticket #1171:  The statement journal might contain page content that is
01133   ** different from the page content at the start of the transaction.
01134   ** This occurs when a page is changed prior to the start of a statement
01135   ** then changed again within the statement.  When rolling back such a
01136   ** statement we must not write to the original database unless we know
01137   ** for certain that original page contents are synced into the main rollback
01138   ** journal.  Otherwise, a power loss might leave modified data in the
01139   ** database file without an entry in the rollback journal that can
01140   ** restore the database to its original form.  Two conditions must be
01141   ** met before writing to the database files. (1) the database must be
01142   ** locked.  (2) we know that the original page content is fully synced
01143   ** in the main journal either because the page is not in cache or else
01144   ** the page is marked as needSync==0.
01145   **
01146   ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
01147   ** is possible to fail a statement on a database that does not yet exist.
01148   ** Do not attempt to write if database file has never been opened.
01149   */
01150   pPg = pager_lookup(pPager, pgno);
01151   PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
01152                PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
01153   if( (pPager->state>=PAGER_EXCLUSIVE)
01154    && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
01155    && (pPager->fd->pMethods)
01156   ){
01157     i64 ofst = (pgno-1)*(i64)pPager->pageSize;
01158     rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
01159   }
01160   if( pPg ){
01161     /* No page should ever be explicitly rolled back that is in use, except
01162     ** for page 1 which is held in use in order to keep the lock on the
01163     ** database active. However such a page may be rolled back as a result
01164     ** of an internal error resulting in an automatic call to
01165     ** sqlite3PagerRollback().
01166     */
01167     void *pData;
01168     pData = pPg->pData;
01169     memcpy(pData, aData, pPager->pageSize);
01170     if( pPager->xReiniter ){
01171       pPager->xReiniter(pPg);
01172     }
01173     if( isMainJrnl ) makeClean(pPg);
01174 #ifdef SQLITE_CHECK_PAGES
01175     pPg->pageHash = pager_pagehash(pPg);
01176 #endif
01177     /* If this was page 1, then restore the value of Pager.dbFileVers.
01178     ** Do this before any decoding. */
01179     if( pgno==1 ){
01180       memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
01181     }
01182 
01183     /* Decode the page just read from disk */
01184     CODEC1(pPager, pData, pPg->pgno, 3);
01185     sqlite3PcacheRelease(pPg);
01186   }
01187   return rc;
01188 }
01189 
01190 /*
01191 ** Parameter zMaster is the name of a master journal file. A single journal
01192 ** file that referred to the master journal file has just been rolled back.
01193 ** This routine checks if it is possible to delete the master journal file,
01194 ** and does so if it is.
01195 **
01196 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
01197 ** available for use within this function.
01198 **
01199 **
01200 ** The master journal file contains the names of all child journals.
01201 ** To tell if a master journal can be deleted, check to each of the
01202 ** children.  If all children are either missing or do not refer to
01203 ** a different master journal, then this master journal can be deleted.
01204 */
01205 static int pager_delmaster(Pager *pPager, const char *zMaster){
01206   sqlite3_vfs *pVfs = pPager->pVfs;
01207   int rc;
01208   int master_open = 0;
01209   sqlite3_file *pMaster;
01210   sqlite3_file *pJournal;
01211   char *zMasterJournal = 0; /* Contents of master journal file */
01212   i64 nMasterJournal;       /* Size of master journal file */
01213 
01214   /* Open the master journal file exclusively in case some other process
01215   ** is running this routine also. Not that it makes too much difference.
01216   */
01217   pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2);
01218   pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
01219   if( !pMaster ){
01220     rc = SQLITE_NOMEM;
01221   }else{
01222     int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
01223     rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
01224   }
01225   if( rc!=SQLITE_OK ) goto delmaster_out;
01226   master_open = 1;
01227 
01228   rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
01229   if( rc!=SQLITE_OK ) goto delmaster_out;
01230 
01231   if( nMasterJournal>0 ){
01232     char *zJournal;
01233     char *zMasterPtr = 0;
01234     int nMasterPtr = pPager->pVfs->mxPathname+1;
01235 
01236     /* Load the entire master journal file into space obtained from
01237     ** sqlite3_malloc() and pointed to by zMasterJournal. 
01238     */
01239     zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
01240     if( !zMasterJournal ){
01241       rc = SQLITE_NOMEM;
01242       goto delmaster_out;
01243     }
01244     zMasterPtr = &zMasterJournal[nMasterJournal];
01245     rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
01246     if( rc!=SQLITE_OK ) goto delmaster_out;
01247 
01248     zJournal = zMasterJournal;
01249     while( (zJournal-zMasterJournal)<nMasterJournal ){
01250       int exists;
01251       rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
01252       if( rc!=SQLITE_OK ){
01253         goto delmaster_out;
01254       }
01255       if( exists ){
01256         /* One of the journals pointed to by the master journal exists.
01257         ** Open it and check if it points at the master journal. If
01258         ** so, return without deleting the master journal file.
01259         */
01260         int c;
01261         int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
01262         rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
01263         if( rc!=SQLITE_OK ){
01264           goto delmaster_out;
01265         }
01266 
01267         rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
01268         sqlite3OsClose(pJournal);
01269         if( rc!=SQLITE_OK ){
01270           goto delmaster_out;
01271         }
01272 
01273         c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
01274         if( c ){
01275           /* We have a match. Do not delete the master journal file. */
01276           goto delmaster_out;
01277         }
01278       }
01279       zJournal += (strlen(zJournal)+1);
01280     }
01281   }
01282   
01283   rc = sqlite3OsDelete(pVfs, zMaster, 0);
01284 
01285 delmaster_out:
01286   if( zMasterJournal ){
01287     sqlite3_free(zMasterJournal);
01288   }  
01289   if( master_open ){
01290     sqlite3OsClose(pMaster);
01291   }
01292   sqlite3_free(pMaster);
01293   return rc;
01294 }
01295 
01296 
01297 static void pager_truncate_cache(Pager *pPager);
01298 
01299 /*
01300 ** Truncate the main file of the given pager to the number of pages
01301 ** indicated. Also truncate the cached representation of the file.
01302 **
01303 ** Might might be the case that the file on disk is smaller than nPage.
01304 ** This can happen, for example, if we are in the middle of a transaction
01305 ** which has extended the file size and the new pages are still all held
01306 ** in cache, then an INSERT or UPDATE does a statement rollback.  Some
01307 ** operating system implementations can get confused if you try to
01308 ** truncate a file to some size that is larger than it currently is,
01309 ** so detect this case and write a single zero byte to the end of the new
01310 ** file instead.
01311 */
01312 static int pager_truncate(Pager *pPager, int nPage){
01313   int rc = SQLITE_OK;
01314   if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
01315     i64 currentSize, newSize;
01316     rc = sqlite3OsFileSize(pPager->fd, &currentSize);
01317     newSize = pPager->pageSize*(i64)nPage;
01318     if( rc==SQLITE_OK && currentSize!=newSize ){
01319       if( currentSize>newSize ){
01320         rc = sqlite3OsTruncate(pPager->fd, newSize);
01321       }else{
01322         rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
01323       }
01324     }
01325   }
01326   if( rc==SQLITE_OK ){
01327     pPager->dbSize = nPage;
01328     pager_truncate_cache(pPager);
01329   }
01330   return rc;
01331 }
01332 
01333 /*
01334 ** Set the sectorSize for the given pager.
01335 **
01336 ** The sector size is at least as big as the sector size reported
01337 ** by sqlite3OsSectorSize().  The minimum sector size is 512.
01338 */
01339 static void setSectorSize(Pager *pPager){
01340   assert(pPager->fd->pMethods||pPager->tempFile);
01341   if( !pPager->tempFile ){
01342     /* Sector size doesn't matter for temporary files. Also, the file
01343     ** may not have been opened yet, in whcih case the OsSectorSize()
01344     ** call will segfault.
01345     */
01346     pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
01347   }
01348   if( pPager->sectorSize<512 ){
01349     pPager->sectorSize = 512;
01350   }
01351 }
01352 
01353 /*
01354 ** Playback the journal and thus restore the database file to
01355 ** the state it was in before we started making changes.  
01356 **
01357 ** The journal file format is as follows: 
01358 **
01359 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
01360 **  (2)  4 byte big-endian integer which is the number of valid page records
01361 **       in the journal.  If this value is 0xffffffff, then compute the
01362 **       number of page records from the journal size.
01363 **  (3)  4 byte big-endian integer which is the initial value for the 
01364 **       sanity checksum.
01365 **  (4)  4 byte integer which is the number of pages to truncate the
01366 **       database to during a rollback.
01367 **  (5)  4 byte big-endian integer which is the sector size.  The header
01368 **       is this many bytes in size.
01369 **  (6)  4 byte big-endian integer which is the page size.
01370 **  (7)  4 byte integer which is the number of bytes in the master journal
01371 **       name.  The value may be zero (indicating that there is no master
01372 **       journal.)
01373 **  (8)  N bytes of the master journal name.  The name will be nul-terminated
01374 **       and might be shorter than the value read from (7).  If the first byte
01375 **       of the name is \000 then there is no master journal.  The master
01376 **       journal name is stored in UTF-8.
01377 **  (9)  Zero or more pages instances, each as follows:
01378 **        +  4 byte page number.
01379 **        +  pPager->pageSize bytes of data.
01380 **        +  4 byte checksum
01381 **
01382 ** When we speak of the journal header, we mean the first 8 items above.
01383 ** Each entry in the journal is an instance of the 9th item.
01384 **
01385 ** Call the value from the second bullet "nRec".  nRec is the number of
01386 ** valid page entries in the journal.  In most cases, you can compute the
01387 ** value of nRec from the size of the journal file.  But if a power
01388 ** failure occurred while the journal was being written, it could be the
01389 ** case that the size of the journal file had already been increased but
01390 ** the extra entries had not yet made it safely to disk.  In such a case,
01391 ** the value of nRec computed from the file size would be too large.  For
01392 ** that reason, we always use the nRec value in the header.
01393 **
01394 ** If the nRec value is 0xffffffff it means that nRec should be computed
01395 ** from the file size.  This value is used when the user selects the
01396 ** no-sync option for the journal.  A power failure could lead to corruption
01397 ** in this case.  But for things like temporary table (which will be
01398 ** deleted when the power is restored) we don't care.  
01399 **
01400 ** If the file opened as the journal file is not a well-formed
01401 ** journal file then all pages up to the first corrupted page are rolled
01402 ** back (or no pages if the journal header is corrupted). The journal file
01403 ** is then deleted and SQLITE_OK returned, just as if no corruption had
01404 ** been encountered.
01405 **
01406 ** If an I/O or malloc() error occurs, the journal-file is not deleted
01407 ** and an error code is returned.
01408 */
01409 static int pager_playback(Pager *pPager, int isHot){
01410   sqlite3_vfs *pVfs = pPager->pVfs;
01411   i64 szJ;                 /* Size of the journal file in bytes */
01412   u32 nRec;                /* Number of Records in the journal */
01413   u32 u;                   /* Unsigned loop counter */
01414   Pgno mxPg = 0;           /* Size of the original file in pages */
01415   int rc;                  /* Result code of a subroutine */
01416   int res = 1;             /* Value returned by sqlite3OsAccess() */
01417   char *zMaster = 0;       /* Name of master journal file if any */
01418 
01419   /* Figure out how many records are in the journal.  Abort early if
01420   ** the journal is empty.
01421   */
01422   assert( pPager->journalOpen );
01423   rc = sqlite3OsFileSize(pPager->jfd, &szJ);
01424   if( rc!=SQLITE_OK || szJ==0 ){
01425     goto end_playback;
01426   }
01427 
01428   /* Read the master journal name from the journal, if it is present.
01429   ** If a master journal file name is specified, but the file is not
01430   ** present on disk, then the journal is not hot and does not need to be
01431   ** played back.
01432   */
01433   zMaster = pPager->pTmpSpace;
01434   rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
01435   if( rc==SQLITE_OK && zMaster[0] ){
01436     rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
01437   }
01438   zMaster = 0;
01439   if( rc!=SQLITE_OK || !res ){
01440     goto end_playback;
01441   }
01442   pPager->journalOff = 0;
01443 
01444   /* This loop terminates either when the readJournalHdr() call returns
01445   ** SQLITE_DONE or an IO error occurs. */
01446   while( 1 ){
01447 
01448     /* Read the next journal header from the journal file.  If there are
01449     ** not enough bytes left in the journal file for a complete header, or
01450     ** it is corrupted, then a process must of failed while writing it.
01451     ** This indicates nothing more needs to be rolled back.
01452     */
01453     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
01454     if( rc!=SQLITE_OK ){ 
01455       if( rc==SQLITE_DONE ){
01456         rc = SQLITE_OK;
01457       }
01458       goto end_playback;
01459     }
01460 
01461     /* If nRec is 0xffffffff, then this journal was created by a process
01462     ** working in no-sync mode. This means that the rest of the journal
01463     ** file consists of pages, there are no more journal headers. Compute
01464     ** the value of nRec based on this assumption.
01465     */
01466     if( nRec==0xffffffff ){
01467       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
01468       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
01469     }
01470 
01471     /* If nRec is 0 and this rollback is of a transaction created by this
01472     ** process and if this is the final header in the journal, then it means
01473     ** that this part of the journal was being filled but has not yet been
01474     ** synced to disk.  Compute the number of pages based on the remaining
01475     ** size of the file.
01476     **
01477     ** The third term of the test was added to fix ticket #2565.
01478     */
01479     if( nRec==0 && !isHot &&
01480         pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
01481       nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
01482     }
01483 
01484     /* If this is the first header read from the journal, truncate the
01485     ** database file back to its original size.
01486     */
01487     if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
01488       rc = pager_truncate(pPager, mxPg);
01489       if( rc!=SQLITE_OK ){
01490         goto end_playback;
01491       }
01492     }
01493 
01494     /* Copy original pages out of the journal and back into the database file.
01495     */
01496     for(u=0; u<nRec; u++){
01497       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
01498       if( rc!=SQLITE_OK ){
01499         if( rc==SQLITE_DONE ){
01500           rc = SQLITE_OK;
01501           pPager->journalOff = szJ;
01502           break;
01503         }else{
01504           /* If we are unable to rollback, then the database is probably
01505           ** going to end up being corrupt.  It is corrupt to us, anyhow.
01506           ** Perhaps the next process to come along can fix it....
01507           */
01508           rc = SQLITE_CORRUPT_BKPT;
01509           goto end_playback;
01510         }
01511       }
01512     }
01513   }
01514   /*NOTREACHED*/
01515   assert( 0 );
01516 
01517 end_playback:
01518   if( rc==SQLITE_OK ){
01519     zMaster = pPager->pTmpSpace;
01520     rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
01521   }
01522   if( rc==SQLITE_OK ){
01523     rc = pager_end_transaction(pPager, zMaster[0]!='\0');
01524   }
01525   if( rc==SQLITE_OK && zMaster[0] && res ){
01526     /* If there was a master journal and this routine will return success,
01527     ** see if it is possible to delete the master journal.
01528     */
01529     rc = pager_delmaster(pPager, zMaster);
01530   }
01531 
01532   /* The Pager.sectorSize variable may have been updated while rolling
01533   ** back a journal created by a process with a different sector size
01534   ** value. Reset it to the correct value for this process.
01535   */
01536   setSectorSize(pPager);
01537   return rc;
01538 }
01539 
01540 /*
01541 ** Playback the statement journal.
01542 **
01543 ** This is similar to playing back the transaction journal but with
01544 ** a few extra twists.
01545 **
01546 **    (1)  The number of pages in the database file at the start of
01547 **         the statement is stored in pPager->stmtSize, not in the
01548 **         journal file itself.
01549 **
01550 **    (2)  In addition to playing back the statement journal, also
01551 **         playback all pages of the transaction journal beginning
01552 **         at offset pPager->stmtJSize.
01553 */
01554 static int pager_stmt_playback(Pager *pPager){
01555   i64 szJ;                 /* Size of the full journal */
01556   i64 hdrOff;
01557   int nRec;                /* Number of Records */
01558   int i;                   /* Loop counter */
01559   int rc;
01560 
01561   szJ = pPager->journalOff;
01562 
01563   /* Set hdrOff to be the offset just after the end of the last journal
01564   ** page written before the first journal-header for this statement
01565   ** transaction was written, or the end of the file if no journal
01566   ** header was written.
01567   */
01568   hdrOff = pPager->stmtHdrOff;
01569   assert( pPager->fullSync || !hdrOff );
01570   if( !hdrOff ){
01571     hdrOff = szJ;
01572   }
01573   
01574   /* Truncate the database back to its original size.
01575   */
01576   rc = pager_truncate(pPager, pPager->stmtSize);
01577   assert( pPager->state>=PAGER_SHARED );
01578 
01579   /* Figure out how many records are in the statement journal.
01580   */
01581   assert( pPager->stmtInUse && pPager->journalOpen );
01582   nRec = pPager->stmtNRec;
01583   
01584   /* Copy original pages out of the statement journal and back into the
01585   ** database file.  Note that the statement journal omits checksums from
01586   ** each record since power-failure recovery is not important to statement
01587   ** journals.
01588   */
01589   for(i=0; i<nRec; i++){
01590     i64 offset = i*(4+pPager->pageSize);
01591     rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
01592     assert( rc!=SQLITE_DONE );
01593     if( rc!=SQLITE_OK ) goto end_stmt_playback;
01594   }
01595 
01596   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
01597   ** was the size of the journal file when this statement was started, so
01598   ** everything after that needs to be rolled back, either into the
01599   ** database, the memory cache, or both.
01600   **
01601   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
01602   ** of the first journal header written during this statement transaction.
01603   */
01604   pPager->journalOff = pPager->stmtJSize;
01605   pPager->cksumInit = pPager->stmtCksum;
01606   while( pPager->journalOff < hdrOff ){
01607     rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
01608     assert( rc!=SQLITE_DONE );
01609     if( rc!=SQLITE_OK ) goto end_stmt_playback;
01610   }
01611 
01612   while( pPager->journalOff < szJ ){
01613     u32 nJRec;         /* Number of Journal Records */
01614     u32 dummy;
01615     rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
01616     if( rc!=SQLITE_OK ){
01617       assert( rc!=SQLITE_DONE );
01618       goto end_stmt_playback;
01619     }
01620     if( nJRec==0 ){
01621       nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
01622     }
01623     for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
01624       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
01625       assert( rc!=SQLITE_DONE );
01626       if( rc!=SQLITE_OK ) goto end_stmt_playback;
01627     }
01628   }
01629 
01630   pPager->journalOff = szJ;
01631   
01632 end_stmt_playback:
01633   if( rc==SQLITE_OK) {
01634     pPager->journalOff = szJ;
01635     /* pager_reload_cache(pPager); */
01636   }
01637   return rc;
01638 }
01639 
01640 /*
01641 ** Change the maximum number of in-memory pages that are allowed.
01642 */
01643 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
01644   sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
01645 }
01646 
01647 /*
01648 ** Adjust the robustness of the database to damage due to OS crashes
01649 ** or power failures by changing the number of syncs()s when writing
01650 ** the rollback journal.  There are three levels:
01651 **
01652 **    OFF       sqlite3OsSync() is never called.  This is the default
01653 **              for temporary and transient files.
01654 **
01655 **    NORMAL    The journal is synced once before writes begin on the
01656 **              database.  This is normally adequate protection, but
01657 **              it is theoretically possible, though very unlikely,
01658 **              that an inopertune power failure could leave the journal
01659 **              in a state which would cause damage to the database
01660 **              when it is rolled back.
01661 **
01662 **    FULL      The journal is synced twice before writes begin on the
01663 **              database (with some additional information - the nRec field
01664 **              of the journal header - being written in between the two
01665 **              syncs).  If we assume that writing a
01666 **              single disk sector is atomic, then this mode provides
01667 **              assurance that the journal will not be corrupted to the
01668 **              point of causing damage to the database during rollback.
01669 **
01670 ** Numeric values associated with these states are OFF==1, NORMAL=2,
01671 ** and FULL=3.
01672 */
01673 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
01674 void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){
01675   pPager->noSync =  level==1 || pPager->tempFile;
01676   pPager->fullSync = level==3 && !pPager->tempFile;
01677   pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
01678   if( pPager->noSync ) pPager->needSync = 0;
01679 }
01680 #endif
01681 
01682 /*
01683 ** The following global variable is incremented whenever the library
01684 ** attempts to open a temporary file.  This information is used for
01685 ** testing and analysis only.  
01686 */
01687 #ifdef SQLITE_TEST
01688 int sqlite3_opentemp_count = 0;
01689 #endif
01690 
01691 /*
01692 ** Open a temporary file. 
01693 **
01694 ** Write the file descriptor into *fd.  Return SQLITE_OK on success or some
01695 ** other error code if we fail. The OS will automatically delete the temporary
01696 ** file when it is closed.
01697 */
01698 static int sqlite3PagerOpentemp(
01699   Pager *pPager,        /* The pager object */
01700   sqlite3_file *pFile,  /* Write the file descriptor here */
01701   int vfsFlags          /* Flags passed through to the VFS */
01702 ){
01703   int rc;
01704 
01705 #ifdef SQLITE_TEST
01706   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
01707 #endif
01708 
01709   vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
01710             SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
01711   rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
01712   assert( rc!=SQLITE_OK || pFile->pMethods );
01713   return rc;
01714 }
01715 
01716 static int pagerStress(void *,PgHdr *);
01717 
01718 /*
01719 ** Create a new page cache and put a pointer to the page cache in *ppPager.
01720 ** The file to be cached need not exist.  The file is not locked until
01721 ** the first call to sqlite3PagerGet() and is only held open until the
01722 ** last page is released using sqlite3PagerUnref().
01723 **
01724 ** If zFilename is NULL then a randomly-named temporary file is created
01725 ** and used as the file to be cached.  The file will be deleted
01726 ** automatically when it is closed.
01727 **
01728 ** If zFilename is ":memory:" then all information is held in cache.
01729 ** It is never written to disk.  This can be used to implement an
01730 ** in-memory database.
01731 */
01732 int sqlite3PagerOpen(
01733   sqlite3_vfs *pVfs,       /* The virtual file system to use */
01734   Pager **ppPager,         /* Return the Pager structure here */
01735   const char *zFilename,   /* Name of the database file to open */
01736   int nExtra,              /* Extra bytes append to each in-memory page */
01737   int flags,               /* flags controlling this file */
01738   int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
01739 ){
01740   u8 *pPtr;
01741   Pager *pPager = 0;
01742   int rc = SQLITE_OK;
01743   int i;
01744   int tempFile = 0;
01745   int memDb = 0;
01746   int readOnly = 0;
01747   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
01748   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
01749   int journalFileSize;
01750   int pcacheSize = sqlite3PcacheSize();
01751   int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
01752   char *zPathname = 0;
01753   int nPathname = 0;
01754 
01755   if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
01756     journalFileSize = sqlite3JournalSize(pVfs);
01757   }else{
01758     journalFileSize = sqlite3MemJournalSize();
01759   }
01760 
01761   /* The default return is a NULL pointer */
01762   *ppPager = 0;
01763 
01764   /* Compute and store the full pathname in an allocated buffer pointed
01765   ** to by zPathname, length nPathname. Or, if this is a temporary file,
01766   ** leave both nPathname and zPathname set to 0.
01767   */
01768   if( zFilename && zFilename[0] ){
01769     nPathname = pVfs->mxPathname+1;
01770     zPathname = sqlite3Malloc(nPathname*2);
01771     if( zPathname==0 ){
01772       return SQLITE_NOMEM;
01773     }
01774 #ifndef SQLITE_OMIT_MEMORYDB
01775     if( strcmp(zFilename,":memory:")==0 ){
01776       memDb = 1;
01777       zPathname[0] = 0;
01778     }else
01779 #endif
01780     {
01781       rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
01782     }
01783     if( rc!=SQLITE_OK ){
01784       sqlite3_free(zPathname);
01785       return rc;
01786     }
01787     nPathname = strlen(zPathname);
01788   }
01789 
01790   /* Allocate memory for the pager structure */
01791   pPager = sqlite3MallocZero(
01792     sizeof(*pPager) +           /* Pager structure */
01793     pcacheSize      +           /* PCache object */
01794     journalFileSize +           /* The journal file structure */ 
01795     pVfs->szOsFile  +           /* The main db file */
01796     journalFileSize * 2 +       /* The two journal files */ 
01797     3*nPathname + 40            /* zFilename, zDirectory, zJournal */
01798   );
01799   if( !pPager ){
01800     sqlite3_free(zPathname);
01801     return SQLITE_NOMEM;
01802   }
01803   pPager->pPCache = (PCache *)&pPager[1];
01804   pPtr = ((u8 *)&pPager[1]) + pcacheSize;
01805   pPager->vfsFlags = vfsFlags;
01806   pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
01807   pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile];
01808   pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile+journalFileSize];
01809   pPager->zFilename = (char*)&pPtr[pVfs->szOsFile+2*journalFileSize];
01810   pPager->zDirectory = &pPager->zFilename[nPathname+1];
01811   pPager->zJournal = &pPager->zDirectory[nPathname+1];
01812   pPager->pVfs = pVfs;
01813   if( zPathname ){
01814     memcpy(pPager->zFilename, zPathname, nPathname+1);
01815     sqlite3_free(zPathname);
01816   }
01817 
01818   /* Open the pager file.
01819   */
01820   if( zFilename && zFilename[0] && !memDb ){
01821     if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
01822       rc = SQLITE_CANTOPEN;
01823     }else{
01824       int fout = 0;
01825       rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
01826                          pPager->vfsFlags, &fout);
01827       readOnly = (fout&SQLITE_OPEN_READONLY);
01828 
01829       /* If the file was successfully opened for read/write access,
01830       ** choose a default page size in case we have to create the
01831       ** database file. The default page size is the maximum of:
01832       **
01833       **    + SQLITE_DEFAULT_PAGE_SIZE,
01834       **    + The value returned by sqlite3OsSectorSize()
01835       **    + The largest page size that can be written atomically.
01836       */
01837       if( rc==SQLITE_OK && !readOnly ){
01838         int iSectorSize = sqlite3OsSectorSize(pPager->fd);
01839         if( szPageDflt<iSectorSize ){
01840           szPageDflt = iSectorSize;
01841         }
01842 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
01843         {
01844           int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
01845           int ii;
01846           assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
01847           assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
01848           assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
01849           for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
01850             if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
01851           }
01852         }
01853 #endif
01854         if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
01855           szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
01856         }
01857       }
01858     }
01859   }else{
01860     /* If a temporary file is requested, it is not opened immediately.
01861     ** In this case we accept the default page size and delay actually
01862     ** opening the file until the first call to OsWrite().
01863     **
01864     ** This branch is also run for an in-memory database. An in-memory
01865     ** database is the same as a temp-file that is never written out to
01866     ** disk and uses an in-memory rollback journal.
01867     */ 
01868     tempFile = 1;
01869     pPager->state = PAGER_EXCLUSIVE;
01870   }
01871 
01872   if( pPager && rc==SQLITE_OK ){
01873     pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
01874   }
01875 
01876   /* If an error occured in either of the blocks above.
01877   ** Free the Pager structure and close the file.
01878   ** Since the pager is not allocated there is no need to set 
01879   ** any Pager.errMask variables.
01880   */
01881   if( !pPager || !pPager->pTmpSpace ){
01882     sqlite3OsClose(pPager->fd);
01883     sqlite3_free(pPager);
01884     return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
01885   }
01886   nExtra = FORCE_ALIGNMENT(nExtra);
01887   sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
01888                     !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
01889 
01890   PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
01891   IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
01892 
01893   /* Fill in Pager.zDirectory[] */
01894   memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
01895   for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
01896   if( i>0 ) pPager->zDirectory[i-1] = 0;
01897 
01898   /* Fill in Pager.zJournal[] */
01899   if( zPathname ){
01900     memcpy(pPager->zJournal, pPager->zFilename, nPathname);
01901     memcpy(&pPager->zJournal[nPathname], "-journal", 9);
01902   }else{
01903     pPager->zJournal = 0;
01904   }
01905 
01906   /* pPager->journalOpen = 0; */
01907   pPager->useJournal = useJournal;
01908   pPager->noReadlock = noReadlock && readOnly;
01909   /* pPager->stmtOpen = 0; */
01910   /* pPager->stmtInUse = 0; */
01911   /* pPager->nRef = 0; */
01912   pPager->dbSize = memDb-1;
01913   pPager->pageSize = szPageDflt;
01914   /* pPager->stmtSize = 0; */
01915   /* pPager->stmtJSize = 0; */
01916   /* pPager->nPage = 0; */
01917   pPager->mxPage = 100;
01918   pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
01919   /* pPager->state = PAGER_UNLOCK; */
01920   assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
01921   /* pPager->errMask = 0; */
01922   pPager->tempFile = tempFile;
01923   assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
01924           || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
01925   assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
01926   pPager->exclusiveMode = tempFile; 
01927   pPager->memDb = memDb;
01928   pPager->readOnly = readOnly;
01929   /* pPager->needSync = 0; */
01930   pPager->noSync = pPager->tempFile || !useJournal;
01931   pPager->fullSync = (pPager->noSync?0:1);
01932   pPager->sync_flags = SQLITE_SYNC_NORMAL;
01933   /* pPager->pFirst = 0; */
01934   /* pPager->pFirstSynced = 0; */
01935   /* pPager->pLast = 0; */
01936   pPager->nExtra = nExtra;
01937   pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
01938   assert(pPager->fd->pMethods||tempFile);
01939   setSectorSize(pPager);
01940   if( memDb ){
01941     pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
01942   }
01943   /* pPager->pBusyHandler = 0; */
01944   /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
01945   *ppPager = pPager;
01946   return SQLITE_OK;
01947 }
01948 
01949 /*
01950 ** Set the busy handler function.
01951 */
01952 void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
01953   pPager->pBusyHandler = pBusyHandler;
01954 }
01955 
01956 /*
01957 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
01958 ** is called when the content of a page in cache is restored to its original
01959 ** value as a result of a rollback.  The callback gives higher-level code
01960 ** an opportunity to restore the EXTRA section to agree with the restored
01961 ** page data.
01962 */
01963 void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){
01964   pPager->xReiniter = xReinit;
01965 }
01966 
01967 /*
01968 ** Set the page size to *pPageSize. If the suggest new page size is
01969 ** inappropriate, then an alternative page size is set to that
01970 ** value before returning.
01971 */
01972 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
01973   int rc = pPager->errCode;
01974   if( rc==SQLITE_OK ){
01975     u16 pageSize = *pPageSize;
01976     assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
01977     if( pageSize && pageSize!=pPager->pageSize 
01978      && (pPager->memDb==0 || pPager->dbSize==0)
01979      && sqlite3PcacheRefCount(pPager->pPCache)==0 
01980     ){
01981       char *pNew = (char *)sqlite3PageMalloc(pageSize);
01982       if( !pNew ){
01983         rc = SQLITE_NOMEM;
01984       }else{
01985         pager_reset(pPager);
01986         pPager->pageSize = pageSize;
01987         if( !pPager->memDb ) setSectorSize(pPager);
01988         sqlite3PageFree(pPager->pTmpSpace);
01989         pPager->pTmpSpace = pNew;
01990         sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
01991       }
01992     }
01993     *pPageSize = pPager->pageSize;
01994   }
01995   return rc;
01996 }
01997 
01998 /*
01999 ** Return a pointer to the "temporary page" buffer held internally
02000 ** by the pager.  This is a buffer that is big enough to hold the
02001 ** entire content of a database page.  This buffer is used internally
02002 ** during rollback and will be overwritten whenever a rollback
02003 ** occurs.  But other modules are free to use it too, as long as
02004 ** no rollbacks are happening.
02005 */
02006 void *sqlite3PagerTempSpace(Pager *pPager){
02007   return pPager->pTmpSpace;
02008 }
02009 
02010 /*
02011 ** Attempt to set the maximum database page count if mxPage is positive. 
02012 ** Make no changes if mxPage is zero or negative.  And never reduce the
02013 ** maximum page count below the current size of the database.
02014 **
02015 ** Regardless of mxPage, return the current maximum page count.
02016 */
02017 int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
02018   if( mxPage>0 ){
02019     pPager->mxPgno = mxPage;
02020   }
02021   sqlite3PagerPagecount(pPager, 0);
02022   return pPager->mxPgno;
02023 }
02024 
/*
** Routines to switch the simulated I/O error mechanism off and back on.
** They bracket code in which errors are of no interest, so that the
** test harness does not inject failures there.
**
** disable_simulated_io_errors() saves the pending-error counter and
** sets it to -1; enable_simulated_io_errors() restores the saved value.
**
** Unless -DSQLITE_TEST=1 is used, both are no-op macros that generate
** no code.
*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;      /* Counter value saved by the disable call */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
02048 
02049 /*
02050 ** Read the first N bytes from the beginning of the file into memory
02051 ** that pDest points to. 
02052 **
02053 ** No error checking is done. The rational for this is that this function 
02054 ** may be called even if the file does not exist or contain a header. In 
02055 ** these cases sqlite3OsRead() will return an error, to which the correct 
02056 ** response is to zero the memory at pDest and continue.  A real IO error 
02057 ** will presumably recur and be picked up later (Todo: Think about this).
02058 */
02059 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
02060   int rc = SQLITE_OK;
02061   memset(pDest, 0, N);
02062   assert(pPager->fd->pMethods||pPager->tempFile);
02063   if( pPager->fd->pMethods ){
02064     IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
02065     rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
02066     if( rc==SQLITE_IOERR_SHORT_READ ){
02067       rc = SQLITE_OK;
02068     }
02069   }
02070   return rc;
02071 }
02072 
02073 /*
02074 ** Return the total number of pages in the disk file associated with
02075 ** pPager. 
02076 **
02077 ** If the PENDING_BYTE lies on the page directly after the end of the
02078 ** file, then consider this page part of the file too. For example, if
02079 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
02080 ** file is 4096 bytes, 5 is returned instead of 4.
02081 */
02082 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
02083   i64 n = 0;
02084   int rc;
02085   assert( pPager!=0 );
02086   if( pPager->errCode ){
02087     rc = pPager->errCode;
02088     return rc;
02089   }
02090   if( pPager->dbSize>=0 ){
02091     n = pPager->dbSize;
02092   } else {
02093     assert(pPager->fd->pMethods||pPager->tempFile);
02094     if( (pPager->fd->pMethods)
02095      && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
02096       pager_error(pPager, rc);
02097       return rc;
02098     }
02099     if( n>0 && n<pPager->pageSize ){
02100       n = 1;
02101     }else{
02102       n /= pPager->pageSize;
02103     }
02104     if( pPager->state!=PAGER_UNLOCK ){
02105       pPager->dbSize = n;
02106     }
02107   }
02108   if( n==(PENDING_BYTE/pPager->pageSize) ){
02109     n++;
02110   }
02111   if( n>pPager->mxPgno ){
02112     pPager->mxPgno = n;
02113   }
02114   if( pnPage ){
02115     *pnPage = n;
02116   }
02117   return SQLITE_OK;
02118 }
02119 
02120 /*
02121 ** Forward declaration
02122 */
02123 static int syncJournal(Pager*);
02124 
02125 /*
02126 ** This routine is used to truncate the cache when a database
02127 ** is truncated.  Drop from the cache all pages whose pgno is
02128 ** larger than pPager->dbSize and is unreferenced.
02129 **
02130 ** Referenced pages larger than pPager->dbSize are zeroed.
02131 **
02132 ** Actually, at the point this routine is called, it would be
02133 ** an error to have a referenced page.  But rather than delete
02134 ** that page and guarantee a subsequent segfault, it seems better
02135 ** to zero it and hope that we error out sanely.
02136 */
02137 static void pager_truncate_cache(Pager *pPager){
02138   sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
02139 }
02140 
02141 /*
02142 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
02143 ** is currently not available.  Repeat until the busy callback returns
02144 ** false or until the lock succeeds.
02145 **
02146 ** Return SQLITE_OK on success and an error code if we cannot obtain
02147 ** the lock.
02148 */
02149 static int pager_wait_on_lock(Pager *pPager, int locktype){
02150   int rc;
02151 
02152   /* The OS lock values must be the same as the Pager lock values */
02153   assert( PAGER_SHARED==SHARED_LOCK );
02154   assert( PAGER_RESERVED==RESERVED_LOCK );
02155   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
02156 
02157   /* If the file is currently unlocked then the size must be unknown */
02158   assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 );
02159 
02160   if( pPager->state>=locktype ){
02161     rc = SQLITE_OK;
02162   }else{
02163     if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0;
02164     do {
02165       rc = sqlite3OsLock(pPager->fd, locktype);
02166     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
02167     if( rc==SQLITE_OK ){
02168       pPager->state = locktype;
02169       IOTRACE(("LOCK %p %d\n", pPager, locktype))
02170     }
02171   }
02172   return rc;
02173 }
02174 
02175 /*
02176 ** Truncate the file to the number of pages specified.
02177 */
02178 int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
02179   int rc = SQLITE_OK;
02180   assert( pPager->state>=PAGER_SHARED );
02181 
02182   sqlite3PagerPagecount(pPager, 0);
02183   if( pPager->errCode ){
02184     rc = pPager->errCode;
02185   }else if( nPage<(unsigned)pPager->dbSize ){
02186     rc = syncJournal(pPager);
02187     if( rc==SQLITE_OK ){
02188       /* Get an exclusive lock on the database before truncating. */
02189       rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
02190     }
02191     if( rc==SQLITE_OK ){
02192       rc = pager_truncate(pPager, nPage);
02193     }
02194   }
02195 
02196   return rc;
02197 }
02198 
02199 /*
02200 ** Shutdown the page cache.  Free all memory and close all files.
02201 **
02202 ** If a transaction was in progress when this routine is called, that
02203 ** transaction is rolled back.  All outstanding pages are invalidated
02204 ** and their memory is freed.  Any attempt to use a page associated
02205 ** with this page cache after this function returns will likely
02206 ** result in a coredump.
02207 **
02208 ** This function always succeeds. If a transaction is active an attempt
02209 ** is made to roll it back. If an error occurs during the rollback 
02210 ** a hot journal may be left in the filesystem but no error is returned
02211 ** to the caller.
02212 */
02213 int sqlite3PagerClose(Pager *pPager){
02214 
02215   disable_simulated_io_errors();
02216   sqlite3BeginBenignMalloc();
02217   pPager->errCode = 0;
02218   pPager->exclusiveMode = 0;
02219   pager_reset(pPager);
02220   if( !MEMDB ){
02221     pagerUnlockAndRollback(pPager);
02222   }
02223   enable_simulated_io_errors();
02224   sqlite3EndBenignMalloc();
02225   PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
02226   IOTRACE(("CLOSE %p\n", pPager))
02227   if( pPager->journalOpen ){
02228     sqlite3OsClose(pPager->jfd);
02229   }
02230   sqlite3BitvecDestroy(pPager->pInJournal);
02231   sqlite3BitvecDestroy(pPager->pAlwaysRollback);
02232   if( pPager->stmtOpen ){
02233     sqlite3OsClose(pPager->stfd);
02234   }
02235   sqlite3OsClose(pPager->fd);
02236   /* Temp files are automatically deleted by the OS
02237   ** if( pPager->tempFile ){
02238   **   sqlite3OsDelete(pPager->zFilename);
02239   ** }
02240   */
02241 
02242   sqlite3PageFree(pPager->pTmpSpace);
02243   sqlite3PcacheClose(pPager->pPCache);
02244   sqlite3_free(pPager);
02245   return SQLITE_OK;
02246 }
02247 
02248 #if !defined(NDEBUG) || defined(SQLITE_TEST)
02249 /*
02250 ** Return the page number for the given page data.
02251 */
02252 Pgno sqlite3PagerPagenumber(DbPage *p){
02253   return p->pgno;
02254 }
02255 #endif
02256 
02257 /*
02258 ** Increment the reference count for a page.  The input pointer is
02259 ** a reference to the page data.
02260 */
02261 int sqlite3PagerRef(DbPage *pPg){
02262   sqlite3PcacheRef(pPg);
02263   return SQLITE_OK;
02264 }
02265 
02266 /*
02267 ** Sync the journal.  In other words, make sure all the pages that have
02268 ** been written to the journal have actually reached the surface of the
02269 ** disk.  It is not safe to modify the original database file until after
02270 ** the journal has been synced.  If the original database is modified before
02271 ** the journal is synced and a power failure occurs, the unsynced journal
02272 ** data would be lost and we would be unable to completely rollback the
02273 ** database changes.  Database corruption would occur.
02274 ** 
02275 ** This routine also updates the nRec field in the header of the journal.
02276 ** (See comments on the pager_playback() routine for additional information.)
02277 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
02278 ** is synced, then the nRec field is updated, then a second sync occurs.
02279 **
02280 ** For temporary databases, we do not care if we are able to rollback
02281 ** after a power failure, so no sync occurs.
02282 **
02283 ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
02284 ** the database is stored, then OsSync() is never called on the journal
02285 ** file. In this case all that is required is to update the nRec field in
02286 ** the journal header.
02287 **
02288 ** This routine clears the needSync field of every page currently held in
02289 ** memory.
02290 */
02291 static int syncJournal(Pager *pPager){
02292   int rc = SQLITE_OK;
02293 
02294   /* Sync the journal before modifying the main database
02295   ** (assuming there is a journal and it needs to be synced.)
02296   */
02297   if( pPager->needSync ){
02298     assert( !pPager->tempFile );
02299     if( pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
02300       int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
02301       assert( pPager->journalOpen );
02302 
02303       if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
02304         /* Write the nRec value into the journal file header. If in
02305         ** full-synchronous mode, sync the journal first. This ensures that
02306         ** all data has really hit the disk before nRec is updated to mark
02307         ** it as a candidate for rollback.
02308         **
02309         ** This is not required if the persistent media supports the
02310         ** SAFE_APPEND property. Because in this case it is not possible 
02311         ** for garbage data to be appended to the file, the nRec field
02312         ** is populated with 0xFFFFFFFF when the journal header is written
02313         ** and never needs to be updated.
02314         */
02315         i64 jrnlOff;
02316         if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
02317           PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
02318           IOTRACE(("JSYNC %p\n", pPager))
02319           rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
02320           if( rc!=0 ) return rc;
02321         }
02322 
02323         jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
02324         IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
02325         rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
02326         if( rc ) return rc;
02327       }
02328       if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
02329         PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
02330         IOTRACE(("JSYNC %p\n", pPager))
02331         rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
02332           (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
02333         );
02334         if( rc!=0 ) return rc;
02335       }
02336       pPager->journalStarted = 1;
02337     }
02338     pPager->needSync = 0;
02339 
02340     /* Erase the needSync flag from every page.
02341     */
02342     sqlite3PcacheClearFlags(pPager->pPCache, PGHDR_NEED_SYNC);
02343   }
02344 
02345 #ifndef NDEBUG
02346   /* If the Pager.needSync flag is clear then the PgHdr.needSync
02347   ** flag must also be clear for all pages.  Verify that this
02348   ** invariant is true.
02349   */
02350   else{
02351     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_NEED_SYNC);
02352   }
02353 #endif
02354 
02355   return rc;
02356 }
02357 
02358 /*
02359 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
02360 ** every one of those pages out to the database file. No calls are made
02361 ** to the page-cache to mark the pages as clean; the caller must use
02362 ** PcacheCleanAll() or PcacheMakeClean() for that.  Returns SQLITE_OK
02363 ** (also for an empty list) or the first locking or I/O error code.
02364 */
02365 static int pager_write_pagelist(PgHdr *pList){
02366   Pager *pPager;
02367   int rc;
02368 
02369   if( pList==0 ) return SQLITE_OK;
02370   pPager = pList->pPager;
02371 
02372   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
02373   ** database file. If there is already an EXCLUSIVE lock, the following
02374   ** calls to sqlite3OsLock() are no-ops.
02375   **
02376   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
02377   ** through an intermediate state PENDING.   A PENDING lock prevents new
02378   ** readers from attaching to the database but is insufficient for us to
02379   ** write.  The idea of a PENDING lock is to prevent new readers from
02380   ** coming in while we wait for existing readers to clear.
02381   **
02382   ** While the pager is in the RESERVED state, the original database file
02383   ** is unchanged and we can rollback without having to playback the
02384   ** journal into the original database file.  Once we transition to
02385   ** EXCLUSIVE, it means the database file has been changed and any rollback
02386   ** will require a journal playback.
02387   */
02388   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
02389   if( rc!=SQLITE_OK ){
02390     return rc;
02391   }
02392 
02393   while( pList ){
02394 
02395     /* If the file has not yet been opened, open it now. */
02396     if( !pPager->fd->pMethods ){
02397       assert(pPager->tempFile);
02398       rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
02399       if( rc ) return rc;
02400     }
02401 
02402     /* If there are dirty pages in the page cache with page numbers greater
02403     ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
02404     ** make the file smaller (presumably by auto-vacuum code). Do not write
02405     ** any such pages to the file.
02406     */
02407     if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
02408       i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;  /* pgno is 1-based */
02409       char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6);
02410       PAGERTRACE4("STORE %d page %d hash(%08x)\n",
02411                    PAGERID(pPager), pList->pgno, pager_pagehash(pList));
02412       IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
02413       rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
02414       PAGER_INCR(sqlite3_pager_writedb_count);
02415       PAGER_INCR(pPager->nWrite);
02416       if( pList->pgno==1 ){
02417         memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
02418       }
02419     }
02420 #ifndef NDEBUG
02421     else{
02422       PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
02423     }
02424 #endif
02425     if( rc ) return rc;      /* stop at the first failed write */
02426 #ifdef SQLITE_CHECK_PAGES
02427     pList->pageHash = pager_pagehash(pList);
02428 #endif
02429     pList = pList->pDirty;   /* advance along the dirty list */
02430   }
02431 
02432   return SQLITE_OK;
02433 }
02434 
02435 /*
02436 ** This function is called by the pcache layer when it has reached some
02437 ** soft memory limit. Argument p is a pointer to a purgeable Pager object
02438 ** and pPg is a dirty page with no outstanding references.  This function
02439 ** attempts to make pPg clean so that it can be recycled by the pcache
02440 ** layer.  Returns SQLITE_OK, or an error code also given to pager_error().
02441 */
02442 static int pagerStress(void *p, PgHdr *pPg){
02443   Pager *pPager = (Pager *)p;
02444   int rc = SQLITE_OK;
02445 
02446   if( pPager->doNotSync ){
02447     return SQLITE_OK;          /* page is left dirty in this case */
02448   }
02449 
02450   assert( pPg->flags&PGHDR_DIRTY );
02451   if( pPager->errCode==SQLITE_OK ){
02452     if( pPg->flags&PGHDR_NEED_SYNC ){
02453       rc = syncJournal(pPager);
02454       if( rc==SQLITE_OK && pPager->fullSync && 
02455         !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
02456         !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
02457       ){
02458         pPager->nRec = 0;      /* a new journal header starts with no records */
02459         rc = writeJournalHdr(pPager);
02460       }
02461     }
02462     if( rc==SQLITE_OK ){
02463       pPg->pDirty = 0;         /* turn pPg into a one-element list */
02464       rc = pager_write_pagelist(pPg);
02465     }
02466     if( rc!=SQLITE_OK ){
02467       pager_error(pPager, rc);
02468     }
02469   }
02470 
02471   /* rc is still SQLITE_OK here when the pager was already in an error state */
02472   if( rc==SQLITE_OK ){
02472     sqlite3PcacheMakeClean(pPg);
02473   }
02474   return rc;
02475 }
02476 
02477 
02478 /*
02479 ** Set *pExists to 1 if there is a hot journal on the given pager, or
02480 ** to 0 otherwise.  A hot journal is one that needs to be played back.
02481 **
02482 ** If the current size of the database file is 0 but a journal file
02483 ** exists, that is probably an old journal left over from a prior
02484 ** database with the same name.  Just delete the journal.
02485 **
02486 ** Return SQLITE_OK, or an error code if the status cannot be determined.
02487 **
02488 ** This routine does not open the journal file to examine its
02489 ** content.  Hence, the journal might contain the name of a master
02490 ** journal file that has been deleted, and hence not be hot.  Or
02491 ** the header of the journal might be zeroed out.  This routine
02492 ** does not discover these cases of a non-hot journal - if the
02493 ** journal file exists and is not empty this routine assumes it
02494 ** is hot.  The pager_playback() routine will discover that the
02495 ** journal file is not really hot and will no-op.
02496 */
02497 static int hasHotJournal(Pager *pPager, int *pExists){
02498   sqlite3_vfs *pVfs = pPager->pVfs;
02499   int rc = SQLITE_OK;
02500   int exists;                  /* only valid when sqlite3OsAccess() succeeds */
02501   int locked;                  /* only set when the journal file exists */
02502   assert( pPager!=0 );
02503   assert( pPager->useJournal );
02504   assert( pPager->fd->pMethods );
02505   *pExists = 0;
02506   rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
02507   if( rc==SQLITE_OK && exists ){
02508     rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
02509   }
02510   if( rc==SQLITE_OK && exists && !locked ){
02511     int nPage;
02512     rc = sqlite3PagerPagecount(pPager, &nPage);
02513     if( rc==SQLITE_OK ){
02514      if( nPage==0 ){
02515         sqlite3OsDelete(pVfs, pPager->zJournal, 0);  /* result is ignored */
02516       }else{
02517         *pExists = 1;
02518       }
02519     }
02520   }
02521   return rc;
02522 }
02523 
02524 /*
02525 ** Read page number pgno of the database file into the buffer pPg->pData.
02526 */
02527 static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
02528   int rc;
02529   i64 offset;
02530   assert( MEMDB==0 );
02531   assert(pPager->fd->pMethods||pPager->tempFile);
02532   if( !pPager->fd->pMethods ){
02533     return SQLITE_IOERR_SHORT_READ;  /* file not opened yet: nothing to read */
02534   }
02535   offset = (pgno-1)*(i64)pPager->pageSize;   /* page numbers start at 1 */
02536   rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset);
02537   PAGER_INCR(sqlite3_pager_readdb_count);
02538   PAGER_INCR(pPager->nRead);
02539   IOTRACE(("PGIN %p %d\n", pPager, pgno));
02540   if( pgno==1 ){   /* NOTE(review): copied even when the read above failed */
02541     memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24],
02542                                               sizeof(pPager->dbFileVers));
02543   }
02544   CODEC1(pPager, pPg->pData, pPg->pgno, 3);
02545   PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
02546                PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
02547   return rc;                   /* result of sqlite3OsRead() */
02548 }
02549 
02550 
02551 /*
02552 ** This function is called to obtain the shared lock required before
02553 ** data may be read from the pager cache. If the shared lock has already
02554 ** been obtained, this function is a no-op.
02555 **
02556 ** Immediately after obtaining the shared lock (if required), this function
02557 ** checks for a hot-journal file. If one is found, an emergency rollback
02558 ** is performed immediately.  Returns SQLITE_OK or an error code.
02559 */
02560 static int pagerSharedLock(Pager *pPager){
02561   int rc = SQLITE_OK;
02562   int isErrorReset = 0;
02563 
02564   /* If this database is opened for exclusive access, has no outstanding 
02565   ** page references and is in an error-state, now is the chance to clear
02566   ** the error. Discard the contents of the pager-cache and treat any
02567   ** open journal file as a hot-journal.
02568   */
02569   if( !MEMDB && pPager->exclusiveMode 
02570    && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode 
02571   ){
02572     if( pPager->journalOpen ){
02573       isErrorReset = 1;
02574     }
02575     pPager->errCode = SQLITE_OK;
02576     pager_reset(pPager);
02577   }
02578 
02579   /* If the pager is still in an error state, do not proceed. The error 
02580   ** state will be cleared at some point in the future when all page 
02581   ** references are dropped and the cache can be discarded.
02582   */
02583   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
02584     return pPager->errCode;
02585   }
02586 
02587   if( pPager->state==PAGER_UNLOCK || isErrorReset ){
02588     sqlite3_vfs *pVfs = pPager->pVfs;
02589     int isHotJournal;          /* set by hasHotJournal() unless isErrorReset */
02590     assert( !MEMDB );
02591     assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
02592     if( !pPager->noReadlock ){
02593       rc = pager_wait_on_lock(pPager, SHARED_LOCK);
02594       if( rc!=SQLITE_OK ){
02595         assert( pPager->state==PAGER_UNLOCK );
02596         return pager_error(pPager, rc);
02597       }
02598       assert( pPager->state>=SHARED_LOCK );
02599     }
02600 
02601     /* If a journal file exists, and there is no RESERVED lock on the
02602     ** database file, then it either needs to be played back or deleted.
02603     */
02604     if( !isErrorReset ){
02605       rc = hasHotJournal(pPager, &isHotJournal);
02606       if( rc!=SQLITE_OK ){
02607         goto failed;
02608       }
02609     }
02610     if( isErrorReset || isHotJournal ){
02611       /* Get an EXCLUSIVE lock on the database file. At this point it is
02612       ** important that a RESERVED lock is not obtained on the way to the
02613       ** EXCLUSIVE lock. If it were, another process might open the
02614       ** database file, detect the RESERVED lock, and conclude that the
02615       ** database is safe to read while this process is still rolling it 
02616       ** back.
02617       ** 
02618       ** Because the intermediate RESERVED lock is not requested, the
02619       ** second process will get to this point in the code and fail to
02620       ** obtain its own EXCLUSIVE lock on the database file.
02621       */
02622       if( pPager->state<EXCLUSIVE_LOCK ){
02623         rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
02624         if( rc!=SQLITE_OK ){
02625           rc = pager_error(pPager, rc);
02626           goto failed;
02627         }
02628         pPager->state = PAGER_EXCLUSIVE;
02629       }
02630  
02631       /* Open the journal for read/write access. This is because in 
02632       ** exclusive-access mode the file descriptor will be kept open and
02633       ** possibly used for a transaction later on. On some systems, the
02634       ** OsTruncate() call used in exclusive-access mode also requires
02635       ** a read/write file handle.
02636       */
02637       if( !isErrorReset && pPager->journalOpen==0 ){
02638         int res;
02639         rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
02640         if( rc==SQLITE_OK ){
02641           if( res ){
02642             int fout = 0;
02643             int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
02644             assert( !pPager->tempFile );
02645             rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
02646             assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
02647             if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
02648               rc = SQLITE_CANTOPEN;   /* a read-only journal handle is no use */
02649               sqlite3OsClose(pPager->jfd);
02650             }
02651           }else{
02652             /* If the journal does not exist, that means some other process
02653             ** has already rolled it back */
02654             rc = SQLITE_BUSY;
02655           }
02656         }
02657       }
02658       if( rc!=SQLITE_OK ){
02659         goto failed;
02660       }
02661       pPager->journalOpen = 1;
02662       pPager->journalStarted = 0;
02663       pPager->journalOff = 0;
02664       pPager->setMaster = 0;
02665       pPager->journalHdr = 0;
02666  
02667       /* Playback and delete the journal.  Drop the database write
02668       ** lock and reacquire the read lock.
02669       */
02670       rc = pager_playback(pPager, 1);
02671       if( rc!=SQLITE_OK ){
02672         rc = pager_error(pPager, rc);
02673         goto failed;
02674       }
02675       assert(pPager->state==PAGER_SHARED || 
02676           (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
02677       );
02678     }
02679 
02680     if( sqlite3PcachePagecount(pPager->pPCache)>0 ){
02681       /* The shared-lock has just been acquired on the database file
02682       ** and there are already pages in the cache (from a previous
02683       ** read or write transaction).  Check to see if the database
02684       ** has been modified.  If the database has changed, flush the
02685       ** cache.
02686       **
02687       ** A database change is detected by looking at the sizeof(dbFileVers)
02688       ** bytes beginning at offset 24 into the file.  The first 4 of them are
02689       ** a 32-bit counter that is incremented with each change.  The
02690       ** other bytes change randomly with each file change when
02691       ** a codec is in use.
02692       ** 
02693       ** There is a vanishingly small chance that a change will not be 
02694       ** detected.  The chance of an undetected change is so small that
02695       ** it can be neglected.
02696       */
02697       char dbFileVers[sizeof(pPager->dbFileVers)];
02698       sqlite3PagerPagecount(pPager, 0);   /* failure is detected via errCode */
02699 
02700       if( pPager->errCode ){
02701         rc = pPager->errCode;
02702         goto failed;
02703       }
02704 
02705       if( pPager->dbSize>0 ){
02706         IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
02707         rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
02708         if( rc!=SQLITE_OK ){
02709           goto failed;
02710         }
02711       }else{
02712         memset(dbFileVers, 0, sizeof(dbFileVers));  /* empty file: all zeros */
02713       }
02714 
02715       if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
02716         pager_reset(pPager);
02717       }
02718     }
02719     assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
02720     if( pPager->state==PAGER_UNLOCK ){
02721       pPager->state = PAGER_SHARED;
02722     }
02723   }
02724 
02725  failed:
02726   if( rc!=SQLITE_OK ){
02727     /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
02728     pager_unlock(pPager);
02729   }
02730   return rc;
02731 }
02732 
02733 /*
02734 ** Guarantee that pPg holds real page content.  A page that was
02735 ** acquired with noContent==1 carries the PGHDR_NEED_READ flag and
02736 ** only zeros; for such a page the content is loaded from disk now
02737 ** and the flag is cleared on success.  Returns SQLITE_OK if nothing
02738 ** had to be read, otherwise the result of readDbPage().
02739 */
02740 static int pager_get_content(PgHdr *pPg){
02741   int rc = SQLITE_OK;
02742   /* Only pages still flagged PGHDR_NEED_READ require a disk read. */
02743   if( (pPg->flags & PGHDR_NEED_READ)!=0 ){
02744     rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
02745     if( rc==SQLITE_OK ){
02746       pPg->flags &= ~PGHDR_NEED_READ;
02747     }
02748   }
02749   return rc;
02750 }
02751 
02752 /*
02753 ** Call pagerUnlockAndRollback() once no page references remain, unless
02754 ** the pager is in exclusive mode and its journalOff is not above zero.
02755 */ 
02756 static void pagerUnlockIfUnused(Pager *pPager){
02757   /* Outstanding page references keep the pager as it is. */
02758   if( sqlite3PcacheRefCount(pPager->pPCache)!=0 ) return;
02759   /* So does exclusive mode, as long as journalOff is not positive. */
02760   if( pPager->exclusiveMode && !(pPager->journalOff>0) ) return;
02761   pagerUnlockAndRollback(pPager);
02762 }
02763 
02764 /*
02765 ** Drop a page from the cache using sqlite3PcacheDrop().
02766 **
02767 ** If this means there are now no pages with references to them, a rollback
02768 ** occurs and the lock on the database is removed.
02769 */
02770 static void pagerDropPage(DbPage *pPg){
02771   Pager *pPager = pPg->pPager;   /* read the owner before pPg is dropped */
02772   sqlite3PcacheDrop(pPg);
02773   pagerUnlockIfUnused(pPager);
02774 }
02775 
02776 /*
02777 ** Acquire a page.
02778 **
02779 ** A read lock on the disk file is obtained when the first page is acquired. 
02780 ** This read lock is dropped when the last page is released.
02781 **
02782 ** This routine works for any page number greater than 0.  If the database
02783 ** file is smaller than the requested page, then no actual disk
02784 ** read occurs and the memory image of the page is initialized to
02785 ** all zeros.  The extra data appended to a page is always initialized
02786 ** to zeros the first time a page is loaded into memory.
02787 **
02788 ** The acquisition might fail for several reasons.  In all cases,
02789 ** an appropriate error code is returned and *ppPage is set to NULL.
02790 **
02791 ** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
02792 ** to find a page in the in-memory cache first.  If the page is not already
02793 ** in memory, this routine goes to disk to read it in whereas Lookup()
02794 ** just returns 0.  This routine acquires a read-lock the first time it
02795 ** has to go to disk, and could also playback an old journal if necessary.
02796 ** Since Lookup() never goes to disk, it never has to deal with locks
02797 ** or journal files.
02798 **
02799 ** If noContent is false, the page contents are actually read from disk.
02800 ** If noContent is true, it means that we do not care about the contents
02801 ** of the page at this time, so do not do a disk read.  Just fill in the
02802 ** page content with zeros.  But mark the fact that we have not read the
02803 ** content by setting the PgHdr.needRead flag.  Later on, if 
02804 ** sqlite3PagerWrite() is called on this page or if this routine is
02805 ** called again with noContent==0, that means that the content is needed
02806 ** and the disk read should occur at that point.
02807 */
02808 int sqlite3PagerAcquire(
02809   Pager *pPager,      /* The pager open on the database file */
02810   Pgno pgno,          /* Page number to fetch */
02811   DbPage **ppPage,    /* Write a pointer to the page here */
02812   int noContent       /* Do not bother reading content from disk if true */
02813 ){
02814   PgHdr *pPg = 0;
02815   int rc;
02816 
02817   assert( pPager->state==PAGER_UNLOCK 
02818        || sqlite3PcacheRefCount(pPager->pPCache)>0 
02819        || pgno==1
02820   );
02821 
02822   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
02823   ** number greater than this, or zero, is requested.
02824   */
02825   if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
02826     return SQLITE_CORRUPT_BKPT;
02827   }
02828 
02829   /* Make sure we have not hit any critical errors.
02830   */ 
02831   assert( pPager!=0 );
02832   *ppPage = 0;
02833 
02834   /* If this is the first page accessed, then get a SHARED lock
02835   ** on the database file. pagerSharedLock() is a no-op if 
02836   ** a database lock is already held.
02837   */
02838   rc = pagerSharedLock(pPager);
02839   if( rc!=SQLITE_OK ){
02840     return rc;
02841   }
02842   assert( pPager->state!=PAGER_UNLOCK );
02843 
02844   rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg);
02845   if( rc!=SQLITE_OK ){
02846     return rc;
02847   }
02848   if( pPg->pPager==0 ){
02849     /* The pager cache has created a new page. Its content needs to 
02850     ** be initialized.
02851     */
02852     int nMax;
02853     PAGER_INCR(pPager->nMiss);
02854     pPg->pPager = pPager;
02855     if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
02856       pPg->flags |= PGHDR_IN_JOURNAL;
02857     }
02858     memset(pPg->pExtra, 0, pPager->nExtra);
02859 
02860     rc = sqlite3PagerPagecount(pPager, &nMax);
02861     if( rc!=SQLITE_OK ){
02862       sqlite3PagerUnref(pPg);
02863       return rc;
02864     }
02865 
02866     if( nMax<(int)pgno || MEMDB || noContent ){
02867       if( pgno>pPager->mxPgno ){
02868         sqlite3PagerUnref(pPg);
02869         return SQLITE_FULL;
02870       }
02871       memset(pPg->pData, 0, pPager->pageSize);
02872       if( noContent ){
02873         pPg->flags |= PGHDR_NEED_READ;
02874       }
02875       IOTRACE(("ZERO %p %d\n", pPager, pgno));
02876     }else{
02877       rc = readDbPage(pPager, pPg, pgno);
02878       if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
02879         /* sqlite3PagerUnref(pPg); */
02880         pagerDropPage(pPg);
02881         return rc;
02882       }
02883     }
02884 #ifdef SQLITE_CHECK_PAGES
02885     pPg->pageHash = pager_pagehash(pPg);
02886 #endif
02887   }else{
02888     /* The requested page is in the page cache. */
02889     assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1);
02890     PAGER_INCR(pPager->nHit);
02891     if( !noContent ){
02892       rc = pager_get_content(pPg);
02893       if( rc ){
02894         sqlite3PagerUnref(pPg);
02895         return rc;
02896       }
02897     }
02898   }
02899 
02900   *ppPage = pPg;
02901   return SQLITE_OK;
02902 }
02903 
02904 /*
02905 ** Acquire a page if it is already in the in-memory cache.  Do
02906 ** not read the page from disk.  Return a pointer to the page,
02907 ** or 0 if the page is not in cache.
02908 **
02909 ** See also sqlite3PagerGet().  The difference between this routine
02910 ** and sqlite3PagerGet() is that _get() will go to the disk and read
02911 ** in the page if the page is not already in cache.  This routine
02912 ** returns NULL if the page is not in cache or if a disk I/O error 
02913 ** has ever happened.
02914 */
02915 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
02916   PgHdr *pPg = 0;
02917   assert( pPager!=0 );
02918   assert( pgno!=0 );
02919 
02920   if( (pPager->state!=PAGER_UNLOCK)
02921    && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL)
02922   ){
02923     sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
02924   }
02925 
02926   return pPg;
02927 }
02928 
02929 /*
02930 ** Give up one reference to a page.  A NULL pPg is accepted and ignored.
02931 **
02932 ** The page is handed back to the page cache with sqlite3PcacheRelease().
02933 ** Afterwards pagerUnlockIfUnused() is run, so that once all references
02934 ** to all pages are released, a rollback occurs and the lock on the
02935 ** database is removed.  This routine always returns SQLITE_OK.
02936 */
02937 int sqlite3PagerUnref(DbPage *pPg){
02938   Pager *pOwner;
02939   if( pPg==0 ) return SQLITE_OK;
02940   pOwner = pPg->pPager;          /* fetched before the page is released */
02941   sqlite3PcacheRelease(pPg);
02942   pagerUnlockIfUnused(pOwner);
02943   return SQLITE_OK;
02944 }
02945 
02946 /*
02947 ** Create a journal file for pPager.  There should already be a RESERVED
02948 ** or EXCLUSIVE lock on the database file when this routine is called.
02949 **
02950 ** Return SQLITE_OK if everything works.  Return an error code and release
02951 ** the write lock if anything goes wrong.
02952 */
02953 static int pager_open_journal(Pager *pPager){
02954   sqlite3_vfs *pVfs = pPager->pVfs;
02955   int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
02956 
02957   int rc;
02958   assert( pPager->state>=PAGER_RESERVED );
02959   assert( pPager->useJournal );
02960   assert( pPager->pInJournal==0 );
02961   sqlite3PagerPagecount(pPager, 0);   /* errors are picked up via errCode below */
02962   pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
02963   if( pPager->pInJournal==0 ){
02964     rc = SQLITE_NOMEM;
02965     goto failed_to_open_journal;
02966   }
02967 
02968   if( pPager->journalOpen==0 ){
02969     if( pPager->tempFile ){
02970       flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
02971     }else{
02972       flags |= (SQLITE_OPEN_MAIN_JOURNAL);
02973     }
02974     if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
02975       sqlite3MemJournalOpen(pPager->jfd);
02976       rc = SQLITE_OK;
02977     }else{
02978 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
02979       rc = sqlite3JournalOpen(
02980           pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
02981       );
02982 #else
02983       rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
02984 #endif
02985     }
02986     assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
02987     pPager->journalOff = 0;
02988     pPager->setMaster = 0;
02989     pPager->journalHdr = 0;
02990     if( rc!=SQLITE_OK ){
02991       if( rc==SQLITE_NOMEM ){
02992         sqlite3OsDelete(pVfs, pPager->zJournal, 0);
02993       }
02994       goto failed_to_open_journal;
02995     }
02996   }
02997   pPager->journalOpen = 1;
02998   pPager->journalStarted = 0;
02999   pPager->needSync = 0;
03000   pPager->nRec = 0;
03001   if( pPager->errCode ){
03002     rc = pPager->errCode;
03003     goto failed_to_open_journal;
03004   }
03005   pPager->origDbSize = pPager->dbSize;
03006 
03007   rc = writeJournalHdr(pPager);
03008 
03009   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
03010     rc = sqlite3PagerStmtBegin(pPager);
03011   }
03012   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
03013     rc = pager_end_transaction(pPager, 0);
03014     if( rc==SQLITE_OK ){
03015       rc = SQLITE_FULL;        /* the original failure is reported as FULL */
03016     }
03017   }
03018   return rc;
03019 
03020 failed_to_open_journal:
03021   sqlite3BitvecDestroy(pPager->pInJournal);
03022   pPager->pInJournal = 0;
03023   return rc;
03024 }
03025 
03026 /*
03027 ** Acquire a write-lock on the database.  The lock is removed when
03028 ** any of the following happens:
03029 **
03030 **   *  sqlite3PagerCommitPhaseTwo() is called.
03031 **   *  sqlite3PagerRollback() is called.
03032 **   *  sqlite3PagerClose() is called.
03033 **   *  sqlite3PagerUnref() is called on every outstanding page.
03034 **
03035 ** The first parameter to this routine is a pointer to any open page of the
03036 ** database file.  Nothing changes about the page - it is used merely to
03037 ** acquire a pointer to the Pager structure and as proof that there is
03038 ** already a read-lock on the database.
03039 **
03040 ** The second parameter, exFlag, selects whether an EXCLUSIVE lock is
03041 ** requested right away; see the last paragraph of this comment.
03042 **
03043 ** A journal file is opened if this is not a temporary file.  For temporary
03044 ** files, the opening of the journal file is deferred until there is an
03045 ** actual need to write to the journal.
03046 **
03047 ** If the database is already reserved for writing, this routine is a no-op.
03048 **
03049 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
03050 ** immediately instead of waiting until we try to flush the cache.  The
03051 ** exFlag is ignored if a transaction is already active.
03052 */
03053 int sqlite3PagerBegin(DbPage *pPg, int exFlag){
03054   Pager *pPager = pPg->pPager;
03055   int rc = SQLITE_OK;
03056   assert( pPg->nRef>0 );
03057   assert( pPager->state!=PAGER_UNLOCK );
03058   if( pPager->state==PAGER_SHARED ){
03059     assert( pPager->pInJournal==0 );
03060     assert( !MEMDB );
03061     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
03062     rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
03063     if( rc==SQLITE_OK ){
03064       pPager->state = PAGER_RESERVED;
03065       if( exFlag ){
03066         rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
03067       }
03068     }
03069     if( rc!=SQLITE_OK ){
03070       return rc;
03071     }
03072     pPager->dirtyCache = 0;
03073     PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
03074     if( pPager->useJournal && !pPager->tempFile
03075            && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
03076       rc = pager_open_journal(pPager);
03077     }
03078   }else if( pPager->journalOpen && pPager->journalOff==0 ){
03079     /* This happens when the pager was in exclusive-access mode the last
03080     ** time a (read or write) transaction was successfully concluded
03081     ** by this connection. Instead of deleting the journal file it was 
03082     ** kept open and either was truncated to 0 bytes or its header was
03083     ** overwritten with zeros.
03084     */
03085     assert( pPager->nRec==0 );
03086     assert( pPager->origDbSize==0 );
03087     assert( pPager->pInJournal==0 );
03088     sqlite3PagerPagecount(pPager, 0);   /* return value is not checked here */
03089     pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
03090     if( !pPager->pInJournal ){
03091       rc = SQLITE_NOMEM;
03092     }else{
03093       pPager->origDbSize = pPager->dbSize;
03094       rc = writeJournalHdr(pPager);
03095     }
03096   }
03097   assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
03098   return rc;
03099 }
03100 
03101 /*
03102 ** Make a page dirty.  Set its dirty flag and add it to the dirty
03103 ** page list.  All of the work is done by sqlite3PcacheMakeDirty().
03104 */
03105 static void makeDirty(PgHdr *pPg){
03106   sqlite3PcacheMakeDirty(pPg);
03107 }
03108 
03109 /*
03110 ** Make a page clean.  Clear its dirty bit and remove it from the
03111 ** dirty page list.  All of the work is done by sqlite3PcacheMakeClean().
03112 */
03113 static void makeClean(PgHdr *pPg){
03114   sqlite3PcacheMakeClean(pPg);
03115 }
03116 
03117 
/*
** Mark a data page as writeable.  The page is written into the journal
** if it is not there already.  This routine must be called before making
** changes to a page.
**
** The first time this routine is called, the pager creates a new
** journal and acquires a RESERVED lock on the database.  If the RESERVED
** lock could not be acquired, this routine returns SQLITE_BUSY.  The
** calling routine must check for that return value and be careful not to
** change any page data until this routine returns SQLITE_OK.
**
** If the journal file could not be written because the disk is full,
** then this routine returns SQLITE_FULL and does an immediate rollback.
** All subsequent write attempts also return SQLITE_FULL until there
** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
** reset.
**
** NOTE(review): the paragraph above looks stale.  The body below simply
** returns whatever error code the failed journal write produced, and its
** own inline comment says the rollback is performed by the layer above.
** Confirm and update.
**
** Other return values visible in the body:
**
**    pPager->errCode   if the pager is already in an error state
**    SQLITE_PERM       if the pager is read-only
**    any error from pager_get_content(), sqlite3PagerBegin(),
**    pager_open_journal(), write32bits() or sqlite3OsWrite()
*/
static int pager_write(PgHdr *pPg){
  void *pData = pPg->pData;
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errCode ){ 
    return pPager->errCode;
  }
  if( pPager->readOnly ){
    return SQLITE_PERM;
  }

  assert( !pPager->setMaster );

  CHECK_PAGE(pPg);

  /* If this page was previously acquired with noContent==1, that means
  ** we didn't really read in the content of the page.  This can happen
  ** (for example) when the page is being moved to the freelist.  But
  ** now we are (perhaps) moving the page off of the freelist for
  ** reuse and we need to know its original content so that content
  ** can be stored in the rollback journal.  So do the read at this
  ** time.
  */
  rc = pager_get_content(pPg);
  if( rc ){
    return rc;
  }

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal then we can return right away.
  **
  ** "Already written" means: the page carries PGHDR_IN_JOURNAL and, if
  ** a statement transaction is in use, it is in the statement journal
  ** as well.
  */
  makeDirty(pPg);
  if( (pPg->flags&PGHDR_IN_JOURNAL)
   && (pageInStatement(pPg) || pPager->stmtInUse==0) 
  ){
    pPager->dirtyCache = 1;
    pPager->dbModified = 1;
  }else{

    /* If we get this far, it means that the page needs to be
    ** written to the transaction journal or the statement journal
    ** or both.
    **
    ** First check to see that the transaction journal exists and
    ** create it if it does not.
    */
    assert( pPager->state!=PAGER_UNLOCK );
    rc = sqlite3PagerBegin(pPg, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( pPager->state>=PAGER_RESERVED );
    if( !pPager->journalOpen && pPager->useJournal
          && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
      rc = pager_open_journal(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }
    pPager->dirtyCache = 1;
    pPager->dbModified = 1;

    /* The transaction journal now exists and we have a RESERVED or an
    ** EXCLUSIVE lock on the main database file.  Write the current page to
    ** the transaction journal if it is not there already.
    */
    if( !(pPg->flags&PGHDR_IN_JOURNAL) && pPager->journalOpen ){
      if( (int)pPg->pgno <= pPager->origDbSize ){
        u32 cksum;
        char *pData2;

        /* We should never write to the journal file the page that
        ** contains the database locks.  The following assert verifies
        ** that we do not. */
        assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
        pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
        cksum = pager_cksum(pPager, (u8*)pData2);

        /* A journal record is written as three consecutive pieces:
        ** a 4-byte page number, pageSize bytes of page content, and a
        ** 4-byte checksum.  Note that journalOff is advanced past the
        ** page content even if the sqlite3OsWrite() call reports an
        ** error.
        */
        rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
        if( rc==SQLITE_OK ){
          rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
                              pPager->journalOff + 4);
          pPager->journalOff += pPager->pageSize+4;
        }
        if( rc==SQLITE_OK ){
          rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
          pPager->journalOff += 4;
        }
        IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
                 pPager->journalOff, pPager->pageSize));
        PAGER_INCR(sqlite3_pager_writej_count);
        PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
             PAGERID(pPager), pPg->pgno, 
             ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg));

        /* An error has occurred writing to the journal file. The 
        ** transaction will be rolled back by the layer above.
        */
        if( rc!=SQLITE_OK ){
          return rc;
        }

        /* The record is in the journal.  Count it, remember the page
        ** number in the pInJournal bitvec and, unless syncing is
        ** disabled, flag the page as needing a journal sync.
        */
        pPager->nRec++;
        assert( pPager->pInJournal!=0 );
        sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
        if( !pPager->noSync ){
          pPg->flags |= PGHDR_NEED_SYNC;
        }
        if( pPager->stmtInUse ){
          sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
        }
      }else{
        /* The page number is beyond origDbSize, so no journal record
        ** is written for it.  It is still flagged PGHDR_NEED_SYNC when
        ** the journal has not been started and syncing is enabled.
        */
        if( !pPager->journalStarted && !pPager->noSync ){
          pPg->flags |= PGHDR_NEED_SYNC;
        }
        PAGERTRACE4("APPEND %d page %d needSync=%d\n",
                PAGERID(pPager), pPg->pgno,
               ((pPg->flags&PGHDR_NEED_SYNC)?1:0));
      }
      if( pPg->flags&PGHDR_NEED_SYNC ){
        pPager->needSync = 1;
      }
      pPg->flags |= PGHDR_IN_JOURNAL;
    }

    /* If the statement journal is open and the page is not in it,
    ** then write the current page to the statement journal.  Note that
    ** the statement journal format differs from the standard journal format
    ** in that it omits the checksums and the header.
    **
    ** Each statement journal record is therefore 4+pageSize bytes, and
    ** record number stmtNRec starts at offset stmtNRec*(4+pageSize).
    */
    if( pPager->stmtInUse 
     && !pageInStatement(pPg) 
     && (int)pPg->pgno<=pPager->stmtSize 
    ){
      i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
      char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
      assert( (pPg->flags&PGHDR_IN_JOURNAL) 
                 || (int)pPg->pgno>pPager->origDbSize );
      rc = write32bits(pPager->stfd, offset, pPg->pgno);
      if( rc==SQLITE_OK ){
        rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
      }
      PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      pPager->stmtNRec++;
      assert( pPager->pInStmt!=0 );
      sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
    }
  }

  /* Update the database size and return.
  **
  ** If the new size equals PENDING_BYTE/pageSize it is bumped by one
  ** more page (presumably to step over the page that holds the locking
  ** region -- confirm against the PENDING_BYTE definition).
  */
  assert( pPager->state>=PAGER_SHARED );
  if( pPager->dbSize<(int)pPg->pgno ){
    pPager->dbSize = pPg->pgno;
    if( pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
      pPager->dbSize++;
    }
  }
  return rc;
}
03298 
03299 /*
03300 ** This function is used to mark a data-page as writable. It uses 
03301 ** pager_write() to open a journal file (if it is not already open)
03302 ** and write the page *pData to the journal.
03303 **
03304 ** The difference between this function and pager_write() is that this
03305 ** function also deals with the special case where 2 or more pages
03306 ** fit on a single disk sector. In this case all co-resident pages
03307 ** must have been written to the journal file before returning.
03308 */
03309 int sqlite3PagerWrite(DbPage *pDbPage){
03310   int rc = SQLITE_OK;
03311 
03312   PgHdr *pPg = pDbPage;
03313   Pager *pPager = pPg->pPager;
03314   Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
03315 
03316   if( nPagePerSector>1 ){
03317     Pgno nPageCount;          /* Total number of pages in database file */
03318     Pgno pg1;                 /* First page of the sector pPg is located on. */
03319     int nPage;                /* Number of pages starting at pg1 to journal */
03320     int ii;
03321     int needSync = 0;
03322 
03323     /* Set the doNotSync flag to 1. This is because we cannot allow a journal
03324     ** header to be written between the pages journaled by this function.
03325     */
03326     assert( !MEMDB );
03327     assert( pPager->doNotSync==0 );
03328     pPager->doNotSync = 1;
03329 
03330     /* This trick assumes that both the page-size and sector-size are
03331     ** an integer power of 2. It sets variable pg1 to the identifier
03332     ** of the first page of the sector pPg is located on.
03333     */
03334     pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
03335 
03336     sqlite3PagerPagecount(pPager, (int *)&nPageCount);
03337     if( pPg->pgno>nPageCount ){
03338       nPage = (pPg->pgno - pg1)+1;
03339     }else if( (pg1+nPagePerSector-1)>nPageCount ){
03340       nPage = nPageCount+1-pg1;
03341     }else{
03342       nPage = nPagePerSector;
03343     }
03344     assert(nPage>0);
03345     assert(pg1<=pPg->pgno);
03346     assert((pg1+nPage)>pPg->pgno);
03347 
03348     for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
03349       Pgno pg = pg1+ii;
03350       PgHdr *pPage;
03351       if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
03352         if( pg!=PAGER_MJ_PGNO(pPager) ){
03353           rc = sqlite3PagerGet(pPager, pg, &pPage);
03354           if( rc==SQLITE_OK ){
03355             rc = pager_write(pPage);
03356             if( pPage->flags&PGHDR_NEED_SYNC ){
03357               needSync = 1;
03358             }
03359             sqlite3PagerUnref(pPage);
03360           }
03361         }
03362       }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
03363         if( pPage->flags&PGHDR_NEED_SYNC ){
03364           needSync = 1;
03365         }
03366         sqlite3PagerUnref(pPage);
03367       }
03368     }
03369 
03370     /* If the PgHdr.needSync flag is set for any of the nPage pages 
03371     ** starting at pg1, then it needs to be set for all of them. Because
03372     ** writing to any of these nPage pages may damage the others, the
03373     ** journal file must contain sync()ed copies of all of them
03374     ** before any of them can be written out to the database file.
03375     */
03376     if( needSync ){
03377       assert( !MEMDB && pPager->noSync==0 );
03378       for(ii=0; ii<nPage && needSync; ii++){
03379         PgHdr *pPage = pager_lookup(pPager, pg1+ii);
03380         if( pPage ) pPage->flags |= PGHDR_NEED_SYNC;
03381         sqlite3PagerUnref(pPage);
03382       }
03383       assert(pPager->needSync);
03384     }
03385 
03386     assert( pPager->doNotSync==1 );
03387     pPager->doNotSync = 0;
03388   }else{
03389     rc = pager_write(pDbPage);
03390   }
03391   return rc;
03392 }
03393 
/*
** Return TRUE if the page given in the argument was previously passed
** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
** to change the content of the page.
**
** The test is whether the PGHDR_DIRTY bit is set in pPg->flags.  The
** value returned is that masked bit itself (zero or non-zero), not a
** normalized 0/1.
**
** This routine is only compiled when NDEBUG is not defined
** (presumably because it is intended for use inside assert()).
*/
#ifndef NDEBUG
int sqlite3PagerIswriteable(DbPage *pPg){
  return pPg->flags&PGHDR_DIRTY;
}
#endif
03404 
03405 /*
03406 ** A call to this routine tells the pager that it is not necessary to
03407 ** write the information on page pPg back to the disk, even though
03408 ** that page might be marked as dirty.  This happens, for example, when
03409 ** the page has been added as a leaf of the freelist and so its
03410 ** content no longer matters.
03411 **
03412 ** The overlying software layer calls this routine when all of the data
03413 ** on the given page is unused.  The pager marks the page as clean so
03414 ** that it does not get written to disk.
03415 **
03416 ** Tests show that this optimization, together with the
03417 ** sqlite3PagerDontRollback() below, more than double the speed
03418 ** of large INSERT operations and quadruple the speed of large DELETEs.
03419 **
03420 ** When this routine is called, set the alwaysRollback flag to true.
03421 ** Subsequent calls to sqlite3PagerDontRollback() for the same page
03422 ** will thereafter be ignored.  This is necessary to avoid a problem
03423 ** where a page with data is added to the freelist during one part of
03424 ** a transaction then removed from the freelist during a later part
03425 ** of the same transaction and reused for some other purpose.  When it
03426 ** is first added to the freelist, this routine is called.  When reused,
03427 ** the sqlite3PagerDontRollback() routine is called.  But because the
03428 ** page contains critical data, we still need to be sure it gets
03429 ** rolled back in spite of the sqlite3PagerDontRollback() call.
03430 */
03431 int sqlite3PagerDontWrite(DbPage *pDbPage){
03432   PgHdr *pPg = pDbPage;
03433   Pager *pPager = pPg->pPager;
03434   int rc;
03435 
03436   if( pPg->pgno>pPager->origDbSize ){
03437     return SQLITE_OK;
03438   }
03439   if( pPager->pAlwaysRollback==0 ){
03440     assert( pPager->pInJournal );
03441     pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize);
03442     if( !pPager->pAlwaysRollback ){
03443       return SQLITE_NOMEM;
03444     }
03445   }
03446   rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno);
03447 
03448   if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && !pPager->stmtInUse ){
03449     assert( pPager->state>=PAGER_SHARED );
03450     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
03451       /* If this pages is the last page in the file and the file has grown
03452       ** during the current transaction, then do NOT mark the page as clean.
03453       ** When the database file grows, we must make sure that the last page
03454       ** gets written at least once so that the disk file will be the correct
03455       ** size. If you do not write this page and the size of the file
03456       ** on the disk ends up being too small, that can lead to database
03457       ** corruption during the next transaction.
03458       */
03459     }else{
03460       PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
03461       IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
03462       pPg->flags |= PGHDR_DONT_WRITE;
03463 #ifdef SQLITE_CHECK_PAGES
03464       pPg->pageHash = pager_pagehash(pPg);
03465 #endif
03466     }
03467   }
03468   return rc;
03469 }
03470 
03471 /*
03472 ** A call to this routine tells the pager that if a rollback occurs,
03473 ** it is not necessary to restore the data on the given page.  This
03474 ** means that the pager does not have to record the given page in the
03475 ** rollback journal.
03476 **
03477 ** If we have not yet actually read the content of this page (if
03478 ** the PgHdr.needRead flag is set) then this routine acts as a promise
03479 ** that we will never need to read the page content in the future.
03480 ** so the needRead flag can be cleared at this point.
03481 */
03482 void sqlite3PagerDontRollback(DbPage *pPg){
03483   Pager *pPager = pPg->pPager;
03484 
03485   assert( pPager->state>=PAGER_RESERVED );
03486 
03487   /* If the journal file is not open, or DontWrite() has been called on
03488   ** this page (DontWrite() sets the alwaysRollback flag), then this
03489   ** function is a no-op.
03490   */
03491   if( pPager->journalOpen==0 
03492    || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno)
03493    || pPg->pgno>pPager->origDbSize
03494   ){
03495     return;
03496   }
03497 
03498 #ifdef SQLITE_SECURE_DELETE
03499   if( (pPg->flags & PGHDR_IN_JOURNAL)!=0 || (int)pPg->pgno>pPager->origDbSize ){
03500     return;
03501   }
03502 #endif
03503 
03504   /* If SECURE_DELETE is disabled, then there is no way that this
03505   ** routine can be called on a page for which sqlite3PagerDontWrite()
03506   ** has not been previously called during the same transaction.
03507   ** And if DontWrite() has previously been called, the following
03508   ** conditions must be met.
03509   **
03510   ** (Later:)  Not true.  If the database is corrupted by having duplicate
03511   ** pages on the freelist (ex: corrupt9.test) then the following is not
03512   ** necessarily true:
03513   */
03514   /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
03515 
03516   assert( pPager->pInJournal!=0 );
03517   sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
03518   pPg->flags |= PGHDR_IN_JOURNAL;
03519   pPg->flags &= ~PGHDR_NEED_READ;
03520   if( pPager->stmtInUse ){
03521     assert( pPager->stmtSize >= pPager->origDbSize );
03522     sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
03523   }
03524   PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
03525   IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
03526 }
03527 
03528 
03529 /*
03530 ** This routine is called to increment the database file change-counter,
03531 ** stored at byte 24 of the pager file.
03532 */
03533 static int pager_incr_changecounter(Pager *pPager, int isDirect){
03534   PgHdr *pPgHdr;
03535   u32 change_counter;
03536   int rc = SQLITE_OK;
03537 
03538 #ifndef SQLITE_ENABLE_ATOMIC_WRITE
03539   assert( isDirect==0 );  /* isDirect is only true for atomic writes */
03540 #endif
03541   if( !pPager->changeCountDone ){
03542     /* Open page 1 of the file for writing. */
03543     rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
03544     if( rc!=SQLITE_OK ) return rc;
03545 
03546     if( !isDirect ){
03547       rc = sqlite3PagerWrite(pPgHdr);
03548       if( rc!=SQLITE_OK ){
03549         sqlite3PagerUnref(pPgHdr);
03550         return rc;
03551       }
03552     }
03553 
03554     /* Increment the value just read and write it back to byte 24. */
03555     change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
03556     change_counter++;
03557     put32bits(((char*)pPgHdr->pData)+24, change_counter);
03558 
03559 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
03560     if( isDirect && pPager->fd->pMethods ){
03561       const void *zBuf = pPgHdr->pData;
03562       rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
03563     }
03564 #endif
03565 
03566     /* Release the page reference. */
03567     sqlite3PagerUnref(pPgHdr);
03568     pPager->changeCountDone = 1;
03569   }
03570   return rc;
03571 }
03572 
03573 /*
03574 ** Sync the pager file to disk.
03575 */
03576 int sqlite3PagerSync(Pager *pPager){
03577   int rc;
03578   if( MEMDB ){
03579     rc = SQLITE_OK;
03580   }else{
03581     rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
03582   }
03583   return rc;
03584 }
03585 
/*
** Sync the database file for the pager pPager. zMaster points to the name
** of a master journal file that should be written into the individual
** journal file. zMaster may be NULL, which is interpreted as no master
** journal (a single database transaction).
**
** This routine ensures that the journal is synced, all dirty pages written
** to the database file and the database file synced. The only thing that
** remains to commit the transaction is to delete the journal file (or
** master journal file if specified).
**
** Note that if zMaster==NULL, this does not overwrite a previous value
** passed to an sqlite3PagerCommitPhaseOne() call.
**
** If parameter nTrunc is non-zero, then the pager file is truncated to
** nTrunc pages (this is used by auto-vacuum databases).
**
** If the final parameter - noSync - is true, then the database file itself
** is not synced. The caller must call sqlite3PagerSync() directly to
** sync the database file before calling CommitPhaseTwo() to delete the
** journal file in this case.
**
** Return value: pPager->errCode if the pager is already in an error
** state; SQLITE_OK on success or when there is nothing to do; otherwise
** the error code of the step that failed.  SQLITE_IOERR_BLOCKED is
** reported to the caller as SQLITE_BUSY.
*/
int sqlite3PagerCommitPhaseOne(
  Pager *pPager, 
  const char *zMaster, 
  Pgno nTrunc,
  int noSync
){
  int rc = SQLITE_OK;

  if( pPager->errCode ){
    return pPager->errCode;
  }

  /* If no changes have been made, we can leave the transaction early.
  ** (The early exit is not taken for a non-exclusive pager in
  ** PAGER_JOURNALMODE_DELETE mode.)
  */
  if( pPager->dbModified==0 &&
        (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
          pPager->exclusiveMode!=0) ){
    assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
    return SQLITE_OK;
  }

  PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
      pPager->zFilename, zMaster, nTrunc);

  /* If this is an in-memory db, or no pages have been written to, or this
  ** function has already been called, it is a no-op.
  */
  if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
    PgHdr *pPg;

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
    /* The atomic-write optimization can be used if all of the
    ** following are true:
    **
    **    + The file-system supports the atomic-write property for
    **      blocks of size page-size, and
    **    + This commit is not part of a multi-file transaction, and
    **    + Exactly one page has been modified and stored in the journal file.
    **
    ** If the optimization can be used, then the journal file will never
    ** be created for this transaction.
    */
    int useAtomicWrite;
    pPg = sqlite3PcacheDirtyList(pPager->pPCache);
    useAtomicWrite = (
        !zMaster && 
        pPager->journalOpen &&
        pPager->journalOff==jrnlBufferSize(pPager) && 
        nTrunc==0 && 
        (pPg==0 || pPg->pDirty==0)
    );
    assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
    if( useAtomicWrite ){
      /* Update the nRec field in the journal file. */
      int offset = pPager->journalHdr + sizeof(aJournalMagic);
      assert(pPager->nRec==1);
      rc = write32bits(pPager->jfd, offset, pPager->nRec);

      /* Update the db file change counter. The following call will modify
      ** the in-memory representation of page 1 to include the updated
      ** change counter and then write page 1 directly to the database
      ** file. Because of the atomic-write property of the host file-system, 
      ** this is safe.
      */
      if( rc==SQLITE_OK ){
        rc = pager_incr_changecounter(pPager, 1);
      }
    }else{
      rc = sqlite3JournalCreate(pPager->jfd);
    }

    /* CAUTION: the "if" below has no body of its own inside this
    ** #ifdef.  When SQLITE_ENABLE_ATOMIC_WRITE is defined it governs the
    ** "if( !pPager->setMaster ){...}" statement that follows the #endif,
    ** so that statement is skipped when the atomic-write path was taken
    ** or when an error has already occurred.
    */
    if( !useAtomicWrite && rc==SQLITE_OK )
#endif

    /* If a master journal file name has already been written to the
    ** journal file, then no sync is required. This happens when it is
    ** written, then the process fails to upgrade from a RESERVED to an
    ** EXCLUSIVE lock. The next time the process tries to commit the
    ** transaction the m-j name will have already been written.
    */
    if( !pPager->setMaster ){
      rc = pager_incr_changecounter(pPager, 0);
      if( rc!=SQLITE_OK ) goto sync_exit;
      if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
#ifndef SQLITE_OMIT_AUTOVACUUM
        if( nTrunc!=0 ){
          /* If this transaction has made the database smaller, then all pages
          ** being discarded by the truncation must be written to the journal
          ** file.
          */
          Pgno i;
          int iSkip = PAGER_MJ_PGNO(pPager);
          for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
            if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
              rc = sqlite3PagerGet(pPager, i, &pPg);
              if( rc!=SQLITE_OK ) goto sync_exit;
              rc = sqlite3PagerWrite(pPg);
              sqlite3PagerUnref(pPg);
              if( rc!=SQLITE_OK ) goto sync_exit;
            }
          } 
        }
#endif
        /* Record the master journal name (if any) in the journal and
        ** then sync the journal. */
        rc = writeMasterJournal(pPager, zMaster);
        if( rc!=SQLITE_OK ) goto sync_exit;
        rc = syncJournal(pPager);
      }
    }
    if( rc!=SQLITE_OK ) goto sync_exit;

#ifndef SQLITE_OMIT_AUTOVACUUM
    if( nTrunc!=0 ){
      rc = sqlite3PagerTruncate(pPager, nTrunc);
      if( rc!=SQLITE_OK ) goto sync_exit;
    }
#endif

    /* Write all dirty pages to the database file */
    pPg = sqlite3PcacheDirtyList(pPager->pPCache);
    rc = pager_write_pagelist(pPg);
    if( rc!=SQLITE_OK ){
      assert( rc!=SQLITE_IOERR_BLOCKED );
      /* The error might have left the dirty list all fouled up here,
      ** but that does not matter because if the dirty list did
      ** get corrupted, then the transaction will roll back and
      ** discard the dirty list.  There is an assert in
      ** pager_get_all_dirty_pages() that verifies that no attempt
      ** is made to use an invalid dirty list.
      */
      goto sync_exit;
    }
    sqlite3PcacheCleanAll(pPager->pPCache);

    /* Sync the database file. */
    if( !pPager->noSync && !noSync ){
      rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
    }
    IOTRACE(("DBSYNC %p\n", pPager))

    /* Note: the state is advanced to PAGER_SYNCED here regardless of
    ** the value sqlite3OsSync() left in rc. */
    pPager->state = PAGER_SYNCED;
  }else if( MEMDB && nTrunc!=0 ){
    rc = sqlite3PagerTruncate(pPager, nTrunc);
  }

sync_exit:
  if( rc==SQLITE_IOERR_BLOCKED ){
    /* pager_incr_changecounter() may attempt to obtain an exclusive
     * lock to spill the cache and return IOERR_BLOCKED. But since 
     * there is no chance the cache is inconsistent, it is
     * better to return SQLITE_BUSY.
     */
    rc = SQLITE_BUSY;
  }
  return rc;
}
03763 
03764 
03765 /*
03766 ** Commit all changes to the database and release the write lock.
03767 **
03768 ** If the commit fails for any reason, a rollback attempt is made
03769 ** and an error code is returned.  If the commit worked, SQLITE_OK
03770 ** is returned.
03771 */
03772 int sqlite3PagerCommitPhaseTwo(Pager *pPager){
03773   int rc = SQLITE_OK;
03774 
03775   if( pPager->errCode ){
03776     return pPager->errCode;
03777   }
03778   if( pPager->state<PAGER_RESERVED ){
03779     return SQLITE_ERROR;
03780   }
03781   if( pPager->dbModified==0 &&
03782         (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
03783           pPager->exclusiveMode!=0) ){
03784     assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
03785     return SQLITE_OK;
03786   }
03787   PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
03788   assert( pPager->state==PAGER_SYNCED || MEMDB || !pPager->dirtyCache );
03789   rc = pager_end_transaction(pPager, pPager->setMaster);
03790   rc = pager_error(pPager, rc);
03791   return rc;
03792 }
03793 
03794 /*
03795 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
03796 ** All in-memory cache pages revert to their original data contents.
03797 ** The journal is deleted.
03798 **
03799 ** This routine cannot fail unless some other process is not following
03800 ** the correct locking protocol or unless some other
03801 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
03802 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
03803 ** codes are returned for all these occasions.  Otherwise,
03804 ** SQLITE_OK is returned.
03805 */
03806 int sqlite3PagerRollback(Pager *pPager){
03807   int rc = SQLITE_OK;
03808   PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
03809   if( !pPager->dirtyCache || !pPager->journalOpen ){
03810     rc = pager_end_transaction(pPager, pPager->setMaster);
03811   }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
03812     if( pPager->state>=PAGER_EXCLUSIVE ){
03813       pager_playback(pPager, 0);
03814     }
03815     rc = pPager->errCode;
03816   }else{
03817     if( pPager->state==PAGER_RESERVED ){
03818       int rc2;
03819       rc = pager_playback(pPager, 0);
03820       rc2 = pager_end_transaction(pPager, pPager->setMaster);
03821       if( rc==SQLITE_OK ){
03822         rc = rc2;
03823       }
03824     }else{
03825       rc = pager_playback(pPager, 0);
03826     }
03827 
03828     if( !MEMDB ){
03829       pPager->dbSize = -1;
03830     }
03831 
03832     /* If an error occurs during a ROLLBACK, we can no longer trust the pager
03833     ** cache. So call pager_error() on the way out to make any error 
03834     ** persistent.
03835     */
03836     rc = pager_error(pPager, rc);
03837   }
03838   return rc;
03839 }
03840 
/*
** Return TRUE if the database file is opened read-only.  Return FALSE
** if the database is (in theory) writable.
**
** The value returned is the pager's readOnly flag, unmodified.
*/
int sqlite3PagerIsreadonly(Pager *pPager){
  return pPager->readOnly;
}
03848 
/*
** Return the number of references to the pager.
**
** The count is whatever sqlite3PcacheRefCount() reports for the
** pager's page cache (pPager->pPCache).
*/
int sqlite3PagerRefcount(Pager *pPager){
  return sqlite3PcacheRefCount(pPager->pPCache);
}
03855 
/*
** Return the number of references to the specified page.
**
** The count is whatever sqlite3PcachePageRefcount() reports for pPage.
*/
int sqlite3PagerPageRefcount(DbPage *pPage){
  return sqlite3PcachePageRefcount(pPage);
}
03862 
03863 #ifdef SQLITE_TEST
03864 /*
03865 ** This routine is used for testing and analysis only.
03866 */
03867 int *sqlite3PagerStats(Pager *pPager){
03868   static int a[11];
03869   a[0] = sqlite3PcacheRefCount(pPager->pPCache);
03870   a[1] = sqlite3PcachePagecount(pPager->pPCache);
03871   a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
03872   a[3] = pPager->dbSize;
03873   a[4] = pPager->state;
03874   a[5] = pPager->errCode;
03875   a[6] = pPager->nHit;
03876   a[7] = pPager->nMiss;
03877   a[8] = 0;  /* Used to be pPager->nOvfl */
03878   a[9] = pPager->nRead;
03879   a[10] = pPager->nWrite;
03880   return a;
03881 }
/*
** Return the value of the MEMDB macro for pager pPager (presumably
** non-zero when the pager is an in-memory database -- confirm against
** the MEMDB definition).  Like sqlite3PagerStats() above, this routine
** is only compiled when SQLITE_TEST is defined.
*/
int sqlite3PagerIsMemdb(Pager *pPager){
  return MEMDB;
}
03885 #endif
03886 
03887 /*
03888 ** Set the statement rollback point.
03889 **
03890 ** This routine should be called with the transaction journal already
03891 ** open.  A new statement journal is created that can be used to rollback
03892 ** changes of a single SQL command within a larger transaction.
03893 */
03894 static int pagerStmtBegin(Pager *pPager){
03895   int rc;
03896   assert( !pPager->stmtInUse );
03897   assert( pPager->state>=PAGER_SHARED );
03898   assert( pPager->dbSize>=0 );
03899   PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
03900   if( !pPager->journalOpen ){
03901     pPager->stmtAutoopen = 1;
03902     return SQLITE_OK;
03903   }
03904   assert( pPager->journalOpen );
03905   assert( pPager->pInStmt==0 );
03906   pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize);
03907   if( pPager->pInStmt==0 ){
03908     /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
03909     return SQLITE_NOMEM;
03910   }
03911   pPager->stmtJSize = pPager->journalOff;
03912   pPager->stmtSize = pPager->dbSize;
03913   pPager->stmtHdrOff = 0;
03914   pPager->stmtCksum = pPager->cksumInit;
03915   if( !pPager->stmtOpen ){
03916     if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
03917       sqlite3MemJournalOpen(pPager->stfd);
03918     }else{
03919       rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
03920       if( rc ){
03921         goto stmt_begin_failed;
03922       }
03923     }
03924     pPager->stmtOpen = 1;
03925     pPager->stmtNRec = 0;
03926   }
03927   pPager->stmtInUse = 1;
03928   return SQLITE_OK;
03929  
03930 stmt_begin_failed:
03931   if( pPager->pInStmt ){
03932     sqlite3BitvecDestroy(pPager->pInStmt);
03933     pPager->pInStmt = 0;
03934   }
03935   return rc;
03936 }
03937 /* Non-static entry point for starting a statement transaction.  All of
03938 ** the work is delegated to pagerStmtBegin(), whose result is returned. */
03939 int sqlite3PagerStmtBegin(Pager *pPager){
03940   return pagerStmtBegin(pPager);
03941 }
03942 
03943 /* Commit the statement transaction, if one is in use.  stmtAutoopen is
03944 ** cleared and SQLITE_OK is returned whether or not a statement was open. */
03945 int sqlite3PagerStmtCommit(Pager *pPager){
03946   pPager->stmtAutoopen = 0;
03947   if( !pPager->stmtInUse ){
03948     return SQLITE_OK;
03949   }
03950   PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
03951   sqlite3BitvecDestroy(pPager->pInStmt);
03952   pPager->pInStmt = 0;
03953   pPager->stmtInUse = 0;
03954   pPager->stmtNRec = 0;
03955   if( sqlite3IsMemJournal(pPager->stfd) ){
03956     sqlite3OsTruncate(pPager->stfd, 0);   /* empty the in-memory journal */
03957   }
03958   return SQLITE_OK;
03959 }
03960 
03961 /*
03962 ** Roll back the statement transaction, if one is in use: call
03963 ** pager_stmt_playback(), then end the statement via
03964 ** sqlite3PagerStmtCommit().  Returns the playback result, or SQLITE_OK.
03965 */
03966 int sqlite3PagerStmtRollback(Pager *pPager){
03967   int rc = SQLITE_OK;
03968   if( pPager->stmtInUse ){
03969     PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
03970     rc = pager_stmt_playback(pPager);
03971     sqlite3PagerStmtCommit(pPager);
03972   }
03973   pPager->stmtAutoopen = 0;
03974   return rc;
03975 }
03976 
03977 /* Return the full pathname of the database file.  The pointer returned is
03978 ** the pager's own zFilename field, not a copy. */
03979 const char *sqlite3PagerFilename(Pager *pPager){
03980   const char *zName = pPager->zFilename;
03981   return zName;
03982 }
03983 
03984 /* Return the VFS structure for the pager, i.e. the pager's pVfs field,
03985 ** as a pointer to const. */
03986 const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
03987   const sqlite3_vfs *pVfs = pPager->pVfs;
03988   return pVfs;
03989 }
03990 
03991 /*
03992 ** Return the file handle for the database file associated with the
03993 ** pager.  This might return NULL if the file has not yet been opened.
03994 */
03995 sqlite3_file *sqlite3PagerFile(Pager *pPager){
03996   sqlite3_file *pDbFd = pPager->fd;
03997   return pDbFd;
03998 }
03999 
04000 /* Return the directory of the database file, i.e. the pager's
04001 ** zDirectory field. */
04002 const char *sqlite3PagerDirname(Pager *pPager){
04003   const char *zDir = pPager->zDirectory;
04004   return zDir;
04005 }
04006 
04007 /*
04008 ** Return the full pathname of the journal file.
04009 */
04010 const char *sqlite3PagerJournalname(Pager *pPager){
04011   return pPager->zJournal;
04012 }
04013 
04014 /* Report whether fsync() calls are disabled for this pager.  The value of
04015 ** the noSync field is returned: true means fsync() calls are disabled,
04016 ** FALSE means fsync()s are executed normally. */
04017 int sqlite3PagerNosync(Pager *pPager){
04018   int bNoSync = pPager->noSync;
04019   return bNoSync;
04020 }
04021 
04022 #ifdef SQLITE_HAS_CODEC
04023 /* Set the codec for this pager: record the callback xCodec and its
04024 ** argument pCodecArg in the Pager.  Only built when SQLITE_HAS_CODEC is
04025 ** defined. */
04026 void sqlite3PagerSetCodec(
04027   Pager *pPager,
04028   void *(*xCodec)(void*,void*,Pgno,int),
04029   void *pCodecArg
04030 ){
04031   pPager->pCodecArg = pCodecArg;
04032   pPager->xCodec = xCodec;
04033 }
04034 #endif
04035 
04036 #ifndef SQLITE_OMIT_AUTOVACUUM
04037 /*
04038 ** Move the page pPg to location pgno in the file.
04039 **
04040 ** There must be no references to the page previously located at pgno
04041 ** (which we call pPgOld) though that page is allowed to be in cache.  If
04042 ** pPgOld is not already in the rollback journal, it is not put there by
04043 ** this routine.
04044 **
04045 ** References to the page pPg remain valid. Updating any meta-data
04046 ** associated with pPg (i.e. data stored in the nExtra bytes allocated
04047 ** along with the page) is the responsibility of the caller.
04048 **
04049 ** A transaction must be active when this routine is called. A statement
04050 ** transaction may also be active (CREATE INDEX needs to move a page when
04051 ** a statement transaction is active).
04052 **
04053 ** If the fourth argument, isCommit, is non-zero, then this page is being
04054 ** moved as part of a database reorganization just before the transaction
04055 ** is committed, and the caller guarantees that the page will not be
04056 ** written to again within this transaction.
04057 **
04058 ** Returns SQLITE_OK, or the error code from a failed sqlite3PagerGet(). */
04059 int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
04060   PgHdr *pPgOld;  /* The page being overwritten. */
04061   Pgno needSyncPgno = 0;  /* Old number of pPg if it needs a journal sync */
04062 
04063   assert( pPg->nRef>0 );
04064 
04065   PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
04066       PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno);
04067   IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
04068 
04069   pager_get_content(pPg);  /* defined elsewhere; presumably loads pPg's content -- confirm */
04070 
04071   /* If the journal needs to be sync()ed before page pPg->pgno can
04072   ** be written to, store pPg->pgno in local variable needSyncPgno.
04073   **
04074   ** If the isCommit flag is set, there is no need to remember that
04075   ** the journal needs to be sync()ed before database page pPg->pgno 
04076   ** can be written to. The caller has already promised not to write to it.
04077   */
04078   if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
04079     needSyncPgno = pPg->pgno;
04080     assert( (pPg->flags&PGHDR_IN_JOURNAL) || (int)pgno>pPager->origDbSize );
04081     assert( pPg->flags&PGHDR_DIRTY );
04082     assert( pPager->needSync );
04083   }
04084 
04085   /* If the cache contains a page with page-number pgno, look it up here;
04086   ** it is moved aside and released further below. Also, if the
04087   ** PgHdr.needSync was set for page pgno before the 'move' operation, it
04088   ** needs to be retained for the page moved there.
04089   */
04090   pPg->flags &= ~(PGHDR_NEED_SYNC|PGHDR_IN_JOURNAL);  /* recomputed below for pgno */
04091   pPgOld = pager_lookup(pPager, pgno);
04092   assert( !pPgOld || pPgOld->nRef==1 );
04093   if( pPgOld ){
04094     pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
04095   }
04096   if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
04097     pPg->flags |= PGHDR_IN_JOURNAL;
04098   }
04099 
04100   sqlite3PcacheMove(pPg, pgno);
04101   if( pPgOld ){
04102     sqlite3PcacheMove(pPgOld, 0);  /* displaced page is moved to page number 0 ... */
04103     sqlite3PcacheRelease(pPgOld);  /* ... and then released */
04104   }
04105 
04106   makeDirty(pPg);
04107   pPager->dirtyCache = 1;
04108   pPager->dbModified = 1;
04109 
04110   if( needSyncPgno ){
04111     /* If needSyncPgno is non-zero, then the journal file needs to be 
04112     ** sync()ed before any data is written to database file page needSyncPgno.
04113     ** Currently, no such page exists in the page-cache and the 
04114     ** "is journaled" bitvec flag has been set. This needs to be remedied by
04115     ** loading the page into the pager-cache and setting the PgHdr.needSync 
04116     ** flag.
04117     **
04118     ** If the attempt to load the page into the page-cache fails, (due
04119     ** to a malloc() or IO failure), clear the bit in the pInJournal[]
04120     ** array. Otherwise, if the page is loaded and written again in
04121     ** this transaction, it may be written to the database file before
04122     ** it is synced into the journal file. This way, it may end up in
04123     ** the journal file twice, but that is not a problem.
04124     **
04125     ** The sqlite3PagerGet() call may cause the journal to sync. So make
04126     ** sure the Pager.needSync flag is set too.
04127     */
04128     int rc;
04129     PgHdr *pPgHdr;
04130     assert( pPager->needSync );
04131     rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
04132     if( rc!=SQLITE_OK ){
04133       if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
04134         sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
04135       }
04136       return rc;
04137     }
04138     pPager->needSync = 1;
04139     assert( pPager->noSync==0 && !MEMDB );
04140     pPgHdr->flags |= PGHDR_NEED_SYNC;
04141     pPgHdr->flags |= PGHDR_IN_JOURNAL;
04142     makeDirty(pPgHdr);
04143     sqlite3PagerUnref(pPgHdr);
04144   }
04145 
04146   return SQLITE_OK;
04147 }
04148 #endif
04149 
04150 /* Return a pointer to the data for the specified page (pPg->pData).  The
04151 ** page is asserted to be referenced unless its pager has memDb set. */
04152 void *sqlite3PagerGetData(DbPage *pPg){
04153   void *pContent = pPg->pData;
04154   assert( pPg->nRef>0 || pPg->pPager->memDb );
04155   return pContent;
04156 }
04157 
04158 /* Return a pointer to the Pager.nExtra bytes of "extra" space allocated
04159 ** along with the page (pPg->pExtra), or 0 if pPg->pPager is NULL. */
04160 void *sqlite3PagerGetExtra(DbPage *pPg){
04161   if( pPg->pPager==0 ){
04162     return 0;
04163   }
04164   return pPg->pExtra;
04165 }
04166 
04167 /*
04168 ** Query or change the locking-mode of this pager.  eMode must be one of
04169 ** PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
04170 ** PAGER_LOCKINGMODE_EXCLUSIVE.  A non-negative eMode (that is, anything
04171 ** but _QUERY) is stored as the new mode unless the pager's tempFile flag
04172 ** is set, in which case the mode is left unchanged.  The return value is
04173 ** the locking-mode in effect once the call has finished.
04174 */
04175 int sqlite3PagerLockingMode(Pager *pPager, int eMode){
04176   assert( eMode==PAGER_LOCKINGMODE_QUERY
04177             || eMode==PAGER_LOCKINGMODE_NORMAL
04178             || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
04179   assert( PAGER_LOCKINGMODE_QUERY<0 );
04180   assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
04181   if( eMode<0 || pPager->tempFile ){
04182     /* Query only, or tempFile set: the stored mode is left untouched. */
04183     return (int)pPager->exclusiveMode;
04184   }
04185   pPager->exclusiveMode = eMode;
04186   return (int)pPager->exclusiveMode;
04187 }
04188 
04189 /*
04190 ** Query or change the journal-mode of this pager.  eMode must be one of:
04191 **
04192 **    PAGER_JOURNALMODE_QUERY
04193 **    PAGER_JOURNALMODE_DELETE
04194 **    PAGER_JOURNALMODE_TRUNCATE
04195 **    PAGER_JOURNALMODE_PERSIST
04196 **    PAGER_JOURNALMODE_OFF
04197 **    PAGER_JOURNALMODE_MEMORY
04198 **
04199 ** A value other than _QUERY becomes the new journal-mode, unless MEMDB is
04200 ** true, in which case eMode is ignored.  The return value is the
04201 ** journal-mode in effect once the call has finished.
04202 */
04203 int sqlite3PagerJournalMode(Pager *pPager, int eMode){
04204   if( MEMDB ){
04205     return (int)pPager->journalMode;
04206   }
04207   assert( eMode==PAGER_JOURNALMODE_QUERY
04208             || eMode==PAGER_JOURNALMODE_DELETE
04209             || eMode==PAGER_JOURNALMODE_TRUNCATE
04210             || eMode==PAGER_JOURNALMODE_PERSIST
04211             || eMode==PAGER_JOURNALMODE_OFF
04212             || eMode==PAGER_JOURNALMODE_MEMORY );
04213   assert( PAGER_JOURNALMODE_QUERY<0 );
04214   if( eMode<0 ){
04215     assert( eMode==PAGER_JOURNALMODE_QUERY );
04216   }else{
04217     pPager->journalMode = eMode;
04218   }
04219   return (int)pPager->journalMode;
04220 }
04221 
04222 /* Get/set the size-limit used for persistent journal files: iLimit>=-1 is
04223 ** stored, smaller values only query.  Returns the limit now in effect. */
04224 i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
04225   if( iLimit<-1 ){
04226     return pPager->journalSizeLimit;   /* query only */
04227   }
04228   pPager->journalSizeLimit = iLimit;
04229   return pPager->journalSizeLimit;
04230 }
04231 
04232 #endif /* SQLITE_OMIT_DISKIO */

ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:55 2011 by Doxygen 1.6.1