00001 /* 00002 ** 2001 September 15 00003 ** 00004 ** The author disclaims copyright to this source code. In place of 00005 ** a legal notice, here is a blessing: 00006 ** 00007 ** May you do good and not evil. 00008 ** May you find forgiveness for yourself and forgive others. 00009 ** May you share freely, never taking more than you give. 00010 ** 00011 ************************************************************************* 00012 ** This is the implementation of the page cache subsystem or "pager". 00013 ** 00014 ** The pager is used to access a database disk file. It implements 00015 ** atomic commit and rollback through the use of a journal file that 00016 ** is separate from the database file. The pager also implements file 00017 ** locking to prevent two processes from writing the same database 00018 ** file simultaneously, or one process from reading the database while 00019 ** another is writing. 00020 ** 00021 ** @(#) $Id: pager.c,v 1.502 2008/11/07 00:24:54 drh Exp $ 00022 */ 00023 #ifndef SQLITE_OMIT_DISKIO 00024 #include "sqliteInt.h" 00025 00026 /* 00027 ** Macros for troubleshooting. Normally turned off 00028 */ 00029 #if 0 00030 #define sqlite3DebugPrintf printf 00031 #define PAGERTRACE1(X) sqlite3DebugPrintf(X) 00032 #define PAGERTRACE2(X,Y) sqlite3DebugPrintf(X,Y) 00033 #define PAGERTRACE3(X,Y,Z) sqlite3DebugPrintf(X,Y,Z) 00034 #define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W) 00035 #define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V) 00036 #else 00037 #define PAGERTRACE1(X) 00038 #define PAGERTRACE2(X,Y) 00039 #define PAGERTRACE3(X,Y,Z) 00040 #define PAGERTRACE4(X,Y,Z,W) 00041 #define PAGERTRACE5(X,Y,Z,W,V) 00042 #endif 00043 00044 /* 00045 ** The following two macros are used within the PAGERTRACEX() macros above 00046 ** to print out file-descriptors. 00047 ** 00048 ** PAGERID() takes a pointer to a Pager struct as its argument. The 00049 ** associated file-descriptor is returned. 
FILEHANDLEID() takes an sqlite3_file 00050 ** struct as its argument. 00051 */ 00052 #define PAGERID(p) ((int)(p->fd)) 00053 #define FILEHANDLEID(fd) ((int)fd) 00054 00055 /* 00056 ** The page cache as a whole is always in one of the following 00057 ** states: 00058 ** 00059 ** PAGER_UNLOCK The page cache is not currently reading or 00060 ** writing the database file. There is no 00061 ** data held in memory. This is the initial 00062 ** state. 00063 ** 00064 ** PAGER_SHARED The page cache is reading the database. 00065 ** Writing is not permitted. There can be 00066 ** multiple readers accessing the same database 00067 ** file at the same time. 00068 ** 00069 ** PAGER_RESERVED This process has reserved the database for writing 00070 ** but has not yet made any changes. Only one process 00071 ** at a time can reserve the database. The original 00072 ** database file has not been modified so other 00073 ** processes may still be reading the on-disk 00074 ** database file. 00075 ** 00076 ** PAGER_EXCLUSIVE The page cache is writing the database. 00077 ** Access is exclusive. No other processes or 00078 ** threads can be reading or writing while one 00079 ** process is writing. 00080 ** 00081 ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE 00082 ** after all dirty pages have been written to the 00083 ** database file and the file has been synced to 00084 ** disk. All that remains to do is to remove or 00085 ** truncate the journal file and the transaction 00086 ** will be committed. 00087 ** 00088 ** The page cache comes up in PAGER_UNLOCK. The first time a 00089 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED. 00090 ** After all pages have been released using sqlite_page_unref(), 00091 ** the state transitions back to PAGER_UNLOCK. The first time 00092 ** that sqlite3PagerWrite() is called, the state transitions to 00093 ** PAGER_RESERVED. 
** (Note that sqlite3PagerWrite() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** PAGER_RESERVED means that there is an open rollback journal.
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file, though writes to the rollback
** journal occur with just PAGER_RESERVED.  After an sqlite3PagerRollback()
** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
*/
#define PAGER_UNLOCK      0
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5

/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
**
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif

/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
00129 */ 00130 #define FORCE_ALIGNMENT(X) (((X)+7)&~7) 00131 00132 /* 00133 ** A macro used for invoking the codec if there is one 00134 */ 00135 #ifdef SQLITE_HAS_CODEC 00136 # define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); } 00137 # define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D)) 00138 #else 00139 # define CODEC1(P,D,N,X) /* NO-OP */ 00140 # define CODEC2(P,D,N,X) ((char*)D) 00141 #endif 00142 00143 /* 00144 ** A open page cache is an instance of the following structure. 00145 ** 00146 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or 00147 ** or SQLITE_FULL. Once one of the first three errors occurs, it persists 00148 ** and is returned as the result of every major pager API call. The 00149 ** SQLITE_FULL return code is slightly different. It persists only until the 00150 ** next successful rollback is performed on the pager cache. Also, 00151 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup() 00152 ** APIs, they may still be used successfully. 00153 */ 00154 struct Pager { 00155 sqlite3_vfs *pVfs; /* OS functions to use for IO */ 00156 u8 journalOpen; /* True if journal file descriptors is valid */ 00157 u8 journalStarted; /* True if header of journal is synced */ 00158 u8 useJournal; /* Use a rollback journal on this file */ 00159 u8 noReadlock; /* Do not bother to obtain readlocks */ 00160 u8 stmtOpen; /* True if the statement subjournal is open */ 00161 u8 stmtInUse; /* True we are in a statement subtransaction */ 00162 u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/ 00163 u8 noSync; /* Do not sync the journal if true */ 00164 u8 fullSync; /* Do extra syncs of the journal for robustness */ 00165 u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */ 00166 u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. 
*/ 00167 u8 tempFile; /* zFilename is a temporary file */ 00168 u8 readOnly; /* True for a read-only database */ 00169 u8 needSync; /* True if an fsync() is needed on the journal */ 00170 u8 dirtyCache; /* True if cached pages have changed */ 00171 u8 alwaysRollback; /* Disable DontRollback() for all pages */ 00172 u8 memDb; /* True to inhibit all file I/O */ 00173 u8 setMaster; /* True if a m-j name has been written to jrnl */ 00174 u8 doNotSync; /* Boolean. While true, do not spill the cache */ 00175 u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ 00176 u8 journalMode; /* On of the PAGER_JOURNALMODE_* values */ 00177 u8 dbModified; /* True if there are any changes to the Db */ 00178 u8 changeCountDone; /* Set after incrementing the change-counter */ 00179 u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ 00180 int errCode; /* One of several kinds of errors */ 00181 int dbSize; /* Number of pages in the file */ 00182 int origDbSize; /* dbSize before the current change */ 00183 int stmtSize; /* Size of database (in pages) at stmt_begin() */ 00184 int nRec; /* Number of pages written to the journal */ 00185 u32 cksumInit; /* Quasi-random value added to every checksum */ 00186 int stmtNRec; /* Number of records in stmt subjournal */ 00187 int nExtra; /* Add this many bytes to each in-memory page */ 00188 int pageSize; /* Number of bytes in a page */ 00189 int nPage; /* Total number of in-memory pages */ 00190 int mxPage; /* Maximum number of pages to hold in cache */ 00191 Pgno mxPgno; /* Maximum allowed size of the database */ 00192 Bitvec *pInJournal; /* One bit for each page in the database file */ 00193 Bitvec *pInStmt; /* One bit for each page in the database */ 00194 Bitvec *pAlwaysRollback; /* One bit for each page marked always-rollback */ 00195 char *zFilename; /* Name of the database file */ 00196 char *zJournal; /* Name of the journal file */ 00197 char *zDirectory; /* Directory hold database and journal files */ 00198 sqlite3_file *fd, 
*jfd; /* File descriptors for database and journal */ 00199 sqlite3_file *stfd; /* File descriptor for the statement subjournal*/ 00200 BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */ 00201 i64 journalOff; /* Current byte offset in the journal file */ 00202 i64 journalHdr; /* Byte offset to previous journal header */ 00203 i64 stmtHdrOff; /* First journal header written this statement */ 00204 i64 stmtCksum; /* cksumInit when statement was started */ 00205 i64 stmtJSize; /* Size of journal at stmt_begin() */ 00206 u32 sectorSize; /* Assumed sector size during rollback */ 00207 #ifdef SQLITE_TEST 00208 int nHit, nMiss; /* Cache hits and missing */ 00209 int nRead, nWrite; /* Database pages read/written */ 00210 #endif 00211 void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */ 00212 #ifdef SQLITE_HAS_CODEC 00213 void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ 00214 void *pCodecArg; /* First argument to xCodec() */ 00215 #endif 00216 char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ 00217 char dbFileVers[16]; /* Changes whenever database file changes */ 00218 i64 journalSizeLimit; /* Size limit for persistent journal files */ 00219 PCache *pPCache; /* Pointer to page cache object */ 00220 }; 00221 00222 /* 00223 ** The following global variables hold counters used for 00224 ** testing purposes only. These variables do not exist in 00225 ** a non-testing build. These variables are not thread-safe. 00226 */ 00227 #ifdef SQLITE_TEST 00228 int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ 00229 int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ 00230 int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ 00231 # define PAGER_INCR(v) v++ 00232 #else 00233 # define PAGER_INCR(v) 00234 #endif 00235 00236 00237 00238 /* 00239 ** Journal files begin with the following magic string. 
The data 00240 ** was obtained from /dev/random. It is used only as a sanity check. 00241 ** 00242 ** Since version 2.8.0, the journal format contains additional sanity 00243 ** checking information. If the power fails while the journal is begin 00244 ** written, semi-random garbage data might appear in the journal 00245 ** file after power is restored. If an attempt is then made 00246 ** to roll the journal back, the database could be corrupted. The additional 00247 ** sanity checking data is an attempt to discover the garbage in the 00248 ** journal and ignore it. 00249 ** 00250 ** The sanity checking information for the new journal format consists 00251 ** of a 32-bit checksum on each page of data. The checksum covers both 00252 ** the page number and the pPager->pageSize bytes of data for the page. 00253 ** This cksum is initialized to a 32-bit random value that appears in the 00254 ** journal file right after the header. The random initializer is important, 00255 ** because garbage data that appears at the end of a journal is likely 00256 ** data that was once in other files that have now been deleted. If the 00257 ** garbage data came from an obsolete journal file, the checksums might 00258 ** be correct. But by initializing the checksum to random value which 00259 ** is different for every journal, we minimize that risk. 00260 */ 00261 static const unsigned char aJournalMagic[] = { 00262 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, 00263 }; 00264 00265 /* 00266 ** The size of the header and of each page in the journal is determined 00267 ** by the following macros. 00268 */ 00269 #define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) 00270 00271 /* 00272 ** The journal header size for this pager. In the future, this could be 00273 ** set to some value read from the disk controller. The important 00274 ** characteristic is that it is the same size as a disk sector. 
00275 */ 00276 #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) 00277 00278 /* 00279 ** The macro MEMDB is true if we are dealing with an in-memory database. 00280 ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set, 00281 ** the value of MEMDB will be a constant and the compiler will optimize 00282 ** out code that would never execute. 00283 */ 00284 #ifdef SQLITE_OMIT_MEMORYDB 00285 # define MEMDB 0 00286 #else 00287 # define MEMDB pPager->memDb 00288 #endif 00289 00290 /* 00291 ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is 00292 ** reserved for working around a windows/posix incompatibility). It is 00293 ** used in the journal to signify that the remainder of the journal file 00294 ** is devoted to storing a master journal name - there are no more pages to 00295 ** roll back. See comments for function writeMasterJournal() for details. 00296 */ 00297 /* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */ 00298 #define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1) 00299 00300 /* 00301 ** The maximum legal page number is (2^31 - 1). 00302 */ 00303 #define PAGER_MAX_PGNO 2147483647 00304 00305 /* 00306 ** Return true if page *pPg has already been written to the statement 00307 ** journal (or statement snapshot has been created, if *pPg is part 00308 ** of an in-memory database). 00309 */ 00310 static int pageInStatement(PgHdr *pPg){ 00311 Pager *pPager = pPg->pPager; 00312 return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno); 00313 } 00314 00315 /* 00316 ** Read a 32-bit integer from the given file descriptor. Store the integer 00317 ** that is read in *pRes. Return SQLITE_OK if everything worked, or an 00318 ** error code is something goes wrong. 00319 ** 00320 ** All values are stored on disk as big-endian. 
00321 */ 00322 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ 00323 unsigned char ac[4]; 00324 int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); 00325 if( rc==SQLITE_OK ){ 00326 *pRes = sqlite3Get4byte(ac); 00327 } 00328 return rc; 00329 } 00330 00331 /* 00332 ** Write a 32-bit integer into a string buffer in big-endian byte order. 00333 */ 00334 #define put32bits(A,B) sqlite3Put4byte((u8*)A,B) 00335 00336 /* 00337 ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK 00338 ** on success or an error code is something goes wrong. 00339 */ 00340 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ 00341 char ac[4]; 00342 put32bits(ac, val); 00343 return sqlite3OsWrite(fd, ac, 4, offset); 00344 } 00345 00346 /* 00347 ** If file pFd is open, call sqlite3OsUnlock() on it. 00348 */ 00349 static int osUnlock(sqlite3_file *pFd, int eLock){ 00350 if( !pFd->pMethods ){ 00351 return SQLITE_OK; 00352 } 00353 return sqlite3OsUnlock(pFd, eLock); 00354 } 00355 00356 /* 00357 ** This function determines whether or not the atomic-write optimization 00358 ** can be used with this pager. The optimization can be used if: 00359 ** 00360 ** (a) the value returned by OsDeviceCharacteristics() indicates that 00361 ** a database page may be written atomically, and 00362 ** (b) the value returned by OsSectorSize() is less than or equal 00363 ** to the page size. 00364 ** 00365 ** If the optimization cannot be used, 0 is returned. If it can be used, 00366 ** then the value returned is the size of the journal file when it 00367 ** contains rollback data for exactly one page. 
00368 */ 00369 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 00370 static int jrnlBufferSize(Pager *pPager){ 00371 int dc; /* Device characteristics */ 00372 int nSector; /* Sector size */ 00373 int szPage; /* Page size */ 00374 sqlite3_file *fd = pPager->fd; 00375 00376 if( fd->pMethods ){ 00377 dc = sqlite3OsDeviceCharacteristics(fd); 00378 nSector = sqlite3OsSectorSize(fd); 00379 szPage = pPager->pageSize; 00380 } 00381 00382 assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); 00383 assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); 00384 00385 if( !fd->pMethods || 00386 (dc & (SQLITE_IOCAP_ATOMIC|(szPage>>8)) && nSector<=szPage) ){ 00387 return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); 00388 } 00389 return 0; 00390 } 00391 #endif 00392 00393 /* 00394 ** This function should be called when an error occurs within the pager 00395 ** code. The first argument is a pointer to the pager structure, the 00396 ** second the error-code about to be returned by a pager API function. 00397 ** The value returned is a copy of the second argument to this function. 00398 ** 00399 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL 00400 ** the error becomes persistent. Until the persisten error is cleared, 00401 ** subsequent API calls on this Pager will immediately return the same 00402 ** error code. 00403 ** 00404 ** A persistent error indicates that the contents of the pager-cache 00405 ** cannot be trusted. This state can be cleared by completely discarding 00406 ** the contents of the pager-cache. If a transaction was active when 00407 ** the persistent error occured, then the rollback journal may need 00408 ** to be replayed. 
00409 */ 00410 static void pager_unlock(Pager *pPager); 00411 static int pager_error(Pager *pPager, int rc){ 00412 int rc2 = rc & 0xff; 00413 assert( 00414 pPager->errCode==SQLITE_FULL || 00415 pPager->errCode==SQLITE_OK || 00416 (pPager->errCode & 0xff)==SQLITE_IOERR 00417 ); 00418 if( 00419 rc2==SQLITE_FULL || 00420 rc2==SQLITE_IOERR || 00421 rc2==SQLITE_CORRUPT 00422 ){ 00423 pPager->errCode = rc; 00424 if( pPager->state==PAGER_UNLOCK 00425 && sqlite3PcacheRefCount(pPager->pPCache)==0 00426 ){ 00427 /* If the pager is already unlocked, call pager_unlock() now to 00428 ** clear the error state and ensure that the pager-cache is 00429 ** completely empty. 00430 */ 00431 pager_unlock(pPager); 00432 } 00433 } 00434 return rc; 00435 } 00436 00437 /* 00438 ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking 00439 ** on the cache using a hash function. This is used for testing 00440 ** and debugging only. 00441 */ 00442 #ifdef SQLITE_CHECK_PAGES 00443 /* 00444 ** Return a 32-bit hash of the page data for pPage. 00445 */ 00446 static u32 pager_datahash(int nByte, unsigned char *pData){ 00447 u32 hash = 0; 00448 int i; 00449 for(i=0; i<nByte; i++){ 00450 hash = (hash*1039) + pData[i]; 00451 } 00452 return hash; 00453 } 00454 static u32 pager_pagehash(PgHdr *pPage){ 00455 return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData); 00456 } 00457 static u32 pager_set_pagehash(PgHdr *pPage){ 00458 pPage->pageHash = pager_pagehash(pPage); 00459 } 00460 00461 /* 00462 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES 00463 ** is defined, and NDEBUG is not defined, an assert() statement checks 00464 ** that the page is either dirty or still matches the calculated page-hash. 
00465 */ 00466 #define CHECK_PAGE(x) checkPage(x) 00467 static void checkPage(PgHdr *pPg){ 00468 Pager *pPager = pPg->pPager; 00469 assert( !pPg->pageHash || pPager->errCode 00470 || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); 00471 } 00472 00473 #else 00474 #define pager_datahash(X,Y) 0 00475 #define pager_pagehash(X) 0 00476 #define CHECK_PAGE(x) 00477 #endif /* SQLITE_CHECK_PAGES */ 00478 00479 /* 00480 ** When this is called the journal file for pager pPager must be open. 00481 ** The master journal file name is read from the end of the file and 00482 ** written into memory supplied by the caller. 00483 ** 00484 ** zMaster must point to a buffer of at least nMaster bytes allocated by 00485 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is 00486 ** enough space to write the master journal name). If the master journal 00487 ** name in the journal is longer than nMaster bytes (including a 00488 ** nul-terminator), then this is handled as if no master journal name 00489 ** were present in the journal. 00490 ** 00491 ** If no master journal file name is present zMaster[0] is set to 0 and 00492 ** SQLITE_OK returned. 
00493 */ 00494 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){ 00495 int rc; 00496 u32 len; 00497 i64 szJ; 00498 u32 cksum; 00499 u32 u; /* Unsigned loop counter */ 00500 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 00501 00502 zMaster[0] = '\0'; 00503 00504 rc = sqlite3OsFileSize(pJrnl, &szJ); 00505 if( rc!=SQLITE_OK || szJ<16 ) return rc; 00506 00507 rc = read32bits(pJrnl, szJ-16, &len); 00508 if( rc!=SQLITE_OK ) return rc; 00509 00510 if( len>=nMaster ){ 00511 return SQLITE_OK; 00512 } 00513 00514 rc = read32bits(pJrnl, szJ-12, &cksum); 00515 if( rc!=SQLITE_OK ) return rc; 00516 00517 rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8); 00518 if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc; 00519 00520 rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len); 00521 if( rc!=SQLITE_OK ){ 00522 return rc; 00523 } 00524 zMaster[len] = '\0'; 00525 00526 /* See if the checksum matches the master journal name */ 00527 for(u=0; u<len; u++){ 00528 cksum -= zMaster[u]; 00529 } 00530 if( cksum ){ 00531 /* If the checksum doesn't add up, then one or more of the disk sectors 00532 ** containing the master journal filename is corrupted. This means 00533 ** definitely roll back, so just return SQLITE_OK and report a (nul) 00534 ** master-journal filename. 00535 */ 00536 zMaster[0] = '\0'; 00537 } 00538 00539 return SQLITE_OK; 00540 } 00541 00542 /* 00543 ** Seek the journal file descriptor to the next sector boundary where a 00544 ** journal header may be read or written. Pager.journalOff is updated with 00545 ** the new seek offset. 
00546 ** 00547 ** i.e for a sector size of 512: 00548 ** 00549 ** Input Offset Output Offset 00550 ** --------------------------------------- 00551 ** 0 0 00552 ** 512 512 00553 ** 100 512 00554 ** 2000 2048 00555 ** 00556 */ 00557 static void seekJournalHdr(Pager *pPager){ 00558 i64 offset = 0; 00559 i64 c = pPager->journalOff; 00560 if( c ){ 00561 offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); 00562 } 00563 assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); 00564 assert( offset>=c ); 00565 assert( (offset-c)<JOURNAL_HDR_SZ(pPager) ); 00566 pPager->journalOff = offset; 00567 } 00568 00569 /* 00570 ** Write zeros over the header of the journal file. This has the 00571 ** effect of invalidating the journal file and committing the 00572 ** transaction. 00573 */ 00574 static int zeroJournalHdr(Pager *pPager, int doTruncate){ 00575 int rc = SQLITE_OK; 00576 static const char zeroHdr[28] = {0}; 00577 00578 if( pPager->journalOff ){ 00579 i64 iLimit = pPager->journalSizeLimit; 00580 00581 IOTRACE(("JZEROHDR %p\n", pPager)) 00582 if( doTruncate || iLimit==0 ){ 00583 rc = sqlite3OsTruncate(pPager->jfd, 0); 00584 }else{ 00585 rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); 00586 } 00587 if( rc==SQLITE_OK && !pPager->noSync ){ 00588 rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags); 00589 } 00590 00591 /* At this point the transaction is committed but the write lock 00592 ** is still held on the file. If there is a size limit configured for 00593 ** the persistent journal and the journal file currently consumes more 00594 ** space than that limit allows for, truncate it now. There is no need 00595 ** to sync the file following this operation. 
00596 */ 00597 if( rc==SQLITE_OK && iLimit>0 ){ 00598 i64 sz; 00599 rc = sqlite3OsFileSize(pPager->jfd, &sz); 00600 if( rc==SQLITE_OK && sz>iLimit ){ 00601 rc = sqlite3OsTruncate(pPager->jfd, iLimit); 00602 } 00603 } 00604 } 00605 return rc; 00606 } 00607 00608 /* 00609 ** The journal file must be open when this routine is called. A journal 00610 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the 00611 ** current location. 00612 ** 00613 ** The format for the journal header is as follows: 00614 ** - 8 bytes: Magic identifying journal format. 00615 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. 00616 ** - 4 bytes: Random number used for page hash. 00617 ** - 4 bytes: Initial database page count. 00618 ** - 4 bytes: Sector size used by the process that wrote this journal. 00619 ** - 4 bytes: Database page size. 00620 ** 00621 ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. 00622 */ 00623 static int writeJournalHdr(Pager *pPager){ 00624 int rc = SQLITE_OK; 00625 char *zHeader = pPager->pTmpSpace; 00626 int nHeader = pPager->pageSize; 00627 int nWrite; 00628 00629 if( nHeader>JOURNAL_HDR_SZ(pPager) ){ 00630 nHeader = JOURNAL_HDR_SZ(pPager); 00631 } 00632 00633 if( pPager->stmtHdrOff==0 ){ 00634 pPager->stmtHdrOff = pPager->journalOff; 00635 } 00636 00637 seekJournalHdr(pPager); 00638 pPager->journalHdr = pPager->journalOff; 00639 00640 memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); 00641 00642 /* 00643 ** Write the nRec Field - the number of page records that follow this 00644 ** journal header. Normally, zero is written to this value at this time. 00645 ** After the records are added to the journal (and the journal synced, 00646 ** if in full-sync mode), the zero is overwritten with the true number 00647 ** of records (see syncJournal()). 00648 ** 00649 ** A faster alternative is to write 0xFFFFFFFF to the nRec field. 
When 00650 ** reading the journal this value tells SQLite to assume that the 00651 ** rest of the journal file contains valid page records. This assumption 00652 ** is dangerous, as if a failure occured whilst writing to the journal 00653 ** file it may contain some garbage data. There are two scenarios 00654 ** where this risk can be ignored: 00655 ** 00656 ** * When the pager is in no-sync mode. Corruption can follow a 00657 ** power failure in this case anyway. 00658 ** 00659 ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees 00660 ** that garbage data is never appended to the journal file. 00661 */ 00662 assert(pPager->fd->pMethods||pPager->noSync); 00663 if( (pPager->noSync) || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) 00664 || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 00665 ){ 00666 put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); 00667 }else{ 00668 put32bits(&zHeader[sizeof(aJournalMagic)], 0); 00669 } 00670 00671 /* The random check-hash initialiser */ 00672 sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); 00673 put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); 00674 /* The initial database size */ 00675 put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize); 00676 /* The assumed sector size for this process */ 00677 put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); 00678 if( pPager->journalHdr==0 ){ 00679 /* The page size */ 00680 put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); 00681 } 00682 00683 for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){ 00684 IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader)) 00685 rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); 00686 pPager->journalOff += nHeader; 00687 } 00688 00689 return rc; 00690 } 00691 00692 /* 00693 ** The journal file must be open when this is called. 
A journal header file 00694 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal 00695 ** file. See comments above function writeJournalHdr() for a description of 00696 ** the journal header format. 00697 ** 00698 ** If the header is read successfully, *nRec is set to the number of 00699 ** page records following this header and *dbSize is set to the size of the 00700 ** database before the transaction began, in pages. Also, pPager->cksumInit 00701 ** is set to the value read from the journal header. SQLITE_OK is returned 00702 ** in this case. 00703 ** 00704 ** If the journal header file appears to be corrupted, SQLITE_DONE is 00705 ** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes 00706 ** cannot be read from the journal file an error code is returned. 00707 */ 00708 static int readJournalHdr( 00709 Pager *pPager, 00710 i64 journalSize, 00711 u32 *pNRec, 00712 u32 *pDbSize 00713 ){ 00714 int rc; 00715 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 00716 i64 jrnlOff; 00717 int iPageSize; 00718 00719 seekJournalHdr(pPager); 00720 if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ 00721 return SQLITE_DONE; 00722 } 00723 jrnlOff = pPager->journalOff; 00724 00725 rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff); 00726 if( rc ) return rc; 00727 jrnlOff += sizeof(aMagic); 00728 00729 if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ 00730 return SQLITE_DONE; 00731 } 00732 00733 rc = read32bits(pPager->jfd, jrnlOff, pNRec); 00734 if( rc ) return rc; 00735 00736 rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit); 00737 if( rc ) return rc; 00738 00739 rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize); 00740 if( rc ) return rc; 00741 00742 rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize); 00743 if( rc==SQLITE_OK 00744 && iPageSize>=512 00745 && iPageSize<=SQLITE_MAX_PAGE_SIZE 00746 && ((iPageSize-1)&iPageSize)==0 00747 ){ 00748 u16 pagesize = iPageSize; 00749 rc 
= sqlite3PagerSetPagesize(pPager, &pagesize); 00750 } 00751 if( rc ) return rc; 00752 00753 /* Update the assumed sector-size to match the value used by 00754 ** the process that created this journal. If this journal was 00755 ** created by a process other than this one, then this routine 00756 ** is being called from within pager_playback(). The local value 00757 ** of Pager.sectorSize is restored at the end of that routine. 00758 */ 00759 rc = read32bits(pPager->jfd, jrnlOff+12, &pPager->sectorSize); 00760 if( rc ) return rc; 00761 if( (pPager->sectorSize & (pPager->sectorSize-1))!=0 00762 || pPager->sectorSize>0x1000000 ){ 00763 return SQLITE_DONE; 00764 } 00765 00766 pPager->journalOff += JOURNAL_HDR_SZ(pPager); 00767 return SQLITE_OK; 00768 } 00769 00770 00771 /* 00772 ** Write the supplied master journal name into the journal file for pager 00773 ** pPager at the current location. The master journal name must be the last 00774 ** thing written to a journal file. If the pager is in full-sync mode, the 00775 ** journal file descriptor is advanced to the next sector boundary before 00776 ** anything is written. The format is: 00777 ** 00778 ** + 4 bytes: PAGER_MJ_PGNO. 00779 ** + N bytes: length of master journal name. 00780 ** + 4 bytes: N 00781 ** + 4 bytes: Master journal name checksum. 00782 ** + 8 bytes: aJournalMagic[]. 00783 ** 00784 ** The master journal page checksum is the sum of the bytes in the master 00785 ** journal name. 00786 ** 00787 ** If zMaster is a NULL pointer (occurs for a single database transaction), 00788 ** this call is a no-op. 
00789 */ 00790 static int writeMasterJournal(Pager *pPager, const char *zMaster){ 00791 int rc; 00792 int len; 00793 int i; 00794 i64 jrnlOff; 00795 i64 jrnlSize; 00796 u32 cksum = 0; 00797 char zBuf[sizeof(aJournalMagic)+2*4]; 00798 00799 if( !zMaster || pPager->setMaster ) return SQLITE_OK; 00800 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ) return SQLITE_OK; 00801 pPager->setMaster = 1; 00802 00803 len = strlen(zMaster); 00804 for(i=0; i<len; i++){ 00805 cksum += zMaster[i]; 00806 } 00807 00808 /* If in full-sync mode, advance to the next disk sector before writing 00809 ** the master journal name. This is in case the previous page written to 00810 ** the journal has already been synced. 00811 */ 00812 if( pPager->fullSync ){ 00813 seekJournalHdr(pPager); 00814 } 00815 jrnlOff = pPager->journalOff; 00816 pPager->journalOff += (len+20); 00817 00818 rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager)); 00819 if( rc!=SQLITE_OK ) return rc; 00820 jrnlOff += 4; 00821 00822 rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff); 00823 if( rc!=SQLITE_OK ) return rc; 00824 jrnlOff += len; 00825 00826 put32bits(zBuf, len); 00827 put32bits(&zBuf[4], cksum); 00828 memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic)); 00829 rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff); 00830 jrnlOff += 8+sizeof(aJournalMagic); 00831 pPager->needSync = !pPager->noSync; 00832 00833 /* If the pager is in peristent-journal mode, then the physical 00834 ** journal-file may extend past the end of the master-journal name 00835 ** and 8 bytes of magic data just written to the file. This is 00836 ** dangerous because the code to rollback a hot-journal file 00837 ** will not be able to find the master-journal name to determine 00838 ** whether or not the journal is hot. 00839 ** 00840 ** Easiest thing to do in this scenario is to truncate the journal 00841 ** file to the required size. 
00842 */ 00843 if( (rc==SQLITE_OK) 00844 && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK 00845 && jrnlSize>jrnlOff 00846 ){ 00847 rc = sqlite3OsTruncate(pPager->jfd, jrnlOff); 00848 } 00849 return rc; 00850 } 00851 00852 /* 00853 ** Find a page in the hash table given its page number. Return 00854 ** a pointer to the page or NULL if not found. 00855 */ 00856 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ 00857 PgHdr *p; 00858 sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p); 00859 return p; 00860 } 00861 00862 /* 00863 ** Clear the in-memory cache. This routine 00864 ** sets the state of the pager back to what it was when it was first 00865 ** opened. Any outstanding pages are invalidated and subsequent attempts 00866 ** to access those pages will likely result in a coredump. 00867 */ 00868 static void pager_reset(Pager *pPager){ 00869 if( pPager->errCode ) return; 00870 sqlite3PcacheClear(pPager->pPCache); 00871 } 00872 00873 /* 00874 ** Unlock the database file. 00875 ** 00876 ** If the pager is currently in error state, discard the contents of 00877 ** the cache and reset the Pager structure internal state. If there is 00878 ** an open journal-file, then the next time a shared-lock is obtained 00879 ** on the pager file (by this or any other process), it will be 00880 ** treated as a hot-journal and rolled back. 00881 */ 00882 static void pager_unlock(Pager *pPager){ 00883 if( !pPager->exclusiveMode ){ 00884 int rc = osUnlock(pPager->fd, NO_LOCK); 00885 if( rc ) pPager->errCode = rc; 00886 pPager->dbSize = -1; 00887 IOTRACE(("UNLOCK %p\n", pPager)) 00888 00889 /* Always close the journal file when dropping the database lock. 00890 ** Otherwise, another connection with journal_mode=delete might 00891 ** delete the file out from under us. 
00892 */ 00893 if( pPager->journalOpen ){ 00894 sqlite3OsClose(pPager->jfd); 00895 pPager->journalOpen = 0; 00896 sqlite3BitvecDestroy(pPager->pInJournal); 00897 pPager->pInJournal = 0; 00898 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 00899 pPager->pAlwaysRollback = 0; 00900 } 00901 00902 /* If Pager.errCode is set, the contents of the pager cache cannot be 00903 ** trusted. Now that the pager file is unlocked, the contents of the 00904 ** cache can be discarded and the error code safely cleared. 00905 */ 00906 if( pPager->errCode ){ 00907 if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK; 00908 pager_reset(pPager); 00909 if( pPager->stmtOpen ){ 00910 sqlite3OsClose(pPager->stfd); 00911 sqlite3BitvecDestroy(pPager->pInStmt); 00912 pPager->pInStmt = 0; 00913 } 00914 pPager->stmtOpen = 0; 00915 pPager->stmtInUse = 0; 00916 pPager->journalOff = 0; 00917 pPager->journalStarted = 0; 00918 pPager->stmtAutoopen = 0; 00919 pPager->origDbSize = 0; 00920 } 00921 00922 pPager->state = PAGER_UNLOCK; 00923 pPager->changeCountDone = 0; 00924 } 00925 } 00926 00927 /* 00928 ** Execute a rollback if a transaction is active and unlock the 00929 ** database file. If the pager has already entered the error state, 00930 ** do not attempt the rollback. 00931 */ 00932 static void pagerUnlockAndRollback(Pager *p){ 00933 if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){ 00934 sqlite3BeginBenignMalloc(); 00935 sqlite3PagerRollback(p); 00936 sqlite3EndBenignMalloc(); 00937 } 00938 pager_unlock(p); 00939 } 00940 00941 /* 00942 ** This routine ends a transaction. A transaction is ended by either 00943 ** a COMMIT or a ROLLBACK. 00944 ** 00945 ** When this routine is called, the pager has the journal file open and 00946 ** a RESERVED or EXCLUSIVE lock on the database. This routine will release 00947 ** the database lock and acquires a SHARED lock in its place if that is 00948 ** the appropriate thing to do. 
Release locks usually is appropriate, 00949 ** unless we are in exclusive access mode or unless this is a 00950 ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation. 00951 ** 00952 ** The journal file is either deleted or truncated. 00953 ** 00954 ** TODO: Consider keeping the journal file open for temporary databases. 00955 ** This might give a performance improvement on windows where opening 00956 ** a file is an expensive operation. 00957 */ 00958 static int pager_end_transaction(Pager *pPager, int hasMaster){ 00959 int rc = SQLITE_OK; 00960 int rc2 = SQLITE_OK; 00961 if( pPager->state<PAGER_RESERVED ){ 00962 return SQLITE_OK; 00963 } 00964 sqlite3PagerStmtCommit(pPager); 00965 if( pPager->stmtOpen && !pPager->exclusiveMode ){ 00966 sqlite3OsClose(pPager->stfd); 00967 pPager->stmtOpen = 0; 00968 } 00969 if( pPager->journalOpen ){ 00970 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 00971 int isMemoryJournal = sqlite3IsMemJournal(pPager->jfd); 00972 sqlite3OsClose(pPager->jfd); 00973 pPager->journalOpen = 0; 00974 if( !isMemoryJournal ){ 00975 rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); 00976 } 00977 }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE 00978 && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){ 00979 pPager->journalOff = 0; 00980 pPager->journalStarted = 0; 00981 }else if( pPager->exclusiveMode 00982 || pPager->journalMode==PAGER_JOURNALMODE_PERSIST 00983 ){ 00984 rc = zeroJournalHdr(pPager, hasMaster); 00985 pager_error(pPager, rc); 00986 pPager->journalOff = 0; 00987 pPager->journalStarted = 0; 00988 }else{ 00989 assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc ); 00990 sqlite3OsClose(pPager->jfd); 00991 pPager->journalOpen = 0; 00992 if( rc==SQLITE_OK && !pPager->tempFile ){ 00993 rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); 00994 } 00995 } 00996 sqlite3BitvecDestroy(pPager->pInJournal); 00997 pPager->pInJournal = 0; 00998 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 00999 
pPager->pAlwaysRollback = 0; 01000 sqlite3PcacheCleanAll(pPager->pPCache); 01001 #ifdef SQLITE_CHECK_PAGES 01002 sqlite3PcacheIterate(pPager->pPCache, pager_set_pagehash); 01003 #endif 01004 sqlite3PcacheClearFlags(pPager->pPCache, 01005 PGHDR_IN_JOURNAL | PGHDR_NEED_SYNC 01006 ); 01007 pPager->dirtyCache = 0; 01008 pPager->nRec = 0; 01009 }else{ 01010 assert( pPager->pInJournal==0 ); 01011 } 01012 01013 if( !pPager->exclusiveMode ){ 01014 rc2 = osUnlock(pPager->fd, SHARED_LOCK); 01015 pPager->state = PAGER_SHARED; 01016 }else if( pPager->state==PAGER_SYNCED ){ 01017 pPager->state = PAGER_EXCLUSIVE; 01018 } 01019 pPager->origDbSize = 0; 01020 pPager->setMaster = 0; 01021 pPager->needSync = 0; 01022 /* lruListSetFirstSynced(pPager); */ 01023 if( !MEMDB ){ 01024 pPager->dbSize = -1; 01025 } 01026 pPager->dbModified = 0; 01027 01028 return (rc==SQLITE_OK?rc2:rc); 01029 } 01030 01031 /* 01032 ** Compute and return a checksum for the page of data. 01033 ** 01034 ** This is not a real checksum. It is really just the sum of the 01035 ** random initial value and the page number. We experimented with 01036 ** a checksum of the entire data, but that was found to be too slow. 01037 ** 01038 ** Note that the page number is stored at the beginning of data and 01039 ** the checksum is stored at the end. This is important. If journal 01040 ** corruption occurs due to a power failure, the most likely scenario 01041 ** is that one end or the other of the record will be changed. It is 01042 ** much less likely that the two ends of the journal record will be 01043 ** correct and the middle be corrupt. Thus, this "checksum" scheme, 01044 ** though fast and simple, catches the mostly likely kind of corruption. 01045 ** 01046 ** FIX ME: Consider adding every 200th (or so) byte of the data to the 01047 ** checksum. 
That way if a single page spans 3 or more disk sectors and 01048 ** only the middle sector is corrupt, we will still have a reasonable 01049 ** chance of failing the checksum and thus detecting the problem. 01050 */ 01051 static u32 pager_cksum(Pager *pPager, const u8 *aData){ 01052 u32 cksum = pPager->cksumInit; 01053 int i = pPager->pageSize-200; 01054 while( i>0 ){ 01055 cksum += aData[i]; 01056 i -= 200; 01057 } 01058 return cksum; 01059 } 01060 01061 /* Forward declaration */ 01062 static void makeClean(PgHdr*); 01063 01064 /* 01065 ** Read a single page from the journal file opened on file descriptor 01066 ** jfd. Playback this one page. 01067 ** 01068 ** The isMainJrnl flag is true if this is the main rollback journal and 01069 ** false for the statement journal. The main rollback journal uses 01070 ** checksums - the statement journal does not. 01071 */ 01072 static int pager_playback_one_page( 01073 Pager *pPager, /* The pager being played back */ 01074 sqlite3_file *jfd, /* The file that is the journal being rolled back */ 01075 i64 offset, /* Offset of the page within the journal */ 01076 int isMainJrnl /* True for main rollback journal. False for Stmt jrnl */ 01077 ){ 01078 int rc; 01079 PgHdr *pPg; /* An existing page in the cache */ 01080 Pgno pgno; /* The page number of a page in journal */ 01081 u32 cksum; /* Checksum used for sanity checking */ 01082 u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */ 01083 01084 /* isMainJrnl should be true for the main journal and false for 01085 ** statement journals. Verify that this is always the case 01086 */ 01087 assert( jfd == (isMainJrnl ? pPager->jfd : pPager->stfd) ); 01088 assert( aData ); 01089 01090 rc = read32bits(jfd, offset, &pgno); 01091 if( rc!=SQLITE_OK ) return rc; 01092 rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4); 01093 if( rc!=SQLITE_OK ) return rc; 01094 pPager->journalOff += pPager->pageSize + 4; 01095 01096 /* Sanity checking on the page. 
This is more important that I originally 01097 ** thought. If a power failure occurs while the journal is being written, 01098 ** it could cause invalid data to be written into the journal. We need to 01099 ** detect this invalid data (with high probability) and ignore it. 01100 */ 01101 if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ 01102 return SQLITE_DONE; 01103 } 01104 if( pgno>(unsigned)pPager->dbSize ){ 01105 return SQLITE_OK; 01106 } 01107 if( isMainJrnl ){ 01108 rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum); 01109 if( rc ) return rc; 01110 pPager->journalOff += 4; 01111 if( pager_cksum(pPager, aData)!=cksum ){ 01112 return SQLITE_DONE; 01113 } 01114 } 01115 01116 assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE ); 01117 01118 /* If the pager is in RESERVED state, then there must be a copy of this 01119 ** page in the pager cache. In this case just update the pager cache, 01120 ** not the database file. The page is left marked dirty in this case. 01121 ** 01122 ** An exception to the above rule: If the database is in no-sync mode 01123 ** and a page is moved during an incremental vacuum then the page may 01124 ** not be in the pager cache. Later: if a malloc() or IO error occurs 01125 ** during a Movepage() call, then the page may not be in the cache 01126 ** either. So the condition described in the above paragraph is not 01127 ** assert()able. 01128 ** 01129 ** If in EXCLUSIVE state, then we update the pager cache if it exists 01130 ** and the main file. The page is then marked not dirty. 01131 ** 01132 ** Ticket #1171: The statement journal might contain page content that is 01133 ** different from the page content at the start of the transaction. 01134 ** This occurs when a page is changed prior to the start of a statement 01135 ** then changed again within the statement. 
When rolling back such a 01136 ** statement we must not write to the original database unless we know 01137 ** for certain that original page contents are synced into the main rollback 01138 ** journal. Otherwise, a power loss might leave modified data in the 01139 ** database file without an entry in the rollback journal that can 01140 ** restore the database to its original form. Two conditions must be 01141 ** met before writing to the database files. (1) the database must be 01142 ** locked. (2) we know that the original page content is fully synced 01143 ** in the main journal either because the page is not in cache or else 01144 ** the page is marked as needSync==0. 01145 ** 01146 ** 2008-04-14: When attempting to vacuum a corrupt database file, it 01147 ** is possible to fail a statement on a database that does not yet exist. 01148 ** Do not attempt to write if database file has never been opened. 01149 */ 01150 pPg = pager_lookup(pPager, pgno); 01151 PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n", 01152 PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData)); 01153 if( (pPager->state>=PAGER_EXCLUSIVE) 01154 && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC)) 01155 && (pPager->fd->pMethods) 01156 ){ 01157 i64 ofst = (pgno-1)*(i64)pPager->pageSize; 01158 rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst); 01159 } 01160 if( pPg ){ 01161 /* No page should ever be explicitly rolled back that is in use, except 01162 ** for page 1 which is held in use in order to keep the lock on the 01163 ** database active. However such a page may be rolled back as a result 01164 ** of an internal error resulting in an automatic call to 01165 ** sqlite3PagerRollback(). 
01166 */ 01167 void *pData; 01168 pData = pPg->pData; 01169 memcpy(pData, aData, pPager->pageSize); 01170 if( pPager->xReiniter ){ 01171 pPager->xReiniter(pPg); 01172 } 01173 if( isMainJrnl ) makeClean(pPg); 01174 #ifdef SQLITE_CHECK_PAGES 01175 pPg->pageHash = pager_pagehash(pPg); 01176 #endif 01177 /* If this was page 1, then restore the value of Pager.dbFileVers. 01178 ** Do this before any decoding. */ 01179 if( pgno==1 ){ 01180 memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); 01181 } 01182 01183 /* Decode the page just read from disk */ 01184 CODEC1(pPager, pData, pPg->pgno, 3); 01185 sqlite3PcacheRelease(pPg); 01186 } 01187 return rc; 01188 } 01189 01190 /* 01191 ** Parameter zMaster is the name of a master journal file. A single journal 01192 ** file that referred to the master journal file has just been rolled back. 01193 ** This routine checks if it is possible to delete the master journal file, 01194 ** and does so if it is. 01195 ** 01196 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 01197 ** available for use within this function. 01198 ** 01199 ** 01200 ** The master journal file contains the names of all child journals. 01201 ** To tell if a master journal can be deleted, check to each of the 01202 ** children. If all children are either missing or do not refer to 01203 ** a different master journal, then this master journal can be deleted. 01204 */ 01205 static int pager_delmaster(Pager *pPager, const char *zMaster){ 01206 sqlite3_vfs *pVfs = pPager->pVfs; 01207 int rc; 01208 int master_open = 0; 01209 sqlite3_file *pMaster; 01210 sqlite3_file *pJournal; 01211 char *zMasterJournal = 0; /* Contents of master journal file */ 01212 i64 nMasterJournal; /* Size of master journal file */ 01213 01214 /* Open the master journal file exclusively in case some other process 01215 ** is running this routine also. Not that it makes too much difference. 
01216 */ 01217 pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2); 01218 pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); 01219 if( !pMaster ){ 01220 rc = SQLITE_NOMEM; 01221 }else{ 01222 int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL); 01223 rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0); 01224 } 01225 if( rc!=SQLITE_OK ) goto delmaster_out; 01226 master_open = 1; 01227 01228 rc = sqlite3OsFileSize(pMaster, &nMasterJournal); 01229 if( rc!=SQLITE_OK ) goto delmaster_out; 01230 01231 if( nMasterJournal>0 ){ 01232 char *zJournal; 01233 char *zMasterPtr = 0; 01234 int nMasterPtr = pPager->pVfs->mxPathname+1; 01235 01236 /* Load the entire master journal file into space obtained from 01237 ** sqlite3_malloc() and pointed to by zMasterJournal. 01238 */ 01239 zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr); 01240 if( !zMasterJournal ){ 01241 rc = SQLITE_NOMEM; 01242 goto delmaster_out; 01243 } 01244 zMasterPtr = &zMasterJournal[nMasterJournal]; 01245 rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0); 01246 if( rc!=SQLITE_OK ) goto delmaster_out; 01247 01248 zJournal = zMasterJournal; 01249 while( (zJournal-zMasterJournal)<nMasterJournal ){ 01250 int exists; 01251 rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists); 01252 if( rc!=SQLITE_OK ){ 01253 goto delmaster_out; 01254 } 01255 if( exists ){ 01256 /* One of the journals pointed to by the master journal exists. 01257 ** Open it and check if it points at the master journal. If 01258 ** so, return without deleting the master journal file. 
01259 */ 01260 int c; 01261 int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL); 01262 rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0); 01263 if( rc!=SQLITE_OK ){ 01264 goto delmaster_out; 01265 } 01266 01267 rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr); 01268 sqlite3OsClose(pJournal); 01269 if( rc!=SQLITE_OK ){ 01270 goto delmaster_out; 01271 } 01272 01273 c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0; 01274 if( c ){ 01275 /* We have a match. Do not delete the master journal file. */ 01276 goto delmaster_out; 01277 } 01278 } 01279 zJournal += (strlen(zJournal)+1); 01280 } 01281 } 01282 01283 rc = sqlite3OsDelete(pVfs, zMaster, 0); 01284 01285 delmaster_out: 01286 if( zMasterJournal ){ 01287 sqlite3_free(zMasterJournal); 01288 } 01289 if( master_open ){ 01290 sqlite3OsClose(pMaster); 01291 } 01292 sqlite3_free(pMaster); 01293 return rc; 01294 } 01295 01296 01297 static void pager_truncate_cache(Pager *pPager); 01298 01299 /* 01300 ** Truncate the main file of the given pager to the number of pages 01301 ** indicated. Also truncate the cached representation of the file. 01302 ** 01303 ** Might might be the case that the file on disk is smaller than nPage. 01304 ** This can happen, for example, if we are in the middle of a transaction 01305 ** which has extended the file size and the new pages are still all held 01306 ** in cache, then an INSERT or UPDATE does a statement rollback. Some 01307 ** operating system implementations can get confused if you try to 01308 ** truncate a file to some size that is larger than it currently is, 01309 ** so detect this case and write a single zero byte to the end of the new 01310 ** file instead. 
01311 */ 01312 static int pager_truncate(Pager *pPager, int nPage){ 01313 int rc = SQLITE_OK; 01314 if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){ 01315 i64 currentSize, newSize; 01316 rc = sqlite3OsFileSize(pPager->fd, ¤tSize); 01317 newSize = pPager->pageSize*(i64)nPage; 01318 if( rc==SQLITE_OK && currentSize!=newSize ){ 01319 if( currentSize>newSize ){ 01320 rc = sqlite3OsTruncate(pPager->fd, newSize); 01321 }else{ 01322 rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1); 01323 } 01324 } 01325 } 01326 if( rc==SQLITE_OK ){ 01327 pPager->dbSize = nPage; 01328 pager_truncate_cache(pPager); 01329 } 01330 return rc; 01331 } 01332 01333 /* 01334 ** Set the sectorSize for the given pager. 01335 ** 01336 ** The sector size is at least as big as the sector size reported 01337 ** by sqlite3OsSectorSize(). The minimum sector size is 512. 01338 */ 01339 static void setSectorSize(Pager *pPager){ 01340 assert(pPager->fd->pMethods||pPager->tempFile); 01341 if( !pPager->tempFile ){ 01342 /* Sector size doesn't matter for temporary files. Also, the file 01343 ** may not have been opened yet, in whcih case the OsSectorSize() 01344 ** call will segfault. 01345 */ 01346 pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); 01347 } 01348 if( pPager->sectorSize<512 ){ 01349 pPager->sectorSize = 512; 01350 } 01351 } 01352 01353 /* 01354 ** Playback the journal and thus restore the database file to 01355 ** the state it was in before we started making changes. 01356 ** 01357 ** The journal file format is as follows: 01358 ** 01359 ** (1) 8 byte prefix. A copy of aJournalMagic[]. 01360 ** (2) 4 byte big-endian integer which is the number of valid page records 01361 ** in the journal. If this value is 0xffffffff, then compute the 01362 ** number of page records from the journal size. 01363 ** (3) 4 byte big-endian integer which is the initial value for the 01364 ** sanity checksum. 
**  (4)  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**  (5)  4 byte big-endian integer which is the sector size.  The header
**       is this many bytes in size.
**  (6)  4 byte big-endian integer which is the page size.
**  (7)  4 byte integer which is the number of bytes in the master journal
**       name.  The value may be zero (indicating that there is no master
**       journal.)
**  (8)  N bytes of the master journal name.  The name will be nul-terminated
**       and might be shorter than the value read from (7).  If the first byte
**       of the name is \000 then there is no master journal.  The master
**       journal name is stored in UTF-8.
**  (9)  Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  pPager->pageSize bytes of data.
**        +  4 byte checksum
**
** When we speak of the journal header, we mean the first 8 items above.
** Each entry in the journal is an instance of the 9th item.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we always use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.
But for things like temporary table (which will be 01398 ** deleted when the power is restored) we don't care. 01399 ** 01400 ** If the file opened as the journal file is not a well-formed 01401 ** journal file then all pages up to the first corrupted page are rolled 01402 ** back (or no pages if the journal header is corrupted). The journal file 01403 ** is then deleted and SQLITE_OK returned, just as if no corruption had 01404 ** been encountered. 01405 ** 01406 ** If an I/O or malloc() error occurs, the journal-file is not deleted 01407 ** and an error code is returned. 01408 */ 01409 static int pager_playback(Pager *pPager, int isHot){ 01410 sqlite3_vfs *pVfs = pPager->pVfs; 01411 i64 szJ; /* Size of the journal file in bytes */ 01412 u32 nRec; /* Number of Records in the journal */ 01413 u32 u; /* Unsigned loop counter */ 01414 Pgno mxPg = 0; /* Size of the original file in pages */ 01415 int rc; /* Result code of a subroutine */ 01416 int res = 1; /* Value returned by sqlite3OsAccess() */ 01417 char *zMaster = 0; /* Name of master journal file if any */ 01418 01419 /* Figure out how many records are in the journal. Abort early if 01420 ** the journal is empty. 01421 */ 01422 assert( pPager->journalOpen ); 01423 rc = sqlite3OsFileSize(pPager->jfd, &szJ); 01424 if( rc!=SQLITE_OK || szJ==0 ){ 01425 goto end_playback; 01426 } 01427 01428 /* Read the master journal name from the journal, if it is present. 01429 ** If a master journal file name is specified, but the file is not 01430 ** present on disk, then the journal is not hot and does not need to be 01431 ** played back. 
01432 */ 01433 zMaster = pPager->pTmpSpace; 01434 rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); 01435 if( rc==SQLITE_OK && zMaster[0] ){ 01436 rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res); 01437 } 01438 zMaster = 0; 01439 if( rc!=SQLITE_OK || !res ){ 01440 goto end_playback; 01441 } 01442 pPager->journalOff = 0; 01443 01444 /* This loop terminates either when the readJournalHdr() call returns 01445 ** SQLITE_DONE or an IO error occurs. */ 01446 while( 1 ){ 01447 01448 /* Read the next journal header from the journal file. If there are 01449 ** not enough bytes left in the journal file for a complete header, or 01450 ** it is corrupted, then a process must of failed while writing it. 01451 ** This indicates nothing more needs to be rolled back. 01452 */ 01453 rc = readJournalHdr(pPager, szJ, &nRec, &mxPg); 01454 if( rc!=SQLITE_OK ){ 01455 if( rc==SQLITE_DONE ){ 01456 rc = SQLITE_OK; 01457 } 01458 goto end_playback; 01459 } 01460 01461 /* If nRec is 0xffffffff, then this journal was created by a process 01462 ** working in no-sync mode. This means that the rest of the journal 01463 ** file consists of pages, there are no more journal headers. Compute 01464 ** the value of nRec based on this assumption. 01465 */ 01466 if( nRec==0xffffffff ){ 01467 assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); 01468 nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager); 01469 } 01470 01471 /* If nRec is 0 and this rollback is of a transaction created by this 01472 ** process and if this is the final header in the journal, then it means 01473 ** that this part of the journal was being filled but has not yet been 01474 ** synced to disk. Compute the number of pages based on the remaining 01475 ** size of the file. 01476 ** 01477 ** The third term of the test was added to fix ticket #2565. 
01478 */ 01479 if( nRec==0 && !isHot && 01480 pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ 01481 nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager); 01482 } 01483 01484 /* If this is the first header read from the journal, truncate the 01485 ** database file back to its original size. 01486 */ 01487 if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ 01488 rc = pager_truncate(pPager, mxPg); 01489 if( rc!=SQLITE_OK ){ 01490 goto end_playback; 01491 } 01492 } 01493 01494 /* Copy original pages out of the journal and back into the database file. 01495 */ 01496 for(u=0; u<nRec; u++){ 01497 rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); 01498 if( rc!=SQLITE_OK ){ 01499 if( rc==SQLITE_DONE ){ 01500 rc = SQLITE_OK; 01501 pPager->journalOff = szJ; 01502 break; 01503 }else{ 01504 /* If we are unable to rollback, then the database is probably 01505 ** going to end up being corrupt. It is corrupt to us, anyhow. 01506 ** Perhaps the next process to come along can fix it.... 01507 */ 01508 rc = SQLITE_CORRUPT_BKPT; 01509 goto end_playback; 01510 } 01511 } 01512 } 01513 } 01514 /*NOTREACHED*/ 01515 assert( 0 ); 01516 01517 end_playback: 01518 if( rc==SQLITE_OK ){ 01519 zMaster = pPager->pTmpSpace; 01520 rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); 01521 } 01522 if( rc==SQLITE_OK ){ 01523 rc = pager_end_transaction(pPager, zMaster[0]!='\0'); 01524 } 01525 if( rc==SQLITE_OK && zMaster[0] && res ){ 01526 /* If there was a master journal and this routine will return success, 01527 ** see if it is possible to delete the master journal. 01528 */ 01529 rc = pager_delmaster(pPager, zMaster); 01530 } 01531 01532 /* The Pager.sectorSize variable may have been updated while rolling 01533 ** back a journal created by a process with a different sector size 01534 ** value. Reset it to the correct value for this process. 
01535 */ 01536 setSectorSize(pPager); 01537 return rc; 01538 } 01539 01540 /* 01541 ** Playback the statement journal. 01542 ** 01543 ** This is similar to playing back the transaction journal but with 01544 ** a few extra twists. 01545 ** 01546 ** (1) The number of pages in the database file at the start of 01547 ** the statement is stored in pPager->stmtSize, not in the 01548 ** journal file itself. 01549 ** 01550 ** (2) In addition to playing back the statement journal, also 01551 ** playback all pages of the transaction journal beginning 01552 ** at offset pPager->stmtJSize. 01553 */ 01554 static int pager_stmt_playback(Pager *pPager){ 01555 i64 szJ; /* Size of the full journal */ 01556 i64 hdrOff; 01557 int nRec; /* Number of Records */ 01558 int i; /* Loop counter */ 01559 int rc; 01560 01561 szJ = pPager->journalOff; 01562 01563 /* Set hdrOff to be the offset just after the end of the last journal 01564 ** page written before the first journal-header for this statement 01565 ** transaction was written, or the end of the file if no journal 01566 ** header was written. 01567 */ 01568 hdrOff = pPager->stmtHdrOff; 01569 assert( pPager->fullSync || !hdrOff ); 01570 if( !hdrOff ){ 01571 hdrOff = szJ; 01572 } 01573 01574 /* Truncate the database back to its original size. 01575 */ 01576 rc = pager_truncate(pPager, pPager->stmtSize); 01577 assert( pPager->state>=PAGER_SHARED ); 01578 01579 /* Figure out how many records are in the statement journal. 01580 */ 01581 assert( pPager->stmtInUse && pPager->journalOpen ); 01582 nRec = pPager->stmtNRec; 01583 01584 /* Copy original pages out of the statement journal and back into the 01585 ** database file. Note that the statement journal omits checksums from 01586 ** each record since power-failure recovery is not important to statement 01587 ** journals. 
01588 */ 01589 for(i=0; i<nRec; i++){ 01590 i64 offset = i*(4+pPager->pageSize); 01591 rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0); 01592 assert( rc!=SQLITE_DONE ); 01593 if( rc!=SQLITE_OK ) goto end_stmt_playback; 01594 } 01595 01596 /* Now roll some pages back from the transaction journal. Pager.stmtJSize 01597 ** was the size of the journal file when this statement was started, so 01598 ** everything after that needs to be rolled back, either into the 01599 ** database, the memory cache, or both. 01600 ** 01601 ** If it is not zero, then Pager.stmtHdrOff is the offset to the start 01602 ** of the first journal header written during this statement transaction. 01603 */ 01604 pPager->journalOff = pPager->stmtJSize; 01605 pPager->cksumInit = pPager->stmtCksum; 01606 while( pPager->journalOff < hdrOff ){ 01607 rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); 01608 assert( rc!=SQLITE_DONE ); 01609 if( rc!=SQLITE_OK ) goto end_stmt_playback; 01610 } 01611 01612 while( pPager->journalOff < szJ ){ 01613 u32 nJRec; /* Number of Journal Records */ 01614 u32 dummy; 01615 rc = readJournalHdr(pPager, szJ, &nJRec, &dummy); 01616 if( rc!=SQLITE_OK ){ 01617 assert( rc!=SQLITE_DONE ); 01618 goto end_stmt_playback; 01619 } 01620 if( nJRec==0 ){ 01621 nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8); 01622 } 01623 for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){ 01624 rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); 01625 assert( rc!=SQLITE_DONE ); 01626 if( rc!=SQLITE_OK ) goto end_stmt_playback; 01627 } 01628 } 01629 01630 pPager->journalOff = szJ; 01631 01632 end_stmt_playback: 01633 if( rc==SQLITE_OK) { 01634 pPager->journalOff = szJ; 01635 /* pager_reload_cache(pPager); */ 01636 } 01637 return rc; 01638 } 01639 01640 /* 01641 ** Change the maximum number of in-memory pages that are allowed. 
01642 */ 01643 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ 01644 sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); 01645 } 01646 01647 /* 01648 ** Adjust the robustness of the database to damage due to OS crashes 01649 ** or power failures by changing the number of syncs()s when writing 01650 ** the rollback journal. There are three levels: 01651 ** 01652 ** OFF sqlite3OsSync() is never called. This is the default 01653 ** for temporary and transient files. 01654 ** 01655 ** NORMAL The journal is synced once before writes begin on the 01656 ** database. This is normally adequate protection, but 01657 ** it is theoretically possible, though very unlikely, 01658 ** that an inopertune power failure could leave the journal 01659 ** in a state which would cause damage to the database 01660 ** when it is rolled back. 01661 ** 01662 ** FULL The journal is synced twice before writes begin on the 01663 ** database (with some additional information - the nRec field 01664 ** of the journal header - being written in between the two 01665 ** syncs). If we assume that writing a 01666 ** single disk sector is atomic, then this mode provides 01667 ** assurance that the journal will not be corrupted to the 01668 ** point of causing damage to the database during rollback. 01669 ** 01670 ** Numeric values associated with these states are OFF==1, NORMAL=2, 01671 ** and FULL=3. 01672 */ 01673 #ifndef SQLITE_OMIT_PAGER_PRAGMAS 01674 void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){ 01675 pPager->noSync = level==1 || pPager->tempFile; 01676 pPager->fullSync = level==3 && !pPager->tempFile; 01677 pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL); 01678 if( pPager->noSync ) pPager->needSync = 0; 01679 } 01680 #endif 01681 01682 /* 01683 ** The following global variable is incremented whenever the library 01684 ** attempts to open a temporary file. This information is used for 01685 ** testing and analysis only. 
01686 */ 01687 #ifdef SQLITE_TEST 01688 int sqlite3_opentemp_count = 0; 01689 #endif 01690 01691 /* 01692 ** Open a temporary file. 01693 ** 01694 ** Write the file descriptor into *fd. Return SQLITE_OK on success or some 01695 ** other error code if we fail. The OS will automatically delete the temporary 01696 ** file when it is closed. 01697 */ 01698 static int sqlite3PagerOpentemp( 01699 Pager *pPager, /* The pager object */ 01700 sqlite3_file *pFile, /* Write the file descriptor here */ 01701 int vfsFlags /* Flags passed through to the VFS */ 01702 ){ 01703 int rc; 01704 01705 #ifdef SQLITE_TEST 01706 sqlite3_opentemp_count++; /* Used for testing and analysis only */ 01707 #endif 01708 01709 vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | 01710 SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; 01711 rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); 01712 assert( rc!=SQLITE_OK || pFile->pMethods ); 01713 return rc; 01714 } 01715 01716 static int pagerStress(void *,PgHdr *); 01717 01718 /* 01719 ** Create a new page cache and put a pointer to the page cache in *ppPager. 01720 ** The file to be cached need not exist. The file is not locked until 01721 ** the first call to sqlite3PagerGet() and is only held open until the 01722 ** last page is released using sqlite3PagerUnref(). 01723 ** 01724 ** If zFilename is NULL then a randomly-named temporary file is created 01725 ** and used as the file to be cached. The file will be deleted 01726 ** automatically when it is closed. 01727 ** 01728 ** If zFilename is ":memory:" then all information is held in cache. 01729 ** It is never written to disk. This can be used to implement an 01730 ** in-memory database. 
01731 */ 01732 int sqlite3PagerOpen( 01733 sqlite3_vfs *pVfs, /* The virtual file system to use */ 01734 Pager **ppPager, /* Return the Pager structure here */ 01735 const char *zFilename, /* Name of the database file to open */ 01736 int nExtra, /* Extra bytes append to each in-memory page */ 01737 int flags, /* flags controlling this file */ 01738 int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */ 01739 ){ 01740 u8 *pPtr; 01741 Pager *pPager = 0; 01742 int rc = SQLITE_OK; 01743 int i; 01744 int tempFile = 0; 01745 int memDb = 0; 01746 int readOnly = 0; 01747 int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; 01748 int noReadlock = (flags & PAGER_NO_READLOCK)!=0; 01749 int journalFileSize; 01750 int pcacheSize = sqlite3PcacheSize(); 01751 int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; 01752 char *zPathname = 0; 01753 int nPathname = 0; 01754 01755 if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){ 01756 journalFileSize = sqlite3JournalSize(pVfs); 01757 }else{ 01758 journalFileSize = sqlite3MemJournalSize(); 01759 } 01760 01761 /* The default return is a NULL pointer */ 01762 *ppPager = 0; 01763 01764 /* Compute and store the full pathname in an allocated buffer pointed 01765 ** to by zPathname, length nPathname. Or, if this is a temporary file, 01766 ** leave both nPathname and zPathname set to 0. 
01767 */ 01768 if( zFilename && zFilename[0] ){ 01769 nPathname = pVfs->mxPathname+1; 01770 zPathname = sqlite3Malloc(nPathname*2); 01771 if( zPathname==0 ){ 01772 return SQLITE_NOMEM; 01773 } 01774 #ifndef SQLITE_OMIT_MEMORYDB 01775 if( strcmp(zFilename,":memory:")==0 ){ 01776 memDb = 1; 01777 zPathname[0] = 0; 01778 }else 01779 #endif 01780 { 01781 rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); 01782 } 01783 if( rc!=SQLITE_OK ){ 01784 sqlite3_free(zPathname); 01785 return rc; 01786 } 01787 nPathname = strlen(zPathname); 01788 } 01789 01790 /* Allocate memory for the pager structure */ 01791 pPager = sqlite3MallocZero( 01792 sizeof(*pPager) + /* Pager structure */ 01793 pcacheSize + /* PCache object */ 01794 journalFileSize + /* The journal file structure */ 01795 pVfs->szOsFile + /* The main db file */ 01796 journalFileSize * 2 + /* The two journal files */ 01797 3*nPathname + 40 /* zFilename, zDirectory, zJournal */ 01798 ); 01799 if( !pPager ){ 01800 sqlite3_free(zPathname); 01801 return SQLITE_NOMEM; 01802 } 01803 pPager->pPCache = (PCache *)&pPager[1]; 01804 pPtr = ((u8 *)&pPager[1]) + pcacheSize; 01805 pPager->vfsFlags = vfsFlags; 01806 pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0]; 01807 pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile]; 01808 pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile+journalFileSize]; 01809 pPager->zFilename = (char*)&pPtr[pVfs->szOsFile+2*journalFileSize]; 01810 pPager->zDirectory = &pPager->zFilename[nPathname+1]; 01811 pPager->zJournal = &pPager->zDirectory[nPathname+1]; 01812 pPager->pVfs = pVfs; 01813 if( zPathname ){ 01814 memcpy(pPager->zFilename, zPathname, nPathname+1); 01815 sqlite3_free(zPathname); 01816 } 01817 01818 /* Open the pager file. 
01819 */ 01820 if( zFilename && zFilename[0] && !memDb ){ 01821 if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){ 01822 rc = SQLITE_CANTOPEN; 01823 }else{ 01824 int fout = 0; 01825 rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, 01826 pPager->vfsFlags, &fout); 01827 readOnly = (fout&SQLITE_OPEN_READONLY); 01828 01829 /* If the file was successfully opened for read/write access, 01830 ** choose a default page size in case we have to create the 01831 ** database file. The default page size is the maximum of: 01832 ** 01833 ** + SQLITE_DEFAULT_PAGE_SIZE, 01834 ** + The value returned by sqlite3OsSectorSize() 01835 ** + The largest page size that can be written atomically. 01836 */ 01837 if( rc==SQLITE_OK && !readOnly ){ 01838 int iSectorSize = sqlite3OsSectorSize(pPager->fd); 01839 if( szPageDflt<iSectorSize ){ 01840 szPageDflt = iSectorSize; 01841 } 01842 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 01843 { 01844 int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); 01845 int ii; 01846 assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); 01847 assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); 01848 assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); 01849 for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ 01850 if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii; 01851 } 01852 } 01853 #endif 01854 if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ 01855 szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; 01856 } 01857 } 01858 } 01859 }else{ 01860 /* If a temporary file is requested, it is not opened immediately. 01861 ** In this case we accept the default page size and delay actually 01862 ** opening the file until the first call to OsWrite(). 01863 ** 01864 ** This branch is also run for an in-memory database. An in-memory 01865 ** database is the same as a temp-file that is never written out to 01866 ** disk and uses an in-memory rollback journal. 
01867 */ 01868 tempFile = 1; 01869 pPager->state = PAGER_EXCLUSIVE; 01870 } 01871 01872 if( pPager && rc==SQLITE_OK ){ 01873 pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt); 01874 } 01875 01876 /* If an error occured in either of the blocks above. 01877 ** Free the Pager structure and close the file. 01878 ** Since the pager is not allocated there is no need to set 01879 ** any Pager.errMask variables. 01880 */ 01881 if( !pPager || !pPager->pTmpSpace ){ 01882 sqlite3OsClose(pPager->fd); 01883 sqlite3_free(pPager); 01884 return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc); 01885 } 01886 nExtra = FORCE_ALIGNMENT(nExtra); 01887 sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, 01888 !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); 01889 01890 PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename); 01891 IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) 01892 01893 /* Fill in Pager.zDirectory[] */ 01894 memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1); 01895 for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){} 01896 if( i>0 ) pPager->zDirectory[i-1] = 0; 01897 01898 /* Fill in Pager.zJournal[] */ 01899 if( zPathname ){ 01900 memcpy(pPager->zJournal, pPager->zFilename, nPathname); 01901 memcpy(&pPager->zJournal[nPathname], "-journal", 9); 01902 }else{ 01903 pPager->zJournal = 0; 01904 } 01905 01906 /* pPager->journalOpen = 0; */ 01907 pPager->useJournal = useJournal; 01908 pPager->noReadlock = noReadlock && readOnly; 01909 /* pPager->stmtOpen = 0; */ 01910 /* pPager->stmtInUse = 0; */ 01911 /* pPager->nRef = 0; */ 01912 pPager->dbSize = memDb-1; 01913 pPager->pageSize = szPageDflt; 01914 /* pPager->stmtSize = 0; */ 01915 /* pPager->stmtJSize = 0; */ 01916 /* pPager->nPage = 0; */ 01917 pPager->mxPage = 100; 01918 pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; 01919 /* pPager->state = PAGER_UNLOCK; */ 01920 assert( pPager->state == (tempFile ? 
PAGER_EXCLUSIVE : PAGER_UNLOCK) ); 01921 /* pPager->errMask = 0; */ 01922 pPager->tempFile = tempFile; 01923 assert( tempFile==PAGER_LOCKINGMODE_NORMAL 01924 || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); 01925 assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); 01926 pPager->exclusiveMode = tempFile; 01927 pPager->memDb = memDb; 01928 pPager->readOnly = readOnly; 01929 /* pPager->needSync = 0; */ 01930 pPager->noSync = pPager->tempFile || !useJournal; 01931 pPager->fullSync = (pPager->noSync?0:1); 01932 pPager->sync_flags = SQLITE_SYNC_NORMAL; 01933 /* pPager->pFirst = 0; */ 01934 /* pPager->pFirstSynced = 0; */ 01935 /* pPager->pLast = 0; */ 01936 pPager->nExtra = nExtra; 01937 pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; 01938 assert(pPager->fd->pMethods||tempFile); 01939 setSectorSize(pPager); 01940 if( memDb ){ 01941 pPager->journalMode = PAGER_JOURNALMODE_MEMORY; 01942 } 01943 /* pPager->pBusyHandler = 0; */ 01944 /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ 01945 *ppPager = pPager; 01946 return SQLITE_OK; 01947 } 01948 01949 /* 01950 ** Set the busy handler function. 01951 */ 01952 void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){ 01953 pPager->pBusyHandler = pBusyHandler; 01954 } 01955 01956 /* 01957 ** Set the reinitializer for this pager. If not NULL, the reinitializer 01958 ** is called when the content of a page in cache is restored to its original 01959 ** value as a result of a rollback. The callback gives higher-level code 01960 ** an opportunity to restore the EXTRA section to agree with the restored 01961 ** page data. 01962 */ 01963 void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){ 01964 pPager->xReiniter = xReinit; 01965 } 01966 01967 /* 01968 ** Set the page size to *pPageSize. If the suggest new page size is 01969 ** inappropriate, then an alternative page size is set to that 01970 ** value before returning. 
01971 */ 01972 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){ 01973 int rc = pPager->errCode; 01974 if( rc==SQLITE_OK ){ 01975 u16 pageSize = *pPageSize; 01976 assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); 01977 if( pageSize && pageSize!=pPager->pageSize 01978 && (pPager->memDb==0 || pPager->dbSize==0) 01979 && sqlite3PcacheRefCount(pPager->pPCache)==0 01980 ){ 01981 char *pNew = (char *)sqlite3PageMalloc(pageSize); 01982 if( !pNew ){ 01983 rc = SQLITE_NOMEM; 01984 }else{ 01985 pager_reset(pPager); 01986 pPager->pageSize = pageSize; 01987 if( !pPager->memDb ) setSectorSize(pPager); 01988 sqlite3PageFree(pPager->pTmpSpace); 01989 pPager->pTmpSpace = pNew; 01990 sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); 01991 } 01992 } 01993 *pPageSize = pPager->pageSize; 01994 } 01995 return rc; 01996 } 01997 01998 /* 01999 ** Return a pointer to the "temporary page" buffer held internally 02000 ** by the pager. This is a buffer that is big enough to hold the 02001 ** entire content of a database page. This buffer is used internally 02002 ** during rollback and will be overwritten whenever a rollback 02003 ** occurs. But other modules are free to use it too, as long as 02004 ** no rollbacks are happening. 02005 */ 02006 void *sqlite3PagerTempSpace(Pager *pPager){ 02007 return pPager->pTmpSpace; 02008 } 02009 02010 /* 02011 ** Attempt to set the maximum database page count if mxPage is positive. 02012 ** Make no changes if mxPage is zero or negative. And never reduce the 02013 ** maximum page count below the current size of the database. 02014 ** 02015 ** Regardless of mxPage, return the current maximum page count. 
02016 */ 02017 int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){ 02018 if( mxPage>0 ){ 02019 pPager->mxPgno = mxPage; 02020 } 02021 sqlite3PagerPagecount(pPager, 0); 02022 return pPager->mxPgno; 02023 } 02024 02025 /* 02026 ** The following set of routines are used to disable the simulated 02027 ** I/O error mechanism. These routines are used to avoid simulated 02028 ** errors in places where we do not care about errors. 02029 ** 02030 ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops 02031 ** and generate no code. 02032 */ 02033 #ifdef SQLITE_TEST 02034 extern int sqlite3_io_error_pending; 02035 extern int sqlite3_io_error_hit; 02036 static int saved_cnt; 02037 void disable_simulated_io_errors(void){ 02038 saved_cnt = sqlite3_io_error_pending; 02039 sqlite3_io_error_pending = -1; 02040 } 02041 void enable_simulated_io_errors(void){ 02042 sqlite3_io_error_pending = saved_cnt; 02043 } 02044 #else 02045 # define disable_simulated_io_errors() 02046 # define enable_simulated_io_errors() 02047 #endif 02048 02049 /* 02050 ** Read the first N bytes from the beginning of the file into memory 02051 ** that pDest points to. 02052 ** 02053 ** No error checking is done. The rational for this is that this function 02054 ** may be called even if the file does not exist or contain a header. In 02055 ** these cases sqlite3OsRead() will return an error, to which the correct 02056 ** response is to zero the memory at pDest and continue. A real IO error 02057 ** will presumably recur and be picked up later (Todo: Think about this). 
02058 */ 02059 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ 02060 int rc = SQLITE_OK; 02061 memset(pDest, 0, N); 02062 assert(pPager->fd->pMethods||pPager->tempFile); 02063 if( pPager->fd->pMethods ){ 02064 IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) 02065 rc = sqlite3OsRead(pPager->fd, pDest, N, 0); 02066 if( rc==SQLITE_IOERR_SHORT_READ ){ 02067 rc = SQLITE_OK; 02068 } 02069 } 02070 return rc; 02071 } 02072 02073 /* 02074 ** Return the total number of pages in the disk file associated with 02075 ** pPager. 02076 ** 02077 ** If the PENDING_BYTE lies on the page directly after the end of the 02078 ** file, then consider this page part of the file too. For example, if 02079 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the 02080 ** file is 4096 bytes, 5 is returned instead of 4. 02081 */ 02082 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ 02083 i64 n = 0; 02084 int rc; 02085 assert( pPager!=0 ); 02086 if( pPager->errCode ){ 02087 rc = pPager->errCode; 02088 return rc; 02089 } 02090 if( pPager->dbSize>=0 ){ 02091 n = pPager->dbSize; 02092 } else { 02093 assert(pPager->fd->pMethods||pPager->tempFile); 02094 if( (pPager->fd->pMethods) 02095 && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){ 02096 pager_error(pPager, rc); 02097 return rc; 02098 } 02099 if( n>0 && n<pPager->pageSize ){ 02100 n = 1; 02101 }else{ 02102 n /= pPager->pageSize; 02103 } 02104 if( pPager->state!=PAGER_UNLOCK ){ 02105 pPager->dbSize = n; 02106 } 02107 } 02108 if( n==(PENDING_BYTE/pPager->pageSize) ){ 02109 n++; 02110 } 02111 if( n>pPager->mxPgno ){ 02112 pPager->mxPgno = n; 02113 } 02114 if( pnPage ){ 02115 *pnPage = n; 02116 } 02117 return SQLITE_OK; 02118 } 02119 02120 /* 02121 ** Forward declaration 02122 */ 02123 static int syncJournal(Pager*); 02124 02125 /* 02126 ** This routine is used to truncate the cache when a database 02127 ** is truncated. 
Drop from the cache all pages whose pgno is 02128 ** larger than pPager->dbSize and is unreferenced. 02129 ** 02130 ** Referenced pages larger than pPager->dbSize are zeroed. 02131 ** 02132 ** Actually, at the point this routine is called, it would be 02133 ** an error to have a referenced page. But rather than delete 02134 ** that page and guarantee a subsequent segfault, it seems better 02135 ** to zero it and hope that we error out sanely. 02136 */ 02137 static void pager_truncate_cache(Pager *pPager){ 02138 sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); 02139 } 02140 02141 /* 02142 ** Try to obtain a lock on a file. Invoke the busy callback if the lock 02143 ** is currently not available. Repeat until the busy callback returns 02144 ** false or until the lock succeeds. 02145 ** 02146 ** Return SQLITE_OK on success and an error code if we cannot obtain 02147 ** the lock. 02148 */ 02149 static int pager_wait_on_lock(Pager *pPager, int locktype){ 02150 int rc; 02151 02152 /* The OS lock values must be the same as the Pager lock values */ 02153 assert( PAGER_SHARED==SHARED_LOCK ); 02154 assert( PAGER_RESERVED==RESERVED_LOCK ); 02155 assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK ); 02156 02157 /* If the file is currently unlocked then the size must be unknown */ 02158 assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 ); 02159 02160 if( pPager->state>=locktype ){ 02161 rc = SQLITE_OK; 02162 }else{ 02163 if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0; 02164 do { 02165 rc = sqlite3OsLock(pPager->fd, locktype); 02166 }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) ); 02167 if( rc==SQLITE_OK ){ 02168 pPager->state = locktype; 02169 IOTRACE(("LOCK %p %d\n", pPager, locktype)) 02170 } 02171 } 02172 return rc; 02173 } 02174 02175 /* 02176 ** Truncate the file to the number of pages specified. 
02177 */ 02178 int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){ 02179 int rc = SQLITE_OK; 02180 assert( pPager->state>=PAGER_SHARED ); 02181 02182 sqlite3PagerPagecount(pPager, 0); 02183 if( pPager->errCode ){ 02184 rc = pPager->errCode; 02185 }else if( nPage<(unsigned)pPager->dbSize ){ 02186 rc = syncJournal(pPager); 02187 if( rc==SQLITE_OK ){ 02188 /* Get an exclusive lock on the database before truncating. */ 02189 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 02190 } 02191 if( rc==SQLITE_OK ){ 02192 rc = pager_truncate(pPager, nPage); 02193 } 02194 } 02195 02196 return rc; 02197 } 02198 02199 /* 02200 ** Shutdown the page cache. Free all memory and close all files. 02201 ** 02202 ** If a transaction was in progress when this routine is called, that 02203 ** transaction is rolled back. All outstanding pages are invalidated 02204 ** and their memory is freed. Any attempt to use a page associated 02205 ** with this page cache after this function returns will likely 02206 ** result in a coredump. 02207 ** 02208 ** This function always succeeds. If a transaction is active an attempt 02209 ** is made to roll it back. If an error occurs during the rollback 02210 ** a hot journal may be left in the filesystem but no error is returned 02211 ** to the caller. 
02212 */ 02213 int sqlite3PagerClose(Pager *pPager){ 02214 02215 disable_simulated_io_errors(); 02216 sqlite3BeginBenignMalloc(); 02217 pPager->errCode = 0; 02218 pPager->exclusiveMode = 0; 02219 pager_reset(pPager); 02220 if( !MEMDB ){ 02221 pagerUnlockAndRollback(pPager); 02222 } 02223 enable_simulated_io_errors(); 02224 sqlite3EndBenignMalloc(); 02225 PAGERTRACE2("CLOSE %d\n", PAGERID(pPager)); 02226 IOTRACE(("CLOSE %p\n", pPager)) 02227 if( pPager->journalOpen ){ 02228 sqlite3OsClose(pPager->jfd); 02229 } 02230 sqlite3BitvecDestroy(pPager->pInJournal); 02231 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 02232 if( pPager->stmtOpen ){ 02233 sqlite3OsClose(pPager->stfd); 02234 } 02235 sqlite3OsClose(pPager->fd); 02236 /* Temp files are automatically deleted by the OS 02237 ** if( pPager->tempFile ){ 02238 ** sqlite3OsDelete(pPager->zFilename); 02239 ** } 02240 */ 02241 02242 sqlite3PageFree(pPager->pTmpSpace); 02243 sqlite3PcacheClose(pPager->pPCache); 02244 sqlite3_free(pPager); 02245 return SQLITE_OK; 02246 } 02247 02248 #if !defined(NDEBUG) || defined(SQLITE_TEST) 02249 /* 02250 ** Return the page number for the given page data. 02251 */ 02252 Pgno sqlite3PagerPagenumber(DbPage *p){ 02253 return p->pgno; 02254 } 02255 #endif 02256 02257 /* 02258 ** Increment the reference count for a page. The input pointer is 02259 ** a reference to the page data. 02260 */ 02261 int sqlite3PagerRef(DbPage *pPg){ 02262 sqlite3PcacheRef(pPg); 02263 return SQLITE_OK; 02264 } 02265 02266 /* 02267 ** Sync the journal. In other words, make sure all the pages that have 02268 ** been written to the journal have actually reached the surface of the 02269 ** disk. It is not safe to modify the original database file until after 02270 ** the journal has been synced. 
If the original database is modified before 02271 ** the journal is synced and a power failure occurs, the unsynced journal 02272 ** data would be lost and we would be unable to completely rollback the 02273 ** database changes. Database corruption would occur. 02274 ** 02275 ** This routine also updates the nRec field in the header of the journal. 02276 ** (See comments on the pager_playback() routine for additional information.) 02277 ** If the sync mode is FULL, two syncs will occur. First the whole journal 02278 ** is synced, then the nRec field is updated, then a second sync occurs. 02279 ** 02280 ** For temporary databases, we do not care if we are able to rollback 02281 ** after a power failure, so no sync occurs. 02282 ** 02283 ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which 02284 ** the database is stored, then OsSync() is never called on the journal 02285 ** file. In this case all that is required is to update the nRec field in 02286 ** the journal header. 02287 ** 02288 ** This routine clears the needSync field of every page current held in 02289 ** memory. 02290 */ 02291 static int syncJournal(Pager *pPager){ 02292 int rc = SQLITE_OK; 02293 02294 /* Sync the journal before modifying the main database 02295 ** (assuming there is a journal and it needs to be synced.) 02296 */ 02297 if( pPager->needSync ){ 02298 assert( !pPager->tempFile ); 02299 if( pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ 02300 int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); 02301 assert( pPager->journalOpen ); 02302 02303 if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ 02304 /* Write the nRec value into the journal file header. If in 02305 ** full-synchronous mode, sync the journal first. This ensures that 02306 ** all data has really hit the disk before nRec is updated to mark 02307 ** it as a candidate for rollback. 02308 ** 02309 ** This is not required if the persistent media supports the 02310 ** SAFE_APPEND property. 
Because in this case it is not possible 02311 ** for garbage data to be appended to the file, the nRec field 02312 ** is populated with 0xFFFFFFFF when the journal header is written 02313 ** and never needs to be updated. 02314 */ 02315 i64 jrnlOff; 02316 if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ 02317 PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager)); 02318 IOTRACE(("JSYNC %p\n", pPager)) 02319 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags); 02320 if( rc!=0 ) return rc; 02321 } 02322 02323 jrnlOff = pPager->journalHdr + sizeof(aJournalMagic); 02324 IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4)); 02325 rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec); 02326 if( rc ) return rc; 02327 } 02328 if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ 02329 PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager)); 02330 IOTRACE(("JSYNC %p\n", pPager)) 02331 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 02332 (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) 02333 ); 02334 if( rc!=0 ) return rc; 02335 } 02336 pPager->journalStarted = 1; 02337 } 02338 pPager->needSync = 0; 02339 02340 /* Erase the needSync flag from every page. 02341 */ 02342 sqlite3PcacheClearFlags(pPager->pPCache, PGHDR_NEED_SYNC); 02343 } 02344 02345 #ifndef NDEBUG 02346 /* If the Pager.needSync flag is clear then the PgHdr.needSync 02347 ** flag must also be clear for all pages. Verify that this 02348 ** invariant is true. 02349 */ 02350 else{ 02351 sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_NEED_SYNC); 02352 } 02353 #endif 02354 02355 return rc; 02356 } 02357 02358 /* 02359 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write 02360 ** every one of those pages out to the database file. No calls are made 02361 ** to the page-cache to mark the pages as clean. It is the responsibility 02362 ** of the caller to use PcacheCleanAll() or PcacheMakeClean() to mark 02363 ** the pages as clean. 
02364 */ 02365 static int pager_write_pagelist(PgHdr *pList){ 02366 Pager *pPager; 02367 int rc; 02368 02369 if( pList==0 ) return SQLITE_OK; 02370 pPager = pList->pPager; 02371 02372 /* At this point there may be either a RESERVED or EXCLUSIVE lock on the 02373 ** database file. If there is already an EXCLUSIVE lock, the following 02374 ** calls to sqlite3OsLock() are no-ops. 02375 ** 02376 ** Moving the lock from RESERVED to EXCLUSIVE actually involves going 02377 ** through an intermediate state PENDING. A PENDING lock prevents new 02378 ** readers from attaching to the database but is unsufficient for us to 02379 ** write. The idea of a PENDING lock is to prevent new readers from 02380 ** coming in while we wait for existing readers to clear. 02381 ** 02382 ** While the pager is in the RESERVED state, the original database file 02383 ** is unchanged and we can rollback without having to playback the 02384 ** journal into the original database file. Once we transition to 02385 ** EXCLUSIVE, it means the database file has been changed and any rollback 02386 ** will require a journal playback. 02387 */ 02388 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 02389 if( rc!=SQLITE_OK ){ 02390 return rc; 02391 } 02392 02393 while( pList ){ 02394 02395 /* If the file has not yet been opened, open it now. */ 02396 if( !pPager->fd->pMethods ){ 02397 assert(pPager->tempFile); 02398 rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); 02399 if( rc ) return rc; 02400 } 02401 02402 /* If there are dirty pages in the page cache with page numbers greater 02403 ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to 02404 ** make the file smaller (presumably by auto-vacuum code). Do not write 02405 ** any such pages to the file. 
02406 */ 02407 if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ 02408 i64 offset = (pList->pgno-1)*(i64)pPager->pageSize; 02409 char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6); 02410 PAGERTRACE4("STORE %d page %d hash(%08x)\n", 02411 PAGERID(pPager), pList->pgno, pager_pagehash(pList)); 02412 IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno)); 02413 rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); 02414 PAGER_INCR(sqlite3_pager_writedb_count); 02415 PAGER_INCR(pPager->nWrite); 02416 if( pList->pgno==1 ){ 02417 memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); 02418 } 02419 } 02420 #ifndef NDEBUG 02421 else{ 02422 PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno); 02423 } 02424 #endif 02425 if( rc ) return rc; 02426 #ifdef SQLITE_CHECK_PAGES 02427 pList->pageHash = pager_pagehash(pList); 02428 #endif 02429 pList = pList->pDirty; 02430 } 02431 02432 return SQLITE_OK; 02433 } 02434 02435 /* 02436 ** This function is called by the pcache layer when it has reached some 02437 ** soft memory limit. The argument is a pointer to a purgeable Pager 02438 ** object. This function attempts to make a single dirty page that has no 02439 ** outstanding references (if one exists) clean so that it can be recycled 02440 ** by the pcache layer. 
02441 */ 02442 static int pagerStress(void *p, PgHdr *pPg){ 02443 Pager *pPager = (Pager *)p; 02444 int rc = SQLITE_OK; 02445 02446 if( pPager->doNotSync ){ 02447 return SQLITE_OK; 02448 } 02449 02450 assert( pPg->flags&PGHDR_DIRTY ); 02451 if( pPager->errCode==SQLITE_OK ){ 02452 if( pPg->flags&PGHDR_NEED_SYNC ){ 02453 rc = syncJournal(pPager); 02454 if( rc==SQLITE_OK && pPager->fullSync && 02455 !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && 02456 !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 02457 ){ 02458 pPager->nRec = 0; 02459 rc = writeJournalHdr(pPager); 02460 } 02461 } 02462 if( rc==SQLITE_OK ){ 02463 pPg->pDirty = 0; 02464 rc = pager_write_pagelist(pPg); 02465 } 02466 if( rc!=SQLITE_OK ){ 02467 pager_error(pPager, rc); 02468 } 02469 } 02470 02471 if( rc==SQLITE_OK ){ 02472 sqlite3PcacheMakeClean(pPg); 02473 } 02474 return rc; 02475 } 02476 02477 02478 /* 02479 ** Return 1 if there is a hot journal on the given pager. 02480 ** A hot journal is one that needs to be played back. 02481 ** 02482 ** If the current size of the database file is 0 but a journal file 02483 ** exists, that is probably an old journal left over from a prior 02484 ** database with the same name. Just delete the journal. 02485 ** 02486 ** Return negative if unable to determine the status of the journal. 02487 ** 02488 ** This routine does not open the journal file to examine its 02489 ** content. Hence, the journal might contain the name of a master 02490 ** journal file that has been deleted, and hence not be hot. Or 02491 ** the header of the journal might be zeroed out. This routine 02492 ** does not discover these cases of a non-hot journal - if the 02493 ** journal file exists and is not empty this routine assumes it 02494 ** is hot. The pager_playback() routine will discover that the 02495 ** journal file is not really hot and will no-op. 
02496 */ 02497 static int hasHotJournal(Pager *pPager, int *pExists){ 02498 sqlite3_vfs *pVfs = pPager->pVfs; 02499 int rc = SQLITE_OK; 02500 int exists; 02501 int locked; 02502 assert( pPager!=0 ); 02503 assert( pPager->useJournal ); 02504 assert( pPager->fd->pMethods ); 02505 *pExists = 0; 02506 rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); 02507 if( rc==SQLITE_OK && exists ){ 02508 rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); 02509 } 02510 if( rc==SQLITE_OK && exists && !locked ){ 02511 int nPage; 02512 rc = sqlite3PagerPagecount(pPager, &nPage); 02513 if( rc==SQLITE_OK ){ 02514 if( nPage==0 ){ 02515 sqlite3OsDelete(pVfs, pPager->zJournal, 0); 02516 }else{ 02517 *pExists = 1; 02518 } 02519 } 02520 } 02521 return rc; 02522 } 02523 02524 /* 02525 ** Read the content of page pPg out of the database file. 02526 */ 02527 static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){ 02528 int rc; 02529 i64 offset; 02530 assert( MEMDB==0 ); 02531 assert(pPager->fd->pMethods||pPager->tempFile); 02532 if( !pPager->fd->pMethods ){ 02533 return SQLITE_IOERR_SHORT_READ; 02534 } 02535 offset = (pgno-1)*(i64)pPager->pageSize; 02536 rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset); 02537 PAGER_INCR(sqlite3_pager_readdb_count); 02538 PAGER_INCR(pPager->nRead); 02539 IOTRACE(("PGIN %p %d\n", pPager, pgno)); 02540 if( pgno==1 ){ 02541 memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24], 02542 sizeof(pPager->dbFileVers)); 02543 } 02544 CODEC1(pPager, pPg->pData, pPg->pgno, 3); 02545 PAGERTRACE4("FETCH %d page %d hash(%08x)\n", 02546 PAGERID(pPager), pPg->pgno, pager_pagehash(pPg)); 02547 return rc; 02548 } 02549 02550 02551 /* 02552 ** This function is called to obtain the shared lock required before 02553 ** data may be read from the pager cache. If the shared lock has already 02554 ** been obtained, this function is a no-op. 
02555 ** 02556 ** Immediately after obtaining the shared lock (if required), this function 02557 ** checks for a hot-journal file. If one is found, an emergency rollback 02558 ** is performed immediately. 02559 */ 02560 static int pagerSharedLock(Pager *pPager){ 02561 int rc = SQLITE_OK; 02562 int isErrorReset = 0; 02563 02564 /* If this database is opened for exclusive access, has no outstanding 02565 ** page references and is in an error-state, now is the chance to clear 02566 ** the error. Discard the contents of the pager-cache and treat any 02567 ** open journal file as a hot-journal. 02568 */ 02569 if( !MEMDB && pPager->exclusiveMode 02570 && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode 02571 ){ 02572 if( pPager->journalOpen ){ 02573 isErrorReset = 1; 02574 } 02575 pPager->errCode = SQLITE_OK; 02576 pager_reset(pPager); 02577 } 02578 02579 /* If the pager is still in an error state, do not proceed. The error 02580 ** state will be cleared at some point in the future when all page 02581 ** references are dropped and the cache can be discarded. 02582 */ 02583 if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ 02584 return pPager->errCode; 02585 } 02586 02587 if( pPager->state==PAGER_UNLOCK || isErrorReset ){ 02588 sqlite3_vfs *pVfs = pPager->pVfs; 02589 int isHotJournal; 02590 assert( !MEMDB ); 02591 assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); 02592 if( !pPager->noReadlock ){ 02593 rc = pager_wait_on_lock(pPager, SHARED_LOCK); 02594 if( rc!=SQLITE_OK ){ 02595 assert( pPager->state==PAGER_UNLOCK ); 02596 return pager_error(pPager, rc); 02597 } 02598 assert( pPager->state>=SHARED_LOCK ); 02599 } 02600 02601 /* If a journal file exists, and there is no RESERVED lock on the 02602 ** database file, then it either needs to be played back or deleted. 
02603 */ 02604 if( !isErrorReset ){ 02605 rc = hasHotJournal(pPager, &isHotJournal); 02606 if( rc!=SQLITE_OK ){ 02607 goto failed; 02608 } 02609 } 02610 if( isErrorReset || isHotJournal ){ 02611 /* Get an EXCLUSIVE lock on the database file. At this point it is 02612 ** important that a RESERVED lock is not obtained on the way to the 02613 ** EXCLUSIVE lock. If it were, another process might open the 02614 ** database file, detect the RESERVED lock, and conclude that the 02615 ** database is safe to read while this process is still rolling it 02616 ** back. 02617 ** 02618 ** Because the intermediate RESERVED lock is not requested, the 02619 ** second process will get to this point in the code and fail to 02620 ** obtain its own EXCLUSIVE lock on the database file. 02621 */ 02622 if( pPager->state<EXCLUSIVE_LOCK ){ 02623 rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK); 02624 if( rc!=SQLITE_OK ){ 02625 rc = pager_error(pPager, rc); 02626 goto failed; 02627 } 02628 pPager->state = PAGER_EXCLUSIVE; 02629 } 02630 02631 /* Open the journal for read/write access. This is because in 02632 ** exclusive-access mode the file descriptor will be kept open and 02633 ** possibly used for a transaction later on. On some systems, the 02634 ** OsTruncate() call used in exclusive-access mode also requires 02635 ** a read/write file handle. 
02636 */ 02637 if( !isErrorReset && pPager->journalOpen==0 ){ 02638 int res; 02639 rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res); 02640 if( rc==SQLITE_OK ){ 02641 if( res ){ 02642 int fout = 0; 02643 int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; 02644 assert( !pPager->tempFile ); 02645 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); 02646 assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); 02647 if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ 02648 rc = SQLITE_CANTOPEN; 02649 sqlite3OsClose(pPager->jfd); 02650 } 02651 }else{ 02652 /* If the journal does not exist, that means some other process 02653 ** has already rolled it back */ 02654 rc = SQLITE_BUSY; 02655 } 02656 } 02657 } 02658 if( rc!=SQLITE_OK ){ 02659 goto failed; 02660 } 02661 pPager->journalOpen = 1; 02662 pPager->journalStarted = 0; 02663 pPager->journalOff = 0; 02664 pPager->setMaster = 0; 02665 pPager->journalHdr = 0; 02666 02667 /* Playback and delete the journal. Drop the database write 02668 ** lock and reacquire the read lock. 02669 */ 02670 rc = pager_playback(pPager, 1); 02671 if( rc!=SQLITE_OK ){ 02672 rc = pager_error(pPager, rc); 02673 goto failed; 02674 } 02675 assert(pPager->state==PAGER_SHARED || 02676 (pPager->exclusiveMode && pPager->state>PAGER_SHARED) 02677 ); 02678 } 02679 02680 if( sqlite3PcachePagecount(pPager->pPCache)>0 ){ 02681 /* The shared-lock has just been acquired on the database file 02682 ** and there are already pages in the cache (from a previous 02683 ** read or write transaction). Check to see if the database 02684 ** has been modified. If the database has changed, flush the 02685 ** cache. 02686 ** 02687 ** Database changes is detected by looking at 15 bytes beginning 02688 ** at offset 24 into the file. The first 4 of these 16 bytes are 02689 ** a 32-bit counter that is incremented with each change. The 02690 ** other bytes change randomly with each file change when 02691 ** a codec is in use. 
02692 ** 02693 ** There is a vanishingly small chance that a change will not be 02694 ** detected. The chance of an undetected change is so small that 02695 ** it can be neglected. 02696 */ 02697 char dbFileVers[sizeof(pPager->dbFileVers)]; 02698 sqlite3PagerPagecount(pPager, 0); 02699 02700 if( pPager->errCode ){ 02701 rc = pPager->errCode; 02702 goto failed; 02703 } 02704 02705 if( pPager->dbSize>0 ){ 02706 IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); 02707 rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); 02708 if( rc!=SQLITE_OK ){ 02709 goto failed; 02710 } 02711 }else{ 02712 memset(dbFileVers, 0, sizeof(dbFileVers)); 02713 } 02714 02715 if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ 02716 pager_reset(pPager); 02717 } 02718 } 02719 assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED ); 02720 if( pPager->state==PAGER_UNLOCK ){ 02721 pPager->state = PAGER_SHARED; 02722 } 02723 } 02724 02725 failed: 02726 if( rc!=SQLITE_OK ){ 02727 /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */ 02728 pager_unlock(pPager); 02729 } 02730 return rc; 02731 } 02732 02733 /* 02734 ** Make sure we have the content for a page. If the page was 02735 ** previously acquired with noContent==1, then the content was 02736 ** just initialized to zeros instead of being read from disk. 02737 ** But now we need the real data off of disk. So make sure we 02738 ** have it. Read it in if we do not have it already. 02739 */ 02740 static int pager_get_content(PgHdr *pPg){ 02741 if( pPg->flags&PGHDR_NEED_READ ){ 02742 int rc = readDbPage(pPg->pPager, pPg, pPg->pgno); 02743 if( rc==SQLITE_OK ){ 02744 pPg->flags &= ~PGHDR_NEED_READ; 02745 }else{ 02746 return rc; 02747 } 02748 } 02749 return SQLITE_OK; 02750 } 02751 02752 /* 02753 ** If the reference count has reached zero, and the pager is not in the 02754 ** middle of a write transaction or opened in exclusive mode, unlock it. 
02755 */ 02756 static void pagerUnlockIfUnused(Pager *pPager){ 02757 if( (sqlite3PcacheRefCount(pPager->pPCache)==0) 02758 && (!pPager->exclusiveMode || pPager->journalOff>0) 02759 ){ 02760 pagerUnlockAndRollback(pPager); 02761 } 02762 } 02763 02764 /* 02765 ** Drop a page from the cache using sqlite3PcacheDrop(). 02766 ** 02767 ** If this means there are now no pages with references to them, a rollback 02768 ** occurs and the lock on the database is removed. 02769 */ 02770 static void pagerDropPage(DbPage *pPg){ 02771 Pager *pPager = pPg->pPager; 02772 sqlite3PcacheDrop(pPg); 02773 pagerUnlockIfUnused(pPager); 02774 } 02775 02776 /* 02777 ** Acquire a page. 02778 ** 02779 ** A read lock on the disk file is obtained when the first page is acquired. 02780 ** This read lock is dropped when the last page is released. 02781 ** 02782 ** This routine works for any page number greater than 0. If the database 02783 ** file is smaller than the requested page, then no actual disk 02784 ** read occurs and the memory image of the page is initialized to 02785 ** all zeros. The extra data appended to a page is always initialized 02786 ** to zeros the first time a page is loaded into memory. 02787 ** 02788 ** The acquisition might fail for several reasons. In all cases, 02789 ** an appropriate error code is returned and *ppPage is set to NULL. 02790 ** 02791 ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt 02792 ** to find a page in the in-memory cache first. If the page is not already 02793 ** in memory, this routine goes to disk to read it in whereas Lookup() 02794 ** just returns 0. This routine acquires a read-lock the first time it 02795 ** has to go to disk, and could also playback an old journal if necessary. 02796 ** Since Lookup() never goes to disk, it never has to deal with locks 02797 ** or journal files. 02798 ** 02799 ** If noContent is false, the page contents are actually read from disk. 
02800 ** If noContent is true, it means that we do not care about the contents 02801 ** of the page at this time, so do not do a disk read. Just fill in the 02802 ** page content with zeros. But mark the fact that we have not read the 02803 ** content by setting the PgHdr.needRead flag. Later on, if 02804 ** sqlite3PagerWrite() is called on this page or if this routine is 02805 ** called again with noContent==0, that means that the content is needed 02806 ** and the disk read should occur at that point. 02807 */ 02808 int sqlite3PagerAcquire( 02809 Pager *pPager, /* The pager open on the database file */ 02810 Pgno pgno, /* Page number to fetch */ 02811 DbPage **ppPage, /* Write a pointer to the page here */ 02812 int noContent /* Do not bother reading content from disk if true */ 02813 ){ 02814 PgHdr *pPg = 0; 02815 int rc; 02816 02817 assert( pPager->state==PAGER_UNLOCK 02818 || sqlite3PcacheRefCount(pPager->pPCache)>0 02819 || pgno==1 02820 ); 02821 02822 /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page 02823 ** number greater than this, or zero, is requested. 02824 */ 02825 if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ 02826 return SQLITE_CORRUPT_BKPT; 02827 } 02828 02829 /* Make sure we have not hit any critical errors. 02830 */ 02831 assert( pPager!=0 ); 02832 *ppPage = 0; 02833 02834 /* If this is the first page accessed, then get a SHARED lock 02835 ** on the database file. pagerSharedLock() is a no-op if 02836 ** a database lock is already held. 02837 */ 02838 rc = pagerSharedLock(pPager); 02839 if( rc!=SQLITE_OK ){ 02840 return rc; 02841 } 02842 assert( pPager->state!=PAGER_UNLOCK ); 02843 02844 rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg); 02845 if( rc!=SQLITE_OK ){ 02846 return rc; 02847 } 02848 if( pPg->pPager==0 ){ 02849 /* The pager cache has created a new page. Its content needs to 02850 ** be initialized. 
02851 */ 02852 int nMax; 02853 PAGER_INCR(pPager->nMiss); 02854 pPg->pPager = pPager; 02855 if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){ 02856 pPg->flags |= PGHDR_IN_JOURNAL; 02857 } 02858 memset(pPg->pExtra, 0, pPager->nExtra); 02859 02860 rc = sqlite3PagerPagecount(pPager, &nMax); 02861 if( rc!=SQLITE_OK ){ 02862 sqlite3PagerUnref(pPg); 02863 return rc; 02864 } 02865 02866 if( nMax<(int)pgno || MEMDB || noContent ){ 02867 if( pgno>pPager->mxPgno ){ 02868 sqlite3PagerUnref(pPg); 02869 return SQLITE_FULL; 02870 } 02871 memset(pPg->pData, 0, pPager->pageSize); 02872 if( noContent ){ 02873 pPg->flags |= PGHDR_NEED_READ; 02874 } 02875 IOTRACE(("ZERO %p %d\n", pPager, pgno)); 02876 }else{ 02877 rc = readDbPage(pPager, pPg, pgno); 02878 if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ 02879 /* sqlite3PagerUnref(pPg); */ 02880 pagerDropPage(pPg); 02881 return rc; 02882 } 02883 } 02884 #ifdef SQLITE_CHECK_PAGES 02885 pPg->pageHash = pager_pagehash(pPg); 02886 #endif 02887 }else{ 02888 /* The requested page is in the page cache. */ 02889 assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1); 02890 PAGER_INCR(pPager->nHit); 02891 if( !noContent ){ 02892 rc = pager_get_content(pPg); 02893 if( rc ){ 02894 sqlite3PagerUnref(pPg); 02895 return rc; 02896 } 02897 } 02898 } 02899 02900 *ppPage = pPg; 02901 return SQLITE_OK; 02902 } 02903 02904 /* 02905 ** Acquire a page if it is already in the in-memory cache. Do 02906 ** not read the page from disk. Return a pointer to the page, 02907 ** or 0 if the page is not in cache. 02908 ** 02909 ** See also sqlite3PagerGet(). The difference between this routine 02910 ** and sqlite3PagerGet() is that _get() will go to the disk and read 02911 ** in the page if the page is not already in cache. This routine 02912 ** returns NULL if the page is not in cache or if a disk I/O error 02913 ** has ever happened. 
02914 */ 02915 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ 02916 PgHdr *pPg = 0; 02917 assert( pPager!=0 ); 02918 assert( pgno!=0 ); 02919 02920 if( (pPager->state!=PAGER_UNLOCK) 02921 && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL) 02922 ){ 02923 sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); 02924 } 02925 02926 return pPg; 02927 } 02928 02929 /* 02930 ** Release a page. 02931 ** 02932 ** If the number of references to the page drop to zero, then the 02933 ** page is added to the LRU list. When all references to all pages 02934 ** are released, a rollback occurs and the lock on the database is 02935 ** removed. 02936 */ 02937 int sqlite3PagerUnref(DbPage *pPg){ 02938 if( pPg ){ 02939 Pager *pPager = pPg->pPager; 02940 sqlite3PcacheRelease(pPg); 02941 pagerUnlockIfUnused(pPager); 02942 } 02943 return SQLITE_OK; 02944 } 02945 02946 /* 02947 ** Create a journal file for pPager. There should already be a RESERVED 02948 ** or EXCLUSIVE lock on the database file when this routine is called. 02949 ** 02950 ** Return SQLITE_OK if everything. Return an error code and release the 02951 ** write lock if anything goes wrong. 
02952 */ 02953 static int pager_open_journal(Pager *pPager){ 02954 sqlite3_vfs *pVfs = pPager->pVfs; 02955 int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE); 02956 02957 int rc; 02958 assert( pPager->state>=PAGER_RESERVED ); 02959 assert( pPager->useJournal ); 02960 assert( pPager->pInJournal==0 ); 02961 sqlite3PagerPagecount(pPager, 0); 02962 pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); 02963 if( pPager->pInJournal==0 ){ 02964 rc = SQLITE_NOMEM; 02965 goto failed_to_open_journal; 02966 } 02967 02968 if( pPager->journalOpen==0 ){ 02969 if( pPager->tempFile ){ 02970 flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); 02971 }else{ 02972 flags |= (SQLITE_OPEN_MAIN_JOURNAL); 02973 } 02974 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 02975 sqlite3MemJournalOpen(pPager->jfd); 02976 rc = SQLITE_OK; 02977 }else{ 02978 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 02979 rc = sqlite3JournalOpen( 02980 pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager) 02981 ); 02982 #else 02983 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0); 02984 #endif 02985 } 02986 assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); 02987 pPager->journalOff = 0; 02988 pPager->setMaster = 0; 02989 pPager->journalHdr = 0; 02990 if( rc!=SQLITE_OK ){ 02991 if( rc==SQLITE_NOMEM ){ 02992 sqlite3OsDelete(pVfs, pPager->zJournal, 0); 02993 } 02994 goto failed_to_open_journal; 02995 } 02996 } 02997 pPager->journalOpen = 1; 02998 pPager->journalStarted = 0; 02999 pPager->needSync = 0; 03000 pPager->nRec = 0; 03001 if( pPager->errCode ){ 03002 rc = pPager->errCode; 03003 goto failed_to_open_journal; 03004 } 03005 pPager->origDbSize = pPager->dbSize; 03006 03007 rc = writeJournalHdr(pPager); 03008 03009 if( pPager->stmtAutoopen && rc==SQLITE_OK ){ 03010 rc = sqlite3PagerStmtBegin(pPager); 03011 } 03012 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){ 03013 rc = pager_end_transaction(pPager, 0); 03014 if( rc==SQLITE_OK ){ 
03015 rc = SQLITE_FULL; 03016 } 03017 } 03018 return rc; 03019 03020 failed_to_open_journal: 03021 sqlite3BitvecDestroy(pPager->pInJournal); 03022 pPager->pInJournal = 0; 03023 return rc; 03024 } 03025 03026 /* 03027 ** Acquire a write-lock on the database. The lock is removed when 03028 ** the any of the following happen: 03029 ** 03030 ** * sqlite3PagerCommitPhaseTwo() is called. 03031 ** * sqlite3PagerRollback() is called. 03032 ** * sqlite3PagerClose() is called. 03033 ** * sqlite3PagerUnref() is called to on every outstanding page. 03034 ** 03035 ** The first parameter to this routine is a pointer to any open page of the 03036 ** database file. Nothing changes about the page - it is used merely to 03037 ** acquire a pointer to the Pager structure and as proof that there is 03038 ** already a read-lock on the database. 03039 ** 03040 ** The second parameter indicates how much space in bytes to reserve for a 03041 ** master journal file-name at the start of the journal when it is created. 03042 ** 03043 ** A journal file is opened if this is not a temporary file. For temporary 03044 ** files, the opening of the journal file is deferred until there is an 03045 ** actual need to write to the journal. 03046 ** 03047 ** If the database is already reserved for writing, this routine is a no-op. 03048 ** 03049 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file 03050 ** immediately instead of waiting until we try to flush the cache. The 03051 ** exFlag is ignored if a transaction is already active. 
03052 */ 03053 int sqlite3PagerBegin(DbPage *pPg, int exFlag){ 03054 Pager *pPager = pPg->pPager; 03055 int rc = SQLITE_OK; 03056 assert( pPg->nRef>0 ); 03057 assert( pPager->state!=PAGER_UNLOCK ); 03058 if( pPager->state==PAGER_SHARED ){ 03059 assert( pPager->pInJournal==0 ); 03060 assert( !MEMDB ); 03061 sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL); 03062 rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); 03063 if( rc==SQLITE_OK ){ 03064 pPager->state = PAGER_RESERVED; 03065 if( exFlag ){ 03066 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 03067 } 03068 } 03069 if( rc!=SQLITE_OK ){ 03070 return rc; 03071 } 03072 pPager->dirtyCache = 0; 03073 PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager)); 03074 if( pPager->useJournal && !pPager->tempFile 03075 && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 03076 rc = pager_open_journal(pPager); 03077 } 03078 }else if( pPager->journalOpen && pPager->journalOff==0 ){ 03079 /* This happens when the pager was in exclusive-access mode the last 03080 ** time a (read or write) transaction was successfully concluded 03081 ** by this connection. Instead of deleting the journal file it was 03082 ** kept open and either was truncated to 0 bytes or its header was 03083 ** overwritten with zeros. 03084 */ 03085 assert( pPager->nRec==0 ); 03086 assert( pPager->origDbSize==0 ); 03087 assert( pPager->pInJournal==0 ); 03088 sqlite3PagerPagecount(pPager, 0); 03089 pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize ); 03090 if( !pPager->pInJournal ){ 03091 rc = SQLITE_NOMEM; 03092 }else{ 03093 pPager->origDbSize = pPager->dbSize; 03094 rc = writeJournalHdr(pPager); 03095 } 03096 } 03097 assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK ); 03098 return rc; 03099 } 03100 03101 /* 03102 ** Make a page dirty. Set its dirty flag and add it to the dirty 03103 ** page list. 03104 */ 03105 static void makeDirty(PgHdr *pPg){ 03106 sqlite3PcacheMakeDirty(pPg); 03107 } 03108 03109 /* 03110 ** Make a page clean. 
Clear its dirty bit and remove it from the 03111 ** dirty page list. 03112 */ 03113 static void makeClean(PgHdr *pPg){ 03114 sqlite3PcacheMakeClean(pPg); 03115 } 03116 03117 03118 /* 03119 ** Mark a data page as writeable. The page is written into the journal 03120 ** if it is not there already. This routine must be called before making 03121 ** changes to a page. 03122 ** 03123 ** The first time this routine is called, the pager creates a new 03124 ** journal and acquires a RESERVED lock on the database. If the RESERVED 03125 ** lock could not be acquired, this routine returns SQLITE_BUSY. The 03126 ** calling routine must check for that return value and be careful not to 03127 ** change any page data until this routine returns SQLITE_OK. 03128 ** 03129 ** If the journal file could not be written because the disk is full, 03130 ** then this routine returns SQLITE_FULL and does an immediate rollback. 03131 ** All subsequent write attempts also return SQLITE_FULL until there 03132 ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to 03133 ** reset. 03134 */ 03135 static int pager_write(PgHdr *pPg){ 03136 void *pData = pPg->pData; 03137 Pager *pPager = pPg->pPager; 03138 int rc = SQLITE_OK; 03139 03140 /* Check for errors 03141 */ 03142 if( pPager->errCode ){ 03143 return pPager->errCode; 03144 } 03145 if( pPager->readOnly ){ 03146 return SQLITE_PERM; 03147 } 03148 03149 assert( !pPager->setMaster ); 03150 03151 CHECK_PAGE(pPg); 03152 03153 /* If this page was previously acquired with noContent==1, that means 03154 ** we didn't really read in the content of the page. This can happen 03155 ** (for example) when the page is being moved to the freelist. But 03156 ** now we are (perhaps) moving the page off of the freelist for 03157 ** reuse and we need to know its original content so that content 03158 ** can be stored in the rollback journal. So do the read at this 03159 ** time. 
03160 */ 03161 rc = pager_get_content(pPg); 03162 if( rc ){ 03163 return rc; 03164 } 03165 03166 /* Mark the page as dirty. If the page has already been written 03167 ** to the journal then we can return right away. 03168 */ 03169 makeDirty(pPg); 03170 if( (pPg->flags&PGHDR_IN_JOURNAL) 03171 && (pageInStatement(pPg) || pPager->stmtInUse==0) 03172 ){ 03173 pPager->dirtyCache = 1; 03174 pPager->dbModified = 1; 03175 }else{ 03176 03177 /* If we get this far, it means that the page needs to be 03178 ** written to the transaction journal or the ckeckpoint journal 03179 ** or both. 03180 ** 03181 ** First check to see that the transaction journal exists and 03182 ** create it if it does not. 03183 */ 03184 assert( pPager->state!=PAGER_UNLOCK ); 03185 rc = sqlite3PagerBegin(pPg, 0); 03186 if( rc!=SQLITE_OK ){ 03187 return rc; 03188 } 03189 assert( pPager->state>=PAGER_RESERVED ); 03190 if( !pPager->journalOpen && pPager->useJournal 03191 && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 03192 rc = pager_open_journal(pPager); 03193 if( rc!=SQLITE_OK ) return rc; 03194 } 03195 pPager->dirtyCache = 1; 03196 pPager->dbModified = 1; 03197 03198 /* The transaction journal now exists and we have a RESERVED or an 03199 ** EXCLUSIVE lock on the main database file. Write the current page to 03200 ** the transaction journal if it is not there already. 03201 */ 03202 if( !(pPg->flags&PGHDR_IN_JOURNAL) && pPager->journalOpen ){ 03203 if( (int)pPg->pgno <= pPager->origDbSize ){ 03204 u32 cksum; 03205 char *pData2; 03206 03207 /* We should never write to the journal file the page that 03208 ** contains the database locks. The following assert verifies 03209 ** that we do not. 
*/ 03210 assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); 03211 pData2 = CODEC2(pPager, pData, pPg->pgno, 7); 03212 cksum = pager_cksum(pPager, (u8*)pData2); 03213 rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno); 03214 if( rc==SQLITE_OK ){ 03215 rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, 03216 pPager->journalOff + 4); 03217 pPager->journalOff += pPager->pageSize+4; 03218 } 03219 if( rc==SQLITE_OK ){ 03220 rc = write32bits(pPager->jfd, pPager->journalOff, cksum); 03221 pPager->journalOff += 4; 03222 } 03223 IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 03224 pPager->journalOff, pPager->pageSize)); 03225 PAGER_INCR(sqlite3_pager_writej_count); 03226 PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n", 03227 PAGERID(pPager), pPg->pgno, 03228 ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)); 03229 03230 /* An error has occured writing to the journal file. The 03231 ** transaction will be rolled back by the layer above. 03232 */ 03233 if( rc!=SQLITE_OK ){ 03234 return rc; 03235 } 03236 03237 pPager->nRec++; 03238 assert( pPager->pInJournal!=0 ); 03239 sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); 03240 if( !pPager->noSync ){ 03241 pPg->flags |= PGHDR_NEED_SYNC; 03242 } 03243 if( pPager->stmtInUse ){ 03244 sqlite3BitvecSet(pPager->pInStmt, pPg->pgno); 03245 } 03246 }else{ 03247 if( !pPager->journalStarted && !pPager->noSync ){ 03248 pPg->flags |= PGHDR_NEED_SYNC; 03249 } 03250 PAGERTRACE4("APPEND %d page %d needSync=%d\n", 03251 PAGERID(pPager), pPg->pgno, 03252 ((pPg->flags&PGHDR_NEED_SYNC)?1:0)); 03253 } 03254 if( pPg->flags&PGHDR_NEED_SYNC ){ 03255 pPager->needSync = 1; 03256 } 03257 pPg->flags |= PGHDR_IN_JOURNAL; 03258 } 03259 03260 /* If the statement journal is open and the page is not in it, 03261 ** then write the current page to the statement journal. Note that 03262 ** the statement journal format differs from the standard journal format 03263 ** in that it omits the checksums and the header. 
03264 */ 03265 if( pPager->stmtInUse 03266 && !pageInStatement(pPg) 03267 && (int)pPg->pgno<=pPager->stmtSize 03268 ){ 03269 i64 offset = pPager->stmtNRec*(4+pPager->pageSize); 03270 char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7); 03271 assert( (pPg->flags&PGHDR_IN_JOURNAL) 03272 || (int)pPg->pgno>pPager->origDbSize ); 03273 rc = write32bits(pPager->stfd, offset, pPg->pgno); 03274 if( rc==SQLITE_OK ){ 03275 rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4); 03276 } 03277 PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno); 03278 if( rc!=SQLITE_OK ){ 03279 return rc; 03280 } 03281 pPager->stmtNRec++; 03282 assert( pPager->pInStmt!=0 ); 03283 sqlite3BitvecSet(pPager->pInStmt, pPg->pgno); 03284 } 03285 } 03286 03287 /* Update the database size and return. 03288 */ 03289 assert( pPager->state>=PAGER_SHARED ); 03290 if( pPager->dbSize<(int)pPg->pgno ){ 03291 pPager->dbSize = pPg->pgno; 03292 if( pPager->dbSize==PENDING_BYTE/pPager->pageSize ){ 03293 pPager->dbSize++; 03294 } 03295 } 03296 return rc; 03297 } 03298 03299 /* 03300 ** This function is used to mark a data-page as writable. It uses 03301 ** pager_write() to open a journal file (if it is not already open) 03302 ** and write the page *pData to the journal. 03303 ** 03304 ** The difference between this function and pager_write() is that this 03305 ** function also deals with the special case where 2 or more pages 03306 ** fit on a single disk sector. In this case all co-resident pages 03307 ** must have been written to the journal file before returning. 03308 */ 03309 int sqlite3PagerWrite(DbPage *pDbPage){ 03310 int rc = SQLITE_OK; 03311 03312 PgHdr *pPg = pDbPage; 03313 Pager *pPager = pPg->pPager; 03314 Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); 03315 03316 if( nPagePerSector>1 ){ 03317 Pgno nPageCount; /* Total number of pages in database file */ 03318 Pgno pg1; /* First page of the sector pPg is located on. 
*/ 03319 int nPage; /* Number of pages starting at pg1 to journal */ 03320 int ii; 03321 int needSync = 0; 03322 03323 /* Set the doNotSync flag to 1. This is because we cannot allow a journal 03324 ** header to be written between the pages journaled by this function. 03325 */ 03326 assert( !MEMDB ); 03327 assert( pPager->doNotSync==0 ); 03328 pPager->doNotSync = 1; 03329 03330 /* This trick assumes that both the page-size and sector-size are 03331 ** an integer power of 2. It sets variable pg1 to the identifier 03332 ** of the first page of the sector pPg is located on. 03333 */ 03334 pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; 03335 03336 sqlite3PagerPagecount(pPager, (int *)&nPageCount); 03337 if( pPg->pgno>nPageCount ){ 03338 nPage = (pPg->pgno - pg1)+1; 03339 }else if( (pg1+nPagePerSector-1)>nPageCount ){ 03340 nPage = nPageCount+1-pg1; 03341 }else{ 03342 nPage = nPagePerSector; 03343 } 03344 assert(nPage>0); 03345 assert(pg1<=pPg->pgno); 03346 assert((pg1+nPage)>pPg->pgno); 03347 03348 for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){ 03349 Pgno pg = pg1+ii; 03350 PgHdr *pPage; 03351 if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ 03352 if( pg!=PAGER_MJ_PGNO(pPager) ){ 03353 rc = sqlite3PagerGet(pPager, pg, &pPage); 03354 if( rc==SQLITE_OK ){ 03355 rc = pager_write(pPage); 03356 if( pPage->flags&PGHDR_NEED_SYNC ){ 03357 needSync = 1; 03358 } 03359 sqlite3PagerUnref(pPage); 03360 } 03361 } 03362 }else if( (pPage = pager_lookup(pPager, pg))!=0 ){ 03363 if( pPage->flags&PGHDR_NEED_SYNC ){ 03364 needSync = 1; 03365 } 03366 sqlite3PagerUnref(pPage); 03367 } 03368 } 03369 03370 /* If the PgHdr.needSync flag is set for any of the nPage pages 03371 ** starting at pg1, then it needs to be set for all of them. Because 03372 ** writing to any of these nPage pages may damage the others, the 03373 ** journal file must contain sync()ed copies of all of them 03374 ** before any of them can be written out to the database file. 
03375 */ 03376 if( needSync ){ 03377 assert( !MEMDB && pPager->noSync==0 ); 03378 for(ii=0; ii<nPage && needSync; ii++){ 03379 PgHdr *pPage = pager_lookup(pPager, pg1+ii); 03380 if( pPage ) pPage->flags |= PGHDR_NEED_SYNC; 03381 sqlite3PagerUnref(pPage); 03382 } 03383 assert(pPager->needSync); 03384 } 03385 03386 assert( pPager->doNotSync==1 ); 03387 pPager->doNotSync = 0; 03388 }else{ 03389 rc = pager_write(pDbPage); 03390 } 03391 return rc; 03392 } 03393 03394 /* 03395 ** Return TRUE if the page given in the argument was previously passed 03396 ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok 03397 ** to change the content of the page. 03398 */ 03399 #ifndef NDEBUG 03400 int sqlite3PagerIswriteable(DbPage *pPg){ 03401 return pPg->flags&PGHDR_DIRTY; 03402 } 03403 #endif 03404 03405 /* 03406 ** A call to this routine tells the pager that it is not necessary to 03407 ** write the information on page pPg back to the disk, even though 03408 ** that page might be marked as dirty. This happens, for example, when 03409 ** the page has been added as a leaf of the freelist and so its 03410 ** content no longer matters. 03411 ** 03412 ** The overlying software layer calls this routine when all of the data 03413 ** on the given page is unused. The pager marks the page as clean so 03414 ** that it does not get written to disk. 03415 ** 03416 ** Tests show that this optimization, together with the 03417 ** sqlite3PagerDontRollback() below, more than double the speed 03418 ** of large INSERT operations and quadruple the speed of large DELETEs. 03419 ** 03420 ** When this routine is called, set the alwaysRollback flag to true. 03421 ** Subsequent calls to sqlite3PagerDontRollback() for the same page 03422 ** will thereafter be ignored. 
This is necessary to avoid a problem 03423 ** where a page with data is added to the freelist during one part of 03424 ** a transaction then removed from the freelist during a later part 03425 ** of the same transaction and reused for some other purpose. When it 03426 ** is first added to the freelist, this routine is called. When reused, 03427 ** the sqlite3PagerDontRollback() routine is called. But because the 03428 ** page contains critical data, we still need to be sure it gets 03429 ** rolled back in spite of the sqlite3PagerDontRollback() call. 03430 */ 03431 int sqlite3PagerDontWrite(DbPage *pDbPage){ 03432 PgHdr *pPg = pDbPage; 03433 Pager *pPager = pPg->pPager; 03434 int rc; 03435 03436 if( pPg->pgno>pPager->origDbSize ){ 03437 return SQLITE_OK; 03438 } 03439 if( pPager->pAlwaysRollback==0 ){ 03440 assert( pPager->pInJournal ); 03441 pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize); 03442 if( !pPager->pAlwaysRollback ){ 03443 return SQLITE_NOMEM; 03444 } 03445 } 03446 rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno); 03447 03448 if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && !pPager->stmtInUse ){ 03449 assert( pPager->state>=PAGER_SHARED ); 03450 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){ 03451 /* If this pages is the last page in the file and the file has grown 03452 ** during the current transaction, then do NOT mark the page as clean. 03453 ** When the database file grows, we must make sure that the last page 03454 ** gets written at least once so that the disk file will be the correct 03455 ** size. If you do not write this page and the size of the file 03456 ** on the disk ends up being too small, that can lead to database 03457 ** corruption during the next transaction. 
03458 */ 03459 }else{ 03460 PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)); 03461 IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) 03462 pPg->flags |= PGHDR_DONT_WRITE; 03463 #ifdef SQLITE_CHECK_PAGES 03464 pPg->pageHash = pager_pagehash(pPg); 03465 #endif 03466 } 03467 } 03468 return rc; 03469 } 03470 03471 /* 03472 ** A call to this routine tells the pager that if a rollback occurs, 03473 ** it is not necessary to restore the data on the given page. This 03474 ** means that the pager does not have to record the given page in the 03475 ** rollback journal. 03476 ** 03477 ** If we have not yet actually read the content of this page (if 03478 ** the PgHdr.needRead flag is set) then this routine acts as a promise 03479 ** that we will never need to read the page content in the future. 03480 ** so the needRead flag can be cleared at this point. 03481 */ 03482 void sqlite3PagerDontRollback(DbPage *pPg){ 03483 Pager *pPager = pPg->pPager; 03484 03485 assert( pPager->state>=PAGER_RESERVED ); 03486 03487 /* If the journal file is not open, or DontWrite() has been called on 03488 ** this page (DontWrite() sets the alwaysRollback flag), then this 03489 ** function is a no-op. 03490 */ 03491 if( pPager->journalOpen==0 03492 || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno) 03493 || pPg->pgno>pPager->origDbSize 03494 ){ 03495 return; 03496 } 03497 03498 #ifdef SQLITE_SECURE_DELETE 03499 if( (pPg->flags & PGHDR_IN_JOURNAL)!=0 || (int)pPg->pgno>pPager->origDbSize ){ 03500 return; 03501 } 03502 #endif 03503 03504 /* If SECURE_DELETE is disabled, then there is no way that this 03505 ** routine can be called on a page for which sqlite3PagerDontWrite() 03506 ** has not been previously called during the same transaction. 03507 ** And if DontWrite() has previously been called, the following 03508 ** conditions must be met. 03509 ** 03510 ** (Later:) Not true. 
If the database is corrupted by having duplicate 03511 ** pages on the freelist (ex: corrupt9.test) then the following is not 03512 ** necessarily true: 03513 */ 03514 /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */ 03515 03516 assert( pPager->pInJournal!=0 ); 03517 sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); 03518 pPg->flags |= PGHDR_IN_JOURNAL; 03519 pPg->flags &= ~PGHDR_NEED_READ; 03520 if( pPager->stmtInUse ){ 03521 assert( pPager->stmtSize >= pPager->origDbSize ); 03522 sqlite3BitvecSet(pPager->pInStmt, pPg->pgno); 03523 } 03524 PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager)); 03525 IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno)) 03526 } 03527 03528 03529 /* 03530 ** This routine is called to increment the database file change-counter, 03531 ** stored at byte 24 of the pager file. 03532 */ 03533 static int pager_incr_changecounter(Pager *pPager, int isDirect){ 03534 PgHdr *pPgHdr; 03535 u32 change_counter; 03536 int rc = SQLITE_OK; 03537 03538 #ifndef SQLITE_ENABLE_ATOMIC_WRITE 03539 assert( isDirect==0 ); /* isDirect is only true for atomic writes */ 03540 #endif 03541 if( !pPager->changeCountDone ){ 03542 /* Open page 1 of the file for writing. */ 03543 rc = sqlite3PagerGet(pPager, 1, &pPgHdr); 03544 if( rc!=SQLITE_OK ) return rc; 03545 03546 if( !isDirect ){ 03547 rc = sqlite3PagerWrite(pPgHdr); 03548 if( rc!=SQLITE_OK ){ 03549 sqlite3PagerUnref(pPgHdr); 03550 return rc; 03551 } 03552 } 03553 03554 /* Increment the value just read and write it back to byte 24. */ 03555 change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers); 03556 change_counter++; 03557 put32bits(((char*)pPgHdr->pData)+24, change_counter); 03558 03559 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 03560 if( isDirect && pPager->fd->pMethods ){ 03561 const void *zBuf = pPgHdr->pData; 03562 rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); 03563 } 03564 #endif 03565 03566 /* Release the page reference. 
*/ 03567 sqlite3PagerUnref(pPgHdr); 03568 pPager->changeCountDone = 1; 03569 } 03570 return rc; 03571 } 03572 03573 /* 03574 ** Sync the pager file to disk. 03575 */ 03576 int sqlite3PagerSync(Pager *pPager){ 03577 int rc; 03578 if( MEMDB ){ 03579 rc = SQLITE_OK; 03580 }else{ 03581 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); 03582 } 03583 return rc; 03584 } 03585 03586 /* 03587 ** Sync the database file for the pager pPager. zMaster points to the name 03588 ** of a master journal file that should be written into the individual 03589 ** journal file. zMaster may be NULL, which is interpreted as no master 03590 ** journal (a single database transaction). 03591 ** 03592 ** This routine ensures that the journal is synced, all dirty pages written 03593 ** to the database file and the database file synced. The only thing that 03594 ** remains to commit the transaction is to delete the journal file (or 03595 ** master journal file if specified). 03596 ** 03597 ** Note that if zMaster==NULL, this does not overwrite a previous value 03598 ** passed to an sqlite3PagerCommitPhaseOne() call. 03599 ** 03600 ** If parameter nTrunc is non-zero, then the pager file is truncated to 03601 ** nTrunc pages (this is used by auto-vacuum databases). 03602 ** 03603 ** If the final parameter - noSync - is true, then the database file itself 03604 ** is not synced. The caller must call sqlite3PagerSync() directly to 03605 ** sync the database file before calling CommitPhaseTwo() to delete the 03606 ** journal file in this case. 03607 */ 03608 int sqlite3PagerCommitPhaseOne( 03609 Pager *pPager, 03610 const char *zMaster, 03611 Pgno nTrunc, 03612 int noSync 03613 ){ 03614 int rc = SQLITE_OK; 03615 03616 if( pPager->errCode ){ 03617 return pPager->errCode; 03618 } 03619 03620 /* If no changes have been made, we can leave the transaction early. 
03621 */ 03622 if( pPager->dbModified==0 && 03623 (pPager->journalMode!=PAGER_JOURNALMODE_DELETE || 03624 pPager->exclusiveMode!=0) ){ 03625 assert( pPager->dirtyCache==0 || pPager->journalOpen==0 ); 03626 return SQLITE_OK; 03627 } 03628 03629 PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 03630 pPager->zFilename, zMaster, nTrunc); 03631 03632 /* If this is an in-memory db, or no pages have been written to, or this 03633 ** function has already been called, it is a no-op. 03634 */ 03635 if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){ 03636 PgHdr *pPg; 03637 03638 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 03639 /* The atomic-write optimization can be used if all of the 03640 ** following are true: 03641 ** 03642 ** + The file-system supports the atomic-write property for 03643 ** blocks of size page-size, and 03644 ** + This commit is not part of a multi-file transaction, and 03645 ** + Exactly one page has been modified and store in the journal file. 03646 ** 03647 ** If the optimization can be used, then the journal file will never 03648 ** be created for this transaction. 03649 */ 03650 int useAtomicWrite; 03651 pPg = sqlite3PcacheDirtyList(pPager->pPCache); 03652 useAtomicWrite = ( 03653 !zMaster && 03654 pPager->journalOpen && 03655 pPager->journalOff==jrnlBufferSize(pPager) && 03656 nTrunc==0 && 03657 (pPg==0 || pPg->pDirty==0) 03658 ); 03659 assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF ); 03660 if( useAtomicWrite ){ 03661 /* Update the nRec field in the journal file. */ 03662 int offset = pPager->journalHdr + sizeof(aJournalMagic); 03663 assert(pPager->nRec==1); 03664 rc = write32bits(pPager->jfd, offset, pPager->nRec); 03665 03666 /* Update the db file change counter. The following call will modify 03667 ** the in-memory representation of page 1 to include the updated 03668 ** change counter and then write page 1 directly to the database 03669 ** file. 
Because of the atomic-write property of the host file-system, 03670 ** this is safe. 03671 */ 03672 if( rc==SQLITE_OK ){ 03673 rc = pager_incr_changecounter(pPager, 1); 03674 } 03675 }else{ 03676 rc = sqlite3JournalCreate(pPager->jfd); 03677 } 03678 03679 if( !useAtomicWrite && rc==SQLITE_OK ) 03680 #endif 03681 03682 /* If a master journal file name has already been written to the 03683 ** journal file, then no sync is required. This happens when it is 03684 ** written, then the process fails to upgrade from a RESERVED to an 03685 ** EXCLUSIVE lock. The next time the process tries to commit the 03686 ** transaction the m-j name will have already been written. 03687 */ 03688 if( !pPager->setMaster ){ 03689 rc = pager_incr_changecounter(pPager, 0); 03690 if( rc!=SQLITE_OK ) goto sync_exit; 03691 if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 03692 #ifndef SQLITE_OMIT_AUTOVACUUM 03693 if( nTrunc!=0 ){ 03694 /* If this transaction has made the database smaller, then all pages 03695 ** being discarded by the truncation must be written to the journal 03696 ** file. 
03697 */ 03698 Pgno i; 03699 int iSkip = PAGER_MJ_PGNO(pPager); 03700 for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){ 03701 if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ 03702 rc = sqlite3PagerGet(pPager, i, &pPg); 03703 if( rc!=SQLITE_OK ) goto sync_exit; 03704 rc = sqlite3PagerWrite(pPg); 03705 sqlite3PagerUnref(pPg); 03706 if( rc!=SQLITE_OK ) goto sync_exit; 03707 } 03708 } 03709 } 03710 #endif 03711 rc = writeMasterJournal(pPager, zMaster); 03712 if( rc!=SQLITE_OK ) goto sync_exit; 03713 rc = syncJournal(pPager); 03714 } 03715 } 03716 if( rc!=SQLITE_OK ) goto sync_exit; 03717 03718 #ifndef SQLITE_OMIT_AUTOVACUUM 03719 if( nTrunc!=0 ){ 03720 rc = sqlite3PagerTruncate(pPager, nTrunc); 03721 if( rc!=SQLITE_OK ) goto sync_exit; 03722 } 03723 #endif 03724 03725 /* Write all dirty pages to the database file */ 03726 pPg = sqlite3PcacheDirtyList(pPager->pPCache); 03727 rc = pager_write_pagelist(pPg); 03728 if( rc!=SQLITE_OK ){ 03729 assert( rc!=SQLITE_IOERR_BLOCKED ); 03730 /* The error might have left the dirty list all fouled up here, 03731 ** but that does not matter because if the if the dirty list did 03732 ** get corrupted, then the transaction will roll back and 03733 ** discard the dirty list. There is an assert in 03734 ** pager_get_all_dirty_pages() that verifies that no attempt 03735 ** is made to use an invalid dirty list. 03736 */ 03737 goto sync_exit; 03738 } 03739 sqlite3PcacheCleanAll(pPager->pPCache); 03740 03741 /* Sync the database file. */ 03742 if( !pPager->noSync && !noSync ){ 03743 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); 03744 } 03745 IOTRACE(("DBSYNC %p\n", pPager)) 03746 03747 pPager->state = PAGER_SYNCED; 03748 }else if( MEMDB && nTrunc!=0 ){ 03749 rc = sqlite3PagerTruncate(pPager, nTrunc); 03750 } 03751 03752 sync_exit: 03753 if( rc==SQLITE_IOERR_BLOCKED ){ 03754 /* pager_incr_changecounter() may attempt to obtain an exclusive 03755 * lock to spill the cache and return IOERR_BLOCKED. 
But since 03756 * there is no chance the cache is inconsistent, it is 03757 * better to return SQLITE_BUSY. 03758 */ 03759 rc = SQLITE_BUSY; 03760 } 03761 return rc; 03762 } 03763 03764 03765 /* 03766 ** Commit all changes to the database and release the write lock. 03767 ** 03768 ** If the commit fails for any reason, a rollback attempt is made 03769 ** and an error code is returned. If the commit worked, SQLITE_OK 03770 ** is returned. 03771 */ 03772 int sqlite3PagerCommitPhaseTwo(Pager *pPager){ 03773 int rc = SQLITE_OK; 03774 03775 if( pPager->errCode ){ 03776 return pPager->errCode; 03777 } 03778 if( pPager->state<PAGER_RESERVED ){ 03779 return SQLITE_ERROR; 03780 } 03781 if( pPager->dbModified==0 && 03782 (pPager->journalMode!=PAGER_JOURNALMODE_DELETE || 03783 pPager->exclusiveMode!=0) ){ 03784 assert( pPager->dirtyCache==0 || pPager->journalOpen==0 ); 03785 return SQLITE_OK; 03786 } 03787 PAGERTRACE2("COMMIT %d\n", PAGERID(pPager)); 03788 assert( pPager->state==PAGER_SYNCED || MEMDB || !pPager->dirtyCache ); 03789 rc = pager_end_transaction(pPager, pPager->setMaster); 03790 rc = pager_error(pPager, rc); 03791 return rc; 03792 } 03793 03794 /* 03795 ** Rollback all changes. The database falls back to PAGER_SHARED mode. 03796 ** All in-memory cache pages revert to their original data contents. 03797 ** The journal is deleted. 03798 ** 03799 ** This routine cannot fail unless some other process is not following 03800 ** the correct locking protocol or unless some other 03801 ** process is writing trash into the journal file (SQLITE_CORRUPT) or 03802 ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error 03803 ** codes are returned for all these occasions. Otherwise, 03804 ** SQLITE_OK is returned. 
03805 */ 03806 int sqlite3PagerRollback(Pager *pPager){ 03807 int rc = SQLITE_OK; 03808 PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager)); 03809 if( !pPager->dirtyCache || !pPager->journalOpen ){ 03810 rc = pager_end_transaction(pPager, pPager->setMaster); 03811 }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ 03812 if( pPager->state>=PAGER_EXCLUSIVE ){ 03813 pager_playback(pPager, 0); 03814 } 03815 rc = pPager->errCode; 03816 }else{ 03817 if( pPager->state==PAGER_RESERVED ){ 03818 int rc2; 03819 rc = pager_playback(pPager, 0); 03820 rc2 = pager_end_transaction(pPager, pPager->setMaster); 03821 if( rc==SQLITE_OK ){ 03822 rc = rc2; 03823 } 03824 }else{ 03825 rc = pager_playback(pPager, 0); 03826 } 03827 03828 if( !MEMDB ){ 03829 pPager->dbSize = -1; 03830 } 03831 03832 /* If an error occurs during a ROLLBACK, we can no longer trust the pager 03833 ** cache. So call pager_error() on the way out to make any error 03834 ** persistent. 03835 */ 03836 rc = pager_error(pPager, rc); 03837 } 03838 return rc; 03839 } 03840 03841 /* 03842 ** Return TRUE if the database file is opened read-only. Return FALSE 03843 ** if the database is (in theory) writable. 03844 */ 03845 int sqlite3PagerIsreadonly(Pager *pPager){ 03846 return pPager->readOnly; 03847 } 03848 03849 /* 03850 ** Return the number of references to the pager. 03851 */ 03852 int sqlite3PagerRefcount(Pager *pPager){ 03853 return sqlite3PcacheRefCount(pPager->pPCache); 03854 } 03855 03856 /* 03857 ** Return the number of references to the specified page. 03858 */ 03859 int sqlite3PagerPageRefcount(DbPage *pPage){ 03860 return sqlite3PcachePageRefcount(pPage); 03861 } 03862 03863 #ifdef SQLITE_TEST 03864 /* 03865 ** This routine is used for testing and analysis only. 
03866 */ 03867 int *sqlite3PagerStats(Pager *pPager){ 03868 static int a[11]; 03869 a[0] = sqlite3PcacheRefCount(pPager->pPCache); 03870 a[1] = sqlite3PcachePagecount(pPager->pPCache); 03871 a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); 03872 a[3] = pPager->dbSize; 03873 a[4] = pPager->state; 03874 a[5] = pPager->errCode; 03875 a[6] = pPager->nHit; 03876 a[7] = pPager->nMiss; 03877 a[8] = 0; /* Used to be pPager->nOvfl */ 03878 a[9] = pPager->nRead; 03879 a[10] = pPager->nWrite; 03880 return a; 03881 } 03882 int sqlite3PagerIsMemdb(Pager *pPager){ 03883 return MEMDB; 03884 } 03885 #endif 03886 03887 /* 03888 ** Set the statement rollback point. 03889 ** 03890 ** This routine should be called with the transaction journal already 03891 ** open. A new statement journal is created that can be used to rollback 03892 ** changes of a single SQL command within a larger transaction. 03893 */ 03894 static int pagerStmtBegin(Pager *pPager){ 03895 int rc; 03896 assert( !pPager->stmtInUse ); 03897 assert( pPager->state>=PAGER_SHARED ); 03898 assert( pPager->dbSize>=0 ); 03899 PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager)); 03900 if( !pPager->journalOpen ){ 03901 pPager->stmtAutoopen = 1; 03902 return SQLITE_OK; 03903 } 03904 assert( pPager->journalOpen ); 03905 assert( pPager->pInStmt==0 ); 03906 pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize); 03907 if( pPager->pInStmt==0 ){ 03908 /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */ 03909 return SQLITE_NOMEM; 03910 } 03911 pPager->stmtJSize = pPager->journalOff; 03912 pPager->stmtSize = pPager->dbSize; 03913 pPager->stmtHdrOff = 0; 03914 pPager->stmtCksum = pPager->cksumInit; 03915 if( !pPager->stmtOpen ){ 03916 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 03917 sqlite3MemJournalOpen(pPager->stfd); 03918 }else{ 03919 rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL); 03920 if( rc ){ 03921 goto stmt_begin_failed; 03922 } 03923 } 03924 pPager->stmtOpen = 1; 03925 pPager->stmtNRec = 0; 03926 } 
03927 pPager->stmtInUse = 1; 03928 return SQLITE_OK; 03929 03930 stmt_begin_failed: 03931 if( pPager->pInStmt ){ 03932 sqlite3BitvecDestroy(pPager->pInStmt); 03933 pPager->pInStmt = 0; 03934 } 03935 return rc; 03936 } 03937 int sqlite3PagerStmtBegin(Pager *pPager){ 03938 int rc; 03939 rc = pagerStmtBegin(pPager); 03940 return rc; 03941 } 03942 03943 /* 03944 ** Commit a statement. 03945 */ 03946 int sqlite3PagerStmtCommit(Pager *pPager){ 03947 if( pPager->stmtInUse ){ 03948 PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager)); 03949 sqlite3BitvecDestroy(pPager->pInStmt); 03950 pPager->pInStmt = 0; 03951 pPager->stmtNRec = 0; 03952 pPager->stmtInUse = 0; 03953 if( sqlite3IsMemJournal(pPager->stfd) ){ 03954 sqlite3OsTruncate(pPager->stfd, 0); 03955 } 03956 } 03957 pPager->stmtAutoopen = 0; 03958 return SQLITE_OK; 03959 } 03960 03961 /* 03962 ** Rollback a statement. 03963 */ 03964 int sqlite3PagerStmtRollback(Pager *pPager){ 03965 int rc; 03966 if( pPager->stmtInUse ){ 03967 PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager)); 03968 rc = pager_stmt_playback(pPager); 03969 sqlite3PagerStmtCommit(pPager); 03970 }else{ 03971 rc = SQLITE_OK; 03972 } 03973 pPager->stmtAutoopen = 0; 03974 return rc; 03975 } 03976 03977 /* 03978 ** Return the full pathname of the database file. 03979 */ 03980 const char *sqlite3PagerFilename(Pager *pPager){ 03981 return pPager->zFilename; 03982 } 03983 03984 /* 03985 ** Return the VFS structure for the pager. 03986 */ 03987 const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ 03988 return pPager->pVfs; 03989 } 03990 03991 /* 03992 ** Return the file handle for the database file associated 03993 ** with the pager. This might return NULL if the file has 03994 ** not yet been opened. 03995 */ 03996 sqlite3_file *sqlite3PagerFile(Pager *pPager){ 03997 return pPager->fd; 03998 } 03999 04000 /* 04001 ** Return the directory of the database file. 
04002 */ 04003 const char *sqlite3PagerDirname(Pager *pPager){ 04004 return pPager->zDirectory; 04005 } 04006 04007 /* 04008 ** Return the full pathname of the journal file. 04009 */ 04010 const char *sqlite3PagerJournalname(Pager *pPager){ 04011 return pPager->zJournal; 04012 } 04013 04014 /* 04015 ** Return true if fsync() calls are disabled for this pager. Return FALSE 04016 ** if fsync()s are executed normally. 04017 */ 04018 int sqlite3PagerNosync(Pager *pPager){ 04019 return pPager->noSync; 04020 } 04021 04022 #ifdef SQLITE_HAS_CODEC 04023 /* 04024 ** Set the codec for this pager 04025 */ 04026 void sqlite3PagerSetCodec( 04027 Pager *pPager, 04028 void *(*xCodec)(void*,void*,Pgno,int), 04029 void *pCodecArg 04030 ){ 04031 pPager->xCodec = xCodec; 04032 pPager->pCodecArg = pCodecArg; 04033 } 04034 #endif 04035 04036 #ifndef SQLITE_OMIT_AUTOVACUUM 04037 /* 04038 ** Move the page pPg to location pgno in the file. 04039 ** 04040 ** There must be no references to the page previously located at 04041 ** pgno (which we call pPgOld) though that page is allowed to be 04042 ** in cache. If the page previously located at pgno is not already 04043 ** in the rollback journal, it is not put there by by this routine. 04044 ** 04045 ** References to the page pPg remain valid. Updating any 04046 ** meta-data associated with pPg (i.e. data stored in the nExtra bytes 04047 ** allocated along with the page) is the responsibility of the caller. 04048 ** 04049 ** A transaction must be active when this routine is called. It used to be 04050 ** required that a statement transaction was not active, but this restriction 04051 ** has been removed (CREATE INDEX needs to move a page when a statement 04052 ** transaction is active). 04053 ** 04054 ** If the fourth argument, isCommit, is non-zero, then this page is being 04055 ** moved as part of a database reorganization just before the transaction 04056 ** is being committed. 
** In this case, it is guaranteed that the database page
** pPg refers to will not be written to again within this transaction.
**
** Returns SQLITE_OK on success. If the page originally numbered
** pPg->pgno has to be reloaded to carry the need-sync state and
** sqlite3PagerGet() fails, that error code is returned instead.
*/
int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
  PgHdr *pPgOld;  /* The page being overwritten. */
  Pgno needSyncPgno = 0;  /* Old page number if it still needs a journal sync */

  assert( pPg->nRef>0 );

  PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno);
  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))

  pager_get_content(pPg);

  /* If the journal needs to be sync()ed before page pPg->pgno can
  ** be written to, store pPg->pgno in local variable needSyncPgno.
  **
  ** If the isCommit flag is set, there is no need to remember that
  ** the journal needs to be sync()ed before database page pPg->pgno
  ** can be written to. The caller has already promised not to write to it.
  */
  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
    needSyncPgno = pPg->pgno;
    assert( (pPg->flags&PGHDR_IN_JOURNAL) || (int)pgno>pPager->origDbSize );
    assert( pPg->flags&PGHDR_DIRTY );
    assert( pPager->needSync );
  }

  /* If the cache contains a page with page-number pgno, remove it
  ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for
  ** page pgno before the 'move' operation, it needs to be retained
  ** for the page moved there.
  */
  pPg->flags &= ~(PGHDR_NEED_SYNC|PGHDR_IN_JOURNAL);
  pPgOld = pager_lookup(pPager, pgno);
  /* NOTE(review): the nRef==1 assert suggests pager_lookup() returns a
  ** referenced page that is released below - confirm against
  ** pager_lookup().
  */
  assert( !pPgOld || pPgOld->nRef==1 );
  if( pPgOld ){
    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
  }
  /* The in-journal state of the moved page follows the destination
  ** page number, as recorded in the pInJournal bitvec.
  */
  if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
    pPg->flags |= PGHDR_IN_JOURNAL;
  }

  /* Renumber pPg to pgno; the page formerly at pgno (if cached) is moved
  ** to page number 0 and released.
  */
  sqlite3PcacheMove(pPg, pgno);
  if( pPgOld ){
    sqlite3PcacheMove(pPgOld, 0);
    sqlite3PcacheRelease(pPgOld);
  }

  makeDirty(pPg);
  pPager->dirtyCache = 1;
  pPager->dbModified = 1;

  if( needSyncPgno ){
    /* If needSyncPgno is non-zero, then the journal file needs to be
    ** sync()ed before any data is written to database file page needSyncPgno.
    ** Currently, no such page exists in the page-cache and the
    ** "is journaled" bitvec flag has been set. This needs to be remedied by
    ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC
    ** flag.
    **
    ** If the attempt to load the page into the page-cache fails, (due
    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
    ** array. Otherwise, if the page is loaded and written again in
    ** this transaction, it may be written to the database file before
    ** it is synced into the journal file. This way, it may end up in
    ** the journal file twice, but that is not a problem.
    **
    ** The sqlite3PagerGet() call may cause the journal to sync. So make
    ** sure the Pager.needSync flag is set too.
    */
    int rc;
    PgHdr *pPgHdr;
    assert( pPager->needSync );
    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
    if( rc!=SQLITE_OK ){
      if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
      }
      return rc;
    }
    pPager->needSync = 1;
    assert( pPager->noSync==0 && !MEMDB );
    pPgHdr->flags |= PGHDR_NEED_SYNC;
    pPgHdr->flags |= PGHDR_IN_JOURNAL;
    makeDirty(pPgHdr);
    sqlite3PagerUnref(pPgHdr);
  }

  return SQLITE_OK;
}
#endif

/*
** Return a pointer to the data for the specified page.
** The page must be referenced (nRef>0) unless the pager's memDb flag
** is set.
*/
void *sqlite3PagerGetData(DbPage *pPg){
  assert( pPg->nRef>0 || pPg->pPager->memDb );
  return pPg->pData;
}

/*
** Return a pointer to the Pager.nExtra bytes of "extra" space
** allocated along with the specified page. Returns 0 if the page
** has no associated pager.
*/
void *sqlite3PagerGetExtra(DbPage *pPg){
  Pager *pPager = pPg->pPager;
  return (pPager?pPg->pExtra:0);
}

/*
** Get/set the locking-mode for this pager. Parameter eMode must be one
** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
** the locking-mode is set to the value specified.
**
** The returned value is either PAGER_LOCKINGMODE_NORMAL or
** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
** locking-mode.
04176 */ 04177 int sqlite3PagerLockingMode(Pager *pPager, int eMode){ 04178 assert( eMode==PAGER_LOCKINGMODE_QUERY 04179 || eMode==PAGER_LOCKINGMODE_NORMAL 04180 || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); 04181 assert( PAGER_LOCKINGMODE_QUERY<0 ); 04182 assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); 04183 if( eMode>=0 && !pPager->tempFile ){ 04184 pPager->exclusiveMode = eMode; 04185 } 04186 return (int)pPager->exclusiveMode; 04187 } 04188 04189 /* 04190 ** Get/set the journal-mode for this pager. Parameter eMode must be one of: 04191 ** 04192 ** PAGER_JOURNALMODE_QUERY 04193 ** PAGER_JOURNALMODE_DELETE 04194 ** PAGER_JOURNALMODE_TRUNCATE 04195 ** PAGER_JOURNALMODE_PERSIST 04196 ** PAGER_JOURNALMODE_OFF 04197 ** 04198 ** If the parameter is not _QUERY, then the journal-mode is set to the 04199 ** value specified. 04200 ** 04201 ** The returned indicate the current (possibly updated) 04202 ** journal-mode. 04203 */ 04204 int sqlite3PagerJournalMode(Pager *pPager, int eMode){ 04205 if( !MEMDB ){ 04206 assert( eMode==PAGER_JOURNALMODE_QUERY 04207 || eMode==PAGER_JOURNALMODE_DELETE 04208 || eMode==PAGER_JOURNALMODE_TRUNCATE 04209 || eMode==PAGER_JOURNALMODE_PERSIST 04210 || eMode==PAGER_JOURNALMODE_OFF 04211 || eMode==PAGER_JOURNALMODE_MEMORY ); 04212 assert( PAGER_JOURNALMODE_QUERY<0 ); 04213 if( eMode>=0 ){ 04214 pPager->journalMode = eMode; 04215 }else{ 04216 assert( eMode==PAGER_JOURNALMODE_QUERY ); 04217 } 04218 } 04219 return (int)pPager->journalMode; 04220 } 04221 04222 /* 04223 ** Get/set the size-limit used for persistent journal files. 04224 */ 04225 i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ 04226 if( iLimit>=-1 ){ 04227 pPager->journalSizeLimit = iLimit; 04228 } 04229 return pPager->journalSizeLimit; 04230 } 04231 04232 #endif /* SQLITE_OMIT_DISKIO */
ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:55 2011 by Doxygen 1.6.1