00001 /* 00002 ** 2004 April 6 00003 ** 00004 ** The author disclaims copyright to this source code. In place of 00005 ** a legal notice, here is a blessing: 00006 ** 00007 ** May you do good and not evil. 00008 ** May you find forgiveness for yourself and forgive others. 00009 ** May you share freely, never taking more than you give. 00010 ** 00011 ************************************************************************* 00012 ** $Id: btree.c,v 1.533 2008/11/12 08:49:52 danielk1977 Exp $ 00013 ** 00014 ** This file implements a external (disk-based) database using BTrees. 00015 ** See the header comment on "btreeInt.h" for additional information. 00016 ** Including a description of file format and an overview of operation. 00017 */ 00018 #include "btreeInt.h" 00019 00020 /* 00021 ** The header string that appears at the beginning of every 00022 ** SQLite database. 00023 */ 00024 static const char zMagicHeader[] = SQLITE_FILE_HEADER; 00025 00026 /* 00027 ** Set this global variable to 1 to enable tracing using the TRACE 00028 ** macro. 00029 */ 00030 #if 0 00031 int sqlite3BtreeTrace=0; /* True to enable tracing */ 00032 # define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);} 00033 #else 00034 # define TRACE(X) 00035 #endif 00036 00037 /* 00038 ** Sometimes we need a small amount of code such as a variable initialization 00039 ** to setup for a later assert() statement. We do not want this code to 00040 ** appear when assert() is disabled. The following macro is therefore 00041 ** used to contain that setup code. The "VVA" acronym stands for 00042 ** "Verification, Validation, and Accreditation". In other words, the 00043 ** code within VVA_ONLY() will only run during verification processes. 
00044 */ 00045 #ifndef NDEBUG 00046 # define VVA_ONLY(X) X 00047 #else 00048 # define VVA_ONLY(X) 00049 #endif 00050 00051 00052 00053 #ifndef SQLITE_OMIT_SHARED_CACHE 00054 /* 00055 ** A list of BtShared objects that are eligible for participation 00056 ** in shared cache. This variable has file scope during normal builds, 00057 ** but the test harness needs to access it so we make it global for 00058 ** test builds. 00059 */ 00060 #ifdef SQLITE_TEST 00061 BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; 00062 #else 00063 static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; 00064 #endif 00065 #endif /* SQLITE_OMIT_SHARED_CACHE */ 00066 00067 #ifndef SQLITE_OMIT_SHARED_CACHE 00068 /* 00069 ** Enable or disable the shared pager and schema features. 00070 ** 00071 ** This routine has no effect on existing database connections. 00072 ** The shared cache setting effects only future calls to 00073 ** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). 00074 */ 00075 int sqlite3_enable_shared_cache(int enable){ 00076 sqlite3GlobalConfig.sharedCacheEnabled = enable; 00077 return SQLITE_OK; 00078 } 00079 #endif 00080 00081 00082 /* 00083 ** Forward declaration 00084 */ 00085 static int checkReadLocks(Btree*, Pgno, BtCursor*, i64); 00086 00087 00088 #ifdef SQLITE_OMIT_SHARED_CACHE 00089 /* 00090 ** The functions queryTableLock(), lockTable() and unlockAllTables() 00091 ** manipulate entries in the BtShared.pLock linked list used to store 00092 ** shared-cache table level locks. If the library is compiled with the 00093 ** shared-cache feature disabled, then there is only ever one user 00094 ** of each BtShared structure and so this locking is not necessary. 00095 ** So define the lock related functions as no-ops. 
00096 */ 00097 #define queryTableLock(a,b,c) SQLITE_OK 00098 #define lockTable(a,b,c) SQLITE_OK 00099 #define unlockAllTables(a) 00100 #endif 00101 00102 #ifndef SQLITE_OMIT_SHARED_CACHE 00103 /* 00104 ** Query to see if btree handle p may obtain a lock of type eLock 00105 ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return 00106 ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or 00107 ** SQLITE_LOCKED if not. 00108 */ 00109 static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){ 00110 BtShared *pBt = p->pBt; 00111 BtLock *pIter; 00112 00113 assert( sqlite3BtreeHoldsMutex(p) ); 00114 assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); 00115 assert( p->db!=0 ); 00116 00117 /* This is a no-op if the shared-cache is not enabled */ 00118 if( !p->sharable ){ 00119 return SQLITE_OK; 00120 } 00121 00122 /* If some other connection is holding an exclusive lock, the 00123 ** requested lock may not be obtained. 00124 */ 00125 if( pBt->pExclusive && pBt->pExclusive!=p ){ 00126 return SQLITE_LOCKED; 00127 } 00128 00129 /* This (along with lockTable()) is where the ReadUncommitted flag is 00130 ** dealt with. If the caller is querying for a read-lock and the flag is 00131 ** set, it is unconditionally granted - even if there are write-locks 00132 ** on the table. If a write-lock is requested, the ReadUncommitted flag 00133 ** is not considered. 00134 ** 00135 ** In function lockTable(), if a read-lock is demanded and the 00136 ** ReadUncommitted flag is set, no entry is added to the locks list 00137 ** (BtShared.pLock). 00138 ** 00139 ** To summarize: If the ReadUncommitted flag is set, then read cursors do 00140 ** not create or respect table locks. The locking procedure for a 00141 ** write-cursor does not change. 
00142 */ 00143 if( 00144 0==(p->db->flags&SQLITE_ReadUncommitted) || 00145 eLock==WRITE_LOCK || 00146 iTab==MASTER_ROOT 00147 ){ 00148 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ 00149 if( pIter->pBtree!=p && pIter->iTable==iTab && 00150 (pIter->eLock!=eLock || eLock!=READ_LOCK) ){ 00151 return SQLITE_LOCKED; 00152 } 00153 } 00154 } 00155 return SQLITE_OK; 00156 } 00157 #endif /* !SQLITE_OMIT_SHARED_CACHE */ 00158 00159 #ifndef SQLITE_OMIT_SHARED_CACHE 00160 /* 00161 ** Add a lock on the table with root-page iTable to the shared-btree used 00162 ** by Btree handle p. Parameter eLock must be either READ_LOCK or 00163 ** WRITE_LOCK. 00164 ** 00165 ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and 00166 ** SQLITE_NOMEM may also be returned. 00167 */ 00168 static int lockTable(Btree *p, Pgno iTable, u8 eLock){ 00169 BtShared *pBt = p->pBt; 00170 BtLock *pLock = 0; 00171 BtLock *pIter; 00172 00173 assert( sqlite3BtreeHoldsMutex(p) ); 00174 assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); 00175 assert( p->db!=0 ); 00176 00177 /* This is a no-op if the shared-cache is not enabled */ 00178 if( !p->sharable ){ 00179 return SQLITE_OK; 00180 } 00181 00182 assert( SQLITE_OK==queryTableLock(p, iTable, eLock) ); 00183 00184 /* If the read-uncommitted flag is set and a read-lock is requested, 00185 ** return early without adding an entry to the BtShared.pLock list. See 00186 ** comment in function queryTableLock() for more info on handling 00187 ** the ReadUncommitted flag. 00188 */ 00189 if( 00190 (p->db->flags&SQLITE_ReadUncommitted) && 00191 (eLock==READ_LOCK) && 00192 iTable!=MASTER_ROOT 00193 ){ 00194 return SQLITE_OK; 00195 } 00196 00197 /* First search the list for an existing lock on this table. 
*/ 00198 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ 00199 if( pIter->iTable==iTable && pIter->pBtree==p ){ 00200 pLock = pIter; 00201 break; 00202 } 00203 } 00204 00205 /* If the above search did not find a BtLock struct associating Btree p 00206 ** with table iTable, allocate one and link it into the list. 00207 */ 00208 if( !pLock ){ 00209 pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); 00210 if( !pLock ){ 00211 return SQLITE_NOMEM; 00212 } 00213 pLock->iTable = iTable; 00214 pLock->pBtree = p; 00215 pLock->pNext = pBt->pLock; 00216 pBt->pLock = pLock; 00217 } 00218 00219 /* Set the BtLock.eLock variable to the maximum of the current lock 00220 ** and the requested lock. This means if a write-lock was already held 00221 ** and a read-lock requested, we don't incorrectly downgrade the lock. 00222 */ 00223 assert( WRITE_LOCK>READ_LOCK ); 00224 if( eLock>pLock->eLock ){ 00225 pLock->eLock = eLock; 00226 } 00227 00228 return SQLITE_OK; 00229 } 00230 #endif /* !SQLITE_OMIT_SHARED_CACHE */ 00231 00232 #ifndef SQLITE_OMIT_SHARED_CACHE 00233 /* 00234 ** Release all the table locks (locks obtained via calls to the lockTable() 00235 ** procedure) held by Btree handle p. 
00236 */ 00237 static void unlockAllTables(Btree *p){ 00238 BtShared *pBt = p->pBt; 00239 BtLock **ppIter = &pBt->pLock; 00240 00241 assert( sqlite3BtreeHoldsMutex(p) ); 00242 assert( p->sharable || 0==*ppIter ); 00243 00244 while( *ppIter ){ 00245 BtLock *pLock = *ppIter; 00246 assert( pBt->pExclusive==0 || pBt->pExclusive==pLock->pBtree ); 00247 if( pLock->pBtree==p ){ 00248 *ppIter = pLock->pNext; 00249 sqlite3_free(pLock); 00250 }else{ 00251 ppIter = &pLock->pNext; 00252 } 00253 } 00254 00255 if( pBt->pExclusive==p ){ 00256 pBt->pExclusive = 0; 00257 } 00258 } 00259 #endif /* SQLITE_OMIT_SHARED_CACHE */ 00260 00261 static void releasePage(MemPage *pPage); /* Forward reference */ 00262 00263 /* 00264 ** Verify that the cursor holds a mutex on the BtShared 00265 */ 00266 #ifndef NDEBUG 00267 static int cursorHoldsMutex(BtCursor *p){ 00268 return sqlite3_mutex_held(p->pBt->mutex); 00269 } 00270 #endif 00271 00272 00273 #ifndef SQLITE_OMIT_INCRBLOB 00274 /* 00275 ** Invalidate the overflow page-list cache for cursor pCur, if any. 00276 */ 00277 static void invalidateOverflowCache(BtCursor *pCur){ 00278 assert( cursorHoldsMutex(pCur) ); 00279 sqlite3_free(pCur->aOverflow); 00280 pCur->aOverflow = 0; 00281 } 00282 00283 /* 00284 ** Invalidate the overflow page-list cache for all cursors opened 00285 ** on the shared btree structure pBt. 00286 */ 00287 static void invalidateAllOverflowCache(BtShared *pBt){ 00288 BtCursor *p; 00289 assert( sqlite3_mutex_held(pBt->mutex) ); 00290 for(p=pBt->pCursor; p; p=p->pNext){ 00291 invalidateOverflowCache(p); 00292 } 00293 } 00294 #else 00295 #define invalidateOverflowCache(x) 00296 #define invalidateAllOverflowCache(x) 00297 #endif 00298 00299 /* 00300 ** Save the current cursor position in the variables BtCursor.nKey 00301 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. 
00302 */ 00303 static int saveCursorPosition(BtCursor *pCur){ 00304 int rc; 00305 00306 assert( CURSOR_VALID==pCur->eState ); 00307 assert( 0==pCur->pKey ); 00308 assert( cursorHoldsMutex(pCur) ); 00309 00310 rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); 00311 00312 /* If this is an intKey table, then the above call to BtreeKeySize() 00313 ** stores the integer key in pCur->nKey. In this case this value is 00314 ** all that is required. Otherwise, if pCur is not open on an intKey 00315 ** table, then malloc space for and store the pCur->nKey bytes of key 00316 ** data. 00317 */ 00318 if( rc==SQLITE_OK && 0==pCur->apPage[0]->intKey){ 00319 void *pKey = sqlite3Malloc(pCur->nKey); 00320 if( pKey ){ 00321 rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey); 00322 if( rc==SQLITE_OK ){ 00323 pCur->pKey = pKey; 00324 }else{ 00325 sqlite3_free(pKey); 00326 } 00327 }else{ 00328 rc = SQLITE_NOMEM; 00329 } 00330 } 00331 assert( !pCur->apPage[0]->intKey || !pCur->pKey ); 00332 00333 if( rc==SQLITE_OK ){ 00334 int i; 00335 for(i=0; i<=pCur->iPage; i++){ 00336 releasePage(pCur->apPage[i]); 00337 pCur->apPage[i] = 0; 00338 } 00339 pCur->iPage = -1; 00340 pCur->eState = CURSOR_REQUIRESEEK; 00341 } 00342 00343 invalidateOverflowCache(pCur); 00344 return rc; 00345 } 00346 00347 /* 00348 ** Save the positions of all cursors except pExcept open on the table 00349 ** with root-page iRoot. Usually, this is called just before cursor 00350 ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()). 
00351 */ 00352 static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ 00353 BtCursor *p; 00354 assert( sqlite3_mutex_held(pBt->mutex) ); 00355 assert( pExcept==0 || pExcept->pBt==pBt ); 00356 for(p=pBt->pCursor; p; p=p->pNext){ 00357 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) && 00358 p->eState==CURSOR_VALID ){ 00359 int rc = saveCursorPosition(p); 00360 if( SQLITE_OK!=rc ){ 00361 return rc; 00362 } 00363 } 00364 } 00365 return SQLITE_OK; 00366 } 00367 00368 /* 00369 ** Clear the current cursor position. 00370 */ 00371 void sqlite3BtreeClearCursor(BtCursor *pCur){ 00372 assert( cursorHoldsMutex(pCur) ); 00373 sqlite3_free(pCur->pKey); 00374 pCur->pKey = 0; 00375 pCur->eState = CURSOR_INVALID; 00376 } 00377 00378 /* 00379 ** Restore the cursor to the position it was in (or as close to as possible) 00380 ** when saveCursorPosition() was called. Note that this call deletes the 00381 ** saved position info stored by saveCursorPosition(), so there can be 00382 ** at most one effective restoreCursorPosition() call after each 00383 ** saveCursorPosition(). 00384 */ 00385 int sqlite3BtreeRestoreCursorPosition(BtCursor *pCur){ 00386 int rc; 00387 assert( cursorHoldsMutex(pCur) ); 00388 assert( pCur->eState>=CURSOR_REQUIRESEEK ); 00389 if( pCur->eState==CURSOR_FAULT ){ 00390 return pCur->skip; 00391 } 00392 pCur->eState = CURSOR_INVALID; 00393 rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skip); 00394 if( rc==SQLITE_OK ){ 00395 sqlite3_free(pCur->pKey); 00396 pCur->pKey = 0; 00397 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); 00398 } 00399 return rc; 00400 } 00401 00402 #define restoreCursorPosition(p) \ 00403 (p->eState>=CURSOR_REQUIRESEEK ? \ 00404 sqlite3BtreeRestoreCursorPosition(p) : \ 00405 SQLITE_OK) 00406 00407 /* 00408 ** Determine whether or not a cursor has moved from the position it 00409 ** was last placed at. 
Cursors can move when the row they are pointing 00410 ** at is deleted out from under them. 00411 ** 00412 ** This routine returns an error code if something goes wrong. The 00413 ** integer *pHasMoved is set to one if the cursor has moved and 0 if not. 00414 */ 00415 int sqlite3BtreeCursorHasMoved(BtCursor *pCur, int *pHasMoved){ 00416 int rc; 00417 00418 rc = restoreCursorPosition(pCur); 00419 if( rc ){ 00420 *pHasMoved = 1; 00421 return rc; 00422 } 00423 if( pCur->eState!=CURSOR_VALID || pCur->skip!=0 ){ 00424 *pHasMoved = 1; 00425 }else{ 00426 *pHasMoved = 0; 00427 } 00428 return SQLITE_OK; 00429 } 00430 00431 #ifndef SQLITE_OMIT_AUTOVACUUM 00432 /* 00433 ** Given a page number of a regular database page, return the page 00434 ** number for the pointer-map page that contains the entry for the 00435 ** input page number. 00436 */ 00437 static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ 00438 int nPagesPerMapPage, iPtrMap, ret; 00439 assert( sqlite3_mutex_held(pBt->mutex) ); 00440 nPagesPerMapPage = (pBt->usableSize/5)+1; 00441 iPtrMap = (pgno-2)/nPagesPerMapPage; 00442 ret = (iPtrMap*nPagesPerMapPage) + 2; 00443 if( ret==PENDING_BYTE_PAGE(pBt) ){ 00444 ret++; 00445 } 00446 return ret; 00447 } 00448 00449 /* 00450 ** Write an entry into the pointer map. 00451 ** 00452 ** This routine updates the pointer map entry for page number 'key' 00453 ** so that it maps to type 'eType' and parent page number 'pgno'. 00454 ** An error code is returned if something goes wrong, otherwise SQLITE_OK. 
00455 */ 00456 static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){ 00457 DbPage *pDbPage; /* The pointer map page */ 00458 u8 *pPtrmap; /* The pointer map data */ 00459 Pgno iPtrmap; /* The pointer map page number */ 00460 int offset; /* Offset in pointer map page */ 00461 int rc; 00462 00463 assert( sqlite3_mutex_held(pBt->mutex) ); 00464 /* The master-journal page number must never be used as a pointer map page */ 00465 assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); 00466 00467 assert( pBt->autoVacuum ); 00468 if( key==0 ){ 00469 return SQLITE_CORRUPT_BKPT; 00470 } 00471 iPtrmap = PTRMAP_PAGENO(pBt, key); 00472 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); 00473 if( rc!=SQLITE_OK ){ 00474 return rc; 00475 } 00476 offset = PTRMAP_PTROFFSET(iPtrmap, key); 00477 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); 00478 00479 if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ 00480 TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); 00481 rc = sqlite3PagerWrite(pDbPage); 00482 if( rc==SQLITE_OK ){ 00483 pPtrmap[offset] = eType; 00484 put4byte(&pPtrmap[offset+1], parent); 00485 } 00486 } 00487 00488 sqlite3PagerUnref(pDbPage); 00489 return rc; 00490 } 00491 00492 /* 00493 ** Read an entry from the pointer map. 00494 ** 00495 ** This routine retrieves the pointer map entry for page 'key', writing 00496 ** the type and parent page number to *pEType and *pPgno respectively. 00497 ** An error code is returned if something goes wrong, otherwise SQLITE_OK. 
00498 */ 00499 static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ 00500 DbPage *pDbPage; /* The pointer map page */ 00501 int iPtrmap; /* Pointer map page index */ 00502 u8 *pPtrmap; /* Pointer map page data */ 00503 int offset; /* Offset of entry in pointer map */ 00504 int rc; 00505 00506 assert( sqlite3_mutex_held(pBt->mutex) ); 00507 00508 iPtrmap = PTRMAP_PAGENO(pBt, key); 00509 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); 00510 if( rc!=0 ){ 00511 return rc; 00512 } 00513 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); 00514 00515 offset = PTRMAP_PTROFFSET(iPtrmap, key); 00516 assert( pEType!=0 ); 00517 *pEType = pPtrmap[offset]; 00518 if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); 00519 00520 sqlite3PagerUnref(pDbPage); 00521 if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT; 00522 return SQLITE_OK; 00523 } 00524 00525 #else /* if defined SQLITE_OMIT_AUTOVACUUM */ 00526 #define ptrmapPut(w,x,y,z) SQLITE_OK 00527 #define ptrmapGet(w,x,y,z) SQLITE_OK 00528 #define ptrmapPutOvfl(y,z) SQLITE_OK 00529 #endif 00530 00531 /* 00532 ** Given a btree page and a cell index (0 means the first cell on 00533 ** the page, 1 means the second cell, and so forth) return a pointer 00534 ** to the cell content. 00535 ** 00536 ** This routine works only for pages that do not contain overflow cells. 00537 */ 00538 #define findCell(P,I) \ 00539 ((P)->aData + ((P)->maskPage & get2byte(&(P)->aData[(P)->cellOffset+2*(I)]))) 00540 00541 /* 00542 ** This a more complex version of findCell() that works for 00543 ** pages that do contain overflow cells. 
See insert 00544 */ 00545 static u8 *findOverflowCell(MemPage *pPage, int iCell){ 00546 int i; 00547 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00548 for(i=pPage->nOverflow-1; i>=0; i--){ 00549 int k; 00550 struct _OvflCell *pOvfl; 00551 pOvfl = &pPage->aOvfl[i]; 00552 k = pOvfl->idx; 00553 if( k<=iCell ){ 00554 if( k==iCell ){ 00555 return pOvfl->pCell; 00556 } 00557 iCell--; 00558 } 00559 } 00560 return findCell(pPage, iCell); 00561 } 00562 00563 /* 00564 ** Parse a cell content block and fill in the CellInfo structure. There 00565 ** are two versions of this function. sqlite3BtreeParseCell() takes a 00566 ** cell index as the second argument and sqlite3BtreeParseCellPtr() 00567 ** takes a pointer to the body of the cell as its second argument. 00568 ** 00569 ** Within this file, the parseCell() macro can be called instead of 00570 ** sqlite3BtreeParseCellPtr(). Using some compilers, this will be faster. 00571 */ 00572 void sqlite3BtreeParseCellPtr( 00573 MemPage *pPage, /* Page containing the cell */ 00574 u8 *pCell, /* Pointer to the cell text. 
*/ 00575 CellInfo *pInfo /* Fill in this structure */ 00576 ){ 00577 int n; /* Number bytes in cell content header */ 00578 u32 nPayload; /* Number of bytes of cell payload */ 00579 00580 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00581 00582 pInfo->pCell = pCell; 00583 assert( pPage->leaf==0 || pPage->leaf==1 ); 00584 n = pPage->childPtrSize; 00585 assert( n==4-4*pPage->leaf ); 00586 if( pPage->intKey ){ 00587 if( pPage->hasData ){ 00588 n += getVarint32(&pCell[n], nPayload); 00589 }else{ 00590 nPayload = 0; 00591 } 00592 n += getVarint(&pCell[n], (u64*)&pInfo->nKey); 00593 pInfo->nData = nPayload; 00594 }else{ 00595 pInfo->nData = 0; 00596 n += getVarint32(&pCell[n], nPayload); 00597 pInfo->nKey = nPayload; 00598 } 00599 pInfo->nPayload = nPayload; 00600 pInfo->nHeader = n; 00601 if( likely(nPayload<=pPage->maxLocal) ){ 00602 /* This is the (easy) common case where the entire payload fits 00603 ** on the local page. No overflow is required. 00604 */ 00605 int nSize; /* Total size of cell content in bytes */ 00606 nSize = nPayload + n; 00607 pInfo->nLocal = nPayload; 00608 pInfo->iOverflow = 0; 00609 if( (nSize & ~3)==0 ){ 00610 nSize = 4; /* Minimum cell size is 4 */ 00611 } 00612 pInfo->nSize = nSize; 00613 }else{ 00614 /* If the payload will not fit completely on the local page, we have 00615 ** to decide how much to store locally and how much to spill onto 00616 ** overflow pages. The strategy is to minimize the amount of unused 00617 ** space on overflow pages while keeping the amount of local storage 00618 ** in between minLocal and maxLocal. 00619 ** 00620 ** Warning: changing the way overflow payload is distributed in any 00621 ** way will result in an incompatible file format. 
00622 */ 00623 int minLocal; /* Minimum amount of payload held locally */ 00624 int maxLocal; /* Maximum amount of payload held locally */ 00625 int surplus; /* Overflow payload available for local storage */ 00626 00627 minLocal = pPage->minLocal; 00628 maxLocal = pPage->maxLocal; 00629 surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4); 00630 if( surplus <= maxLocal ){ 00631 pInfo->nLocal = surplus; 00632 }else{ 00633 pInfo->nLocal = minLocal; 00634 } 00635 pInfo->iOverflow = pInfo->nLocal + n; 00636 pInfo->nSize = pInfo->iOverflow + 4; 00637 } 00638 } 00639 #define parseCell(pPage, iCell, pInfo) \ 00640 sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo)) 00641 void sqlite3BtreeParseCell( 00642 MemPage *pPage, /* Page containing the cell */ 00643 int iCell, /* The cell index. First cell is 0 */ 00644 CellInfo *pInfo /* Fill in this structure */ 00645 ){ 00646 parseCell(pPage, iCell, pInfo); 00647 } 00648 00649 /* 00650 ** Compute the total number of bytes that a Cell needs in the cell 00651 ** data area of the btree-page. The return number includes the cell 00652 ** data header and the local payload, but not any overflow page or 00653 ** the space used by the cell pointer. 00654 */ 00655 #ifndef NDEBUG 00656 static u16 cellSize(MemPage *pPage, int iCell){ 00657 CellInfo info; 00658 sqlite3BtreeParseCell(pPage, iCell, &info); 00659 return info.nSize; 00660 } 00661 #endif 00662 static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ 00663 CellInfo info; 00664 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 00665 return info.nSize; 00666 } 00667 00668 #ifndef SQLITE_OMIT_AUTOVACUUM 00669 /* 00670 ** If the cell pCell, part of page pPage contains a pointer 00671 ** to an overflow page, insert an entry into the pointer-map 00672 ** for the overflow page. 
00673 */ 00674 static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){ 00675 CellInfo info; 00676 assert( pCell!=0 ); 00677 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 00678 assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); 00679 if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){ 00680 Pgno ovfl = get4byte(&pCell[info.iOverflow]); 00681 return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno); 00682 } 00683 return SQLITE_OK; 00684 } 00685 /* 00686 ** If the cell with index iCell on page pPage contains a pointer 00687 ** to an overflow page, insert an entry into the pointer-map 00688 ** for the overflow page. 00689 */ 00690 static int ptrmapPutOvfl(MemPage *pPage, int iCell){ 00691 u8 *pCell; 00692 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00693 pCell = findOverflowCell(pPage, iCell); 00694 return ptrmapPutOvflPtr(pPage, pCell); 00695 } 00696 #endif 00697 00698 00699 /* 00700 ** Defragment the page given. All Cells are moved to the 00701 ** end of the page and all free space is collected into one 00702 ** big FreeBlk that occurs in between the header and cell 00703 ** pointer array and the cell content area. 
00704 */ 00705 static int defragmentPage(MemPage *pPage){ 00706 int i; /* Loop counter */ 00707 int pc; /* Address of a i-th cell */ 00708 int addr; /* Offset of first byte after cell pointer array */ 00709 int hdr; /* Offset to the page header */ 00710 int size; /* Size of a cell */ 00711 int usableSize; /* Number of usable bytes on a page */ 00712 int cellOffset; /* Offset to the cell pointer array */ 00713 int cbrk; /* Offset to the cell content area */ 00714 int nCell; /* Number of cells on the page */ 00715 unsigned char *data; /* The page data */ 00716 unsigned char *temp; /* Temp area for cell content */ 00717 00718 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 00719 assert( pPage->pBt!=0 ); 00720 assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); 00721 assert( pPage->nOverflow==0 ); 00722 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00723 temp = sqlite3PagerTempSpace(pPage->pBt->pPager); 00724 data = pPage->aData; 00725 hdr = pPage->hdrOffset; 00726 cellOffset = pPage->cellOffset; 00727 nCell = pPage->nCell; 00728 assert( nCell==get2byte(&data[hdr+3]) ); 00729 usableSize = pPage->pBt->usableSize; 00730 cbrk = get2byte(&data[hdr+5]); 00731 memcpy(&temp[cbrk], &data[cbrk], usableSize - cbrk); 00732 cbrk = usableSize; 00733 for(i=0; i<nCell; i++){ 00734 u8 *pAddr; /* The i-th cell pointer */ 00735 pAddr = &data[cellOffset + i*2]; 00736 pc = get2byte(pAddr); 00737 if (pc >= pPage->pBt->usableSize) { 00738 return SQLITE_CORRUPT_BKPT; 00739 } 00740 size = cellSizePtr(pPage, &temp[pc]); 00741 cbrk -= size; 00742 if ((cbrk < cellOffset+2*nCell) || (cbrk+size>pPage->pBt->usableSize)) { 00743 return SQLITE_CORRUPT_BKPT; 00744 } 00745 memcpy(&data[cbrk], &temp[pc], size); 00746 put2byte(pAddr, cbrk); 00747 } 00748 assert( cbrk>=cellOffset+2*nCell ); 00749 put2byte(&data[hdr+5], cbrk); 00750 data[hdr+1] = 0; 00751 data[hdr+2] = 0; 00752 data[hdr+7] = 0; 00753 addr = cellOffset+2*nCell; 00754 memset(&data[addr], 0, cbrk-addr); 00755 if( 
cbrk-addr!=pPage->nFree ){ 00756 return SQLITE_CORRUPT_BKPT; 00757 } 00758 return SQLITE_OK; 00759 } 00760 00761 /* 00762 ** Allocate nByte bytes of space on a page. 00763 ** 00764 ** Return the index into pPage->aData[] of the first byte of 00765 ** the new allocation. The caller guarantees that there is enough 00766 ** space. This routine will never fail. 00767 ** 00768 ** If the page contains nBytes of free space but does not contain 00769 ** nBytes of contiguous free space, then this routine automatically 00770 ** calls defragementPage() to consolidate all free space before 00771 ** allocating the new chunk. 00772 */ 00773 static int allocateSpace(MemPage *pPage, int nByte){ 00774 int addr, pc, hdr; 00775 int size; 00776 int nFrag; 00777 int top; 00778 int nCell; 00779 int cellOffset; 00780 unsigned char *data; 00781 00782 data = pPage->aData; 00783 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 00784 assert( pPage->pBt ); 00785 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00786 assert( nByte>=0 ); /* Minimum cell size is 4 */ 00787 assert( pPage->nFree>=nByte ); 00788 assert( pPage->nOverflow==0 ); 00789 pPage->nFree -= nByte; 00790 hdr = pPage->hdrOffset; 00791 00792 nFrag = data[hdr+7]; 00793 if( nFrag<60 ){ 00794 /* Search the freelist looking for a slot big enough to satisfy the 00795 ** space request. */ 00796 addr = hdr+1; 00797 while( (pc = get2byte(&data[addr]))>0 ){ 00798 size = get2byte(&data[pc+2]); 00799 if( size>=nByte ){ 00800 if( size<nByte+4 ){ 00801 memcpy(&data[addr], &data[pc], 2); 00802 data[hdr+7] = nFrag + size - nByte; 00803 return pc; 00804 }else{ 00805 put2byte(&data[pc+2], size-nByte); 00806 return pc + size - nByte; 00807 } 00808 } 00809 addr = pc; 00810 } 00811 } 00812 00813 /* Allocate memory from the gap in between the cell pointer array 00814 ** and the cell content area. 
00815 */ 00816 top = get2byte(&data[hdr+5]); 00817 nCell = get2byte(&data[hdr+3]); 00818 cellOffset = pPage->cellOffset; 00819 if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){ 00820 defragmentPage(pPage); 00821 top = get2byte(&data[hdr+5]); 00822 } 00823 top -= nByte; 00824 assert( cellOffset + 2*nCell <= top ); 00825 put2byte(&data[hdr+5], top); 00826 return top; 00827 } 00828 00829 /* 00830 ** Return a section of the pPage->aData to the freelist. 00831 ** The first byte of the new free block is pPage->aDisk[start] 00832 ** and the size of the block is "size" bytes. 00833 ** 00834 ** Most of the effort here is involved in coalesing adjacent 00835 ** free blocks into a single big free block. 00836 */ 00837 static void freeSpace(MemPage *pPage, int start, int size){ 00838 int addr, pbegin, hdr; 00839 unsigned char *data = pPage->aData; 00840 00841 assert( pPage->pBt!=0 ); 00842 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 00843 assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) ); 00844 assert( (start + size)<=pPage->pBt->usableSize ); 00845 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00846 assert( size>=0 ); /* Minimum cell size is 4 */ 00847 00848 #ifdef SQLITE_SECURE_DELETE 00849 /* Overwrite deleted information with zeros when the SECURE_DELETE 00850 ** option is enabled at compile-time */ 00851 memset(&data[start], 0, size); 00852 #endif 00853 00854 /* Add the space back into the linked list of freeblocks */ 00855 hdr = pPage->hdrOffset; 00856 addr = hdr + 1; 00857 while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){ 00858 assert( pbegin<=pPage->pBt->usableSize-4 ); 00859 assert( pbegin>addr ); 00860 addr = pbegin; 00861 } 00862 assert( pbegin<=pPage->pBt->usableSize-4 ); 00863 assert( pbegin>addr || pbegin==0 ); 00864 put2byte(&data[addr], start); 00865 put2byte(&data[start], pbegin); 00866 put2byte(&data[start+2], size); 00867 pPage->nFree += size; 00868 00869 /* Coalesce adjacent free blocks */ 00870 addr = pPage->hdrOffset + 1; 
00871 while( (pbegin = get2byte(&data[addr]))>0 ){ 00872 int pnext, psize; 00873 assert( pbegin>addr ); 00874 assert( pbegin<=pPage->pBt->usableSize-4 ); 00875 pnext = get2byte(&data[pbegin]); 00876 psize = get2byte(&data[pbegin+2]); 00877 if( pbegin + psize + 3 >= pnext && pnext>0 ){ 00878 int frag = pnext - (pbegin+psize); 00879 assert( frag<=data[pPage->hdrOffset+7] ); 00880 data[pPage->hdrOffset+7] -= frag; 00881 put2byte(&data[pbegin], get2byte(&data[pnext])); 00882 put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin); 00883 }else{ 00884 addr = pbegin; 00885 } 00886 } 00887 00888 /* If the cell content area begins with a freeblock, remove it. */ 00889 if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){ 00890 int top; 00891 pbegin = get2byte(&data[hdr+1]); 00892 memcpy(&data[hdr+1], &data[pbegin], 2); 00893 top = get2byte(&data[hdr+5]); 00894 put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2])); 00895 } 00896 } 00897 00898 /* 00899 ** Decode the flags byte (the first byte of the header) for a page 00900 ** and initialize fields of the MemPage structure accordingly. 00901 ** 00902 ** Only the following combinations are supported. Anything different 00903 ** indicates a corrupt database files: 00904 ** 00905 ** PTF_ZERODATA 00906 ** PTF_ZERODATA | PTF_LEAF 00907 ** PTF_LEAFDATA | PTF_INTKEY 00908 ** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF 00909 */ 00910 static int decodeFlags(MemPage *pPage, int flagByte){ 00911 BtShared *pBt; /* A copy of pPage->pBt */ 00912 00913 assert( pPage->hdrOffset==(pPage->pgno==1 ? 
100 : 0) ); 00914 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00915 pPage->leaf = flagByte>>3; assert( PTF_LEAF == 1<<3 ); 00916 flagByte &= ~PTF_LEAF; 00917 pPage->childPtrSize = 4-4*pPage->leaf; 00918 pBt = pPage->pBt; 00919 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ 00920 pPage->intKey = 1; 00921 pPage->hasData = pPage->leaf; 00922 pPage->maxLocal = pBt->maxLeaf; 00923 pPage->minLocal = pBt->minLeaf; 00924 }else if( flagByte==PTF_ZERODATA ){ 00925 pPage->intKey = 0; 00926 pPage->hasData = 0; 00927 pPage->maxLocal = pBt->maxLocal; 00928 pPage->minLocal = pBt->minLocal; 00929 }else{ 00930 return SQLITE_CORRUPT_BKPT; 00931 } 00932 return SQLITE_OK; 00933 } 00934 00935 /* 00936 ** Initialize the auxiliary information for a disk block. 00937 ** 00938 ** Return SQLITE_OK on success. If we see that the page does 00939 ** not contain a well-formed database page, then return 00940 ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not 00941 ** guarantee that the page is well-formed. It only shows that 00942 ** we failed to detect any corruption. 
00943 */ 00944 int sqlite3BtreeInitPage(MemPage *pPage){ 00945 00946 assert( pPage->pBt!=0 ); 00947 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 00948 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); 00949 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); 00950 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); 00951 00952 if( !pPage->isInit ){ 00953 int pc; /* Address of a freeblock within pPage->aData[] */ 00954 int hdr; /* Offset to beginning of page header */ 00955 u8 *data; /* Equal to pPage->aData */ 00956 BtShared *pBt; /* The main btree structure */ 00957 int usableSize; /* Amount of usable space on each page */ 00958 int cellOffset; /* Offset from start of page to first cell pointer */ 00959 int nFree; /* Number of unused bytes on the page */ 00960 int top; /* First byte of the cell content area */ 00961 00962 pBt = pPage->pBt; 00963 00964 hdr = pPage->hdrOffset; 00965 data = pPage->aData; 00966 if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT; 00967 assert( pBt->pageSize>=512 && pBt->pageSize<=32768 ); 00968 pPage->maskPage = pBt->pageSize - 1; 00969 pPage->nOverflow = 0; 00970 usableSize = pBt->usableSize; 00971 pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf; 00972 top = get2byte(&data[hdr+5]); 00973 pPage->nCell = get2byte(&data[hdr+3]); 00974 if( pPage->nCell>MX_CELL(pBt) ){ 00975 /* To many cells for a single page. 
The page must be corrupt */ 00976 return SQLITE_CORRUPT_BKPT; 00977 } 00978 00979 /* Compute the total free space on the page */ 00980 pc = get2byte(&data[hdr+1]); 00981 nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell); 00982 while( pc>0 ){ 00983 int next, size; 00984 if( pc>usableSize-4 ){ 00985 /* Free block is off the page */ 00986 return SQLITE_CORRUPT_BKPT; 00987 } 00988 next = get2byte(&data[pc]); 00989 size = get2byte(&data[pc+2]); 00990 if( next>0 && next<=pc+size+3 ){ 00991 /* Free blocks must be in accending order */ 00992 return SQLITE_CORRUPT_BKPT; 00993 } 00994 nFree += size; 00995 pc = next; 00996 } 00997 pPage->nFree = nFree; 00998 if( nFree>=usableSize ){ 00999 /* Free space cannot exceed total page size */ 01000 return SQLITE_CORRUPT_BKPT; 01001 } 01002 01003 #if 0 01004 /* Check that all the offsets in the cell offset array are within range. 01005 ** 01006 ** Omitting this consistency check and using the pPage->maskPage mask 01007 ** to prevent overrunning the page buffer in findCell() results in a 01008 ** 2.5% performance gain. 01009 */ 01010 { 01011 u8 *pOff; /* Iterator used to check all cell offsets are in range */ 01012 u8 *pEnd; /* Pointer to end of cell offset array */ 01013 u8 mask; /* Mask of bits that must be zero in MSB of cell offsets */ 01014 mask = ~(((u8)(pBt->pageSize>>8))-1); 01015 pEnd = &data[cellOffset + pPage->nCell*2]; 01016 for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2); 01017 if( pOff!=pEnd ){ 01018 return SQLITE_CORRUPT_BKPT; 01019 } 01020 } 01021 #endif 01022 01023 pPage->isInit = 1; 01024 } 01025 return SQLITE_OK; 01026 } 01027 01028 /* 01029 ** Set up a raw page so that it looks like a database page holding 01030 ** no entries. 
01031 */ 01032 static void zeroPage(MemPage *pPage, int flags){ 01033 unsigned char *data = pPage->aData; 01034 BtShared *pBt = pPage->pBt; 01035 int hdr = pPage->hdrOffset; 01036 int first; 01037 01038 assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno ); 01039 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); 01040 assert( sqlite3PagerGetData(pPage->pDbPage) == data ); 01041 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 01042 assert( sqlite3_mutex_held(pBt->mutex) ); 01043 /*memset(&data[hdr], 0, pBt->usableSize - hdr);*/ 01044 data[hdr] = flags; 01045 first = hdr + 8 + 4*((flags&PTF_LEAF)==0); 01046 memset(&data[hdr+1], 0, 4); 01047 data[hdr+7] = 0; 01048 put2byte(&data[hdr+5], pBt->usableSize); 01049 pPage->nFree = pBt->usableSize - first; 01050 decodeFlags(pPage, flags); 01051 pPage->hdrOffset = hdr; 01052 pPage->cellOffset = first; 01053 pPage->nOverflow = 0; 01054 assert( pBt->pageSize>=512 && pBt->pageSize<=32768 ); 01055 pPage->maskPage = pBt->pageSize - 1; 01056 pPage->nCell = 0; 01057 pPage->isInit = 1; 01058 } 01059 01060 01061 /* 01062 ** Convert a DbPage obtained from the pager into a MemPage used by 01063 ** the btree layer. 01064 */ 01065 static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ 01066 MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); 01067 pPage->aData = sqlite3PagerGetData(pDbPage); 01068 pPage->pDbPage = pDbPage; 01069 pPage->pBt = pBt; 01070 pPage->pgno = pgno; 01071 pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; 01072 return pPage; 01073 } 01074 01075 /* 01076 ** Get a page from the pager. Initialize the MemPage.pBt and 01077 ** MemPage.aData elements if needed. 01078 ** 01079 ** If the noContent flag is set, it means that we do not care about 01080 ** the content of the page at this time. So do not go to the disk 01081 ** to fetch the content. Just fill in the content with zeros for now. 
01082 ** If in the future we call sqlite3PagerWrite() on this page, that 01083 ** means we have started to be concerned about content and the disk 01084 ** read should occur at that point. 01085 */ 01086 int sqlite3BtreeGetPage( 01087 BtShared *pBt, /* The btree */ 01088 Pgno pgno, /* Number of the page to fetch */ 01089 MemPage **ppPage, /* Return the page in this parameter */ 01090 int noContent /* Do not load page content if true */ 01091 ){ 01092 int rc; 01093 DbPage *pDbPage; 01094 01095 assert( sqlite3_mutex_held(pBt->mutex) ); 01096 rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); 01097 if( rc ) return rc; 01098 *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); 01099 return SQLITE_OK; 01100 } 01101 01102 /* 01103 ** Return the size of the database file in pages. Or return -1 if 01104 ** there is any kind of error. 01105 */ 01106 static int pagerPagecount(Pager *pPager){ 01107 int rc; 01108 int nPage; 01109 rc = sqlite3PagerPagecount(pPager, &nPage); 01110 return (rc==SQLITE_OK?nPage:-1); 01111 } 01112 01113 /* 01114 ** Get a page from the pager and initialize it. This routine 01115 ** is just a convenience wrapper around separate calls to 01116 ** sqlite3BtreeGetPage() and sqlite3BtreeInitPage(). 01117 */ 01118 static int getAndInitPage( 01119 BtShared *pBt, /* The database file */ 01120 Pgno pgno, /* Number of the page to get */ 01121 MemPage **ppPage /* Write the page pointer here */ 01122 ){ 01123 int rc; 01124 DbPage *pDbPage; 01125 MemPage *pPage; 01126 01127 assert( sqlite3_mutex_held(pBt->mutex) ); 01128 if( pgno==0 ){ 01129 return SQLITE_CORRUPT_BKPT; 01130 } 01131 01132 /* It is often the case that the page we want is already in cache. 01133 ** If so, get it directly. This saves us from having to call 01134 ** pagerPagecount() to make sure pgno is within limits, which results 01135 ** in a measureable performance improvements. 
01136 */ 01137 pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); 01138 if( pDbPage ){ 01139 /* Page is already in cache */ 01140 *ppPage = pPage = btreePageFromDbPage(pDbPage, pgno, pBt); 01141 rc = SQLITE_OK; 01142 }else{ 01143 /* Page not in cache. Acquire it. */ 01144 if( pgno>pagerPagecount(pBt->pPager) ){ 01145 return SQLITE_CORRUPT_BKPT; 01146 } 01147 rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0); 01148 if( rc ) return rc; 01149 pPage = *ppPage; 01150 } 01151 if( !pPage->isInit ){ 01152 rc = sqlite3BtreeInitPage(pPage); 01153 } 01154 if( rc!=SQLITE_OK ){ 01155 releasePage(pPage); 01156 *ppPage = 0; 01157 } 01158 return rc; 01159 } 01160 01161 /* 01162 ** Release a MemPage. This should be called once for each prior 01163 ** call to sqlite3BtreeGetPage. 01164 */ 01165 static void releasePage(MemPage *pPage){ 01166 if( pPage ){ 01167 assert( pPage->aData ); 01168 assert( pPage->pBt ); 01169 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); 01170 assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); 01171 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 01172 sqlite3PagerUnref(pPage->pDbPage); 01173 } 01174 } 01175 01176 /* 01177 ** During a rollback, when the pager reloads information into the cache 01178 ** so that the cache is restored to its original state at the start of 01179 ** the transaction, for each page restored this routine is called. 01180 ** 01181 ** This routine needs to reset the extra data section at the end of the 01182 ** page to agree with the restored data. 01183 */ 01184 static void pageReinit(DbPage *pData){ 01185 MemPage *pPage; 01186 pPage = (MemPage *)sqlite3PagerGetExtra(pData); 01187 if( pPage->isInit ){ 01188 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 01189 pPage->isInit = 0; 01190 if( sqlite3PagerPageRefcount(pData)>0 ){ 01191 sqlite3BtreeInitPage(pPage); 01192 } 01193 } 01194 } 01195 01196 /* 01197 ** Invoke the busy handler for a btree. 
01198 */ 01199 static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){ 01200 BtShared *pBt = (BtShared*)pArg; 01201 assert( pBt->db ); 01202 assert( sqlite3_mutex_held(pBt->db->mutex) ); 01203 return sqlite3InvokeBusyHandler(&pBt->db->busyHandler); 01204 } 01205 01206 /* 01207 ** Open a database file. 01208 ** 01209 ** zFilename is the name of the database file. If zFilename is NULL 01210 ** a new database with a random name is created. This randomly named 01211 ** database file will be deleted when sqlite3BtreeClose() is called. 01212 ** If zFilename is ":memory:" then an in-memory database is created 01213 ** that is automatically destroyed when it is closed. 01214 */ 01215 int sqlite3BtreeOpen( 01216 const char *zFilename, /* Name of the file containing the BTree database */ 01217 sqlite3 *db, /* Associated database handle */ 01218 Btree **ppBtree, /* Pointer to new Btree object written here */ 01219 int flags, /* Options */ 01220 int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ 01221 ){ 01222 sqlite3_vfs *pVfs; /* The VFS to use for this btree */ 01223 BtShared *pBt = 0; /* Shared part of btree structure */ 01224 Btree *p; /* Handle to return */ 01225 int rc = SQLITE_OK; 01226 int nReserve; 01227 unsigned char zDbHeader[100]; 01228 01229 /* Set the variable isMemdb to true for an in-memory database, or 01230 ** false for a file-based database. This symbol is only required if 01231 ** either of the shared-data or autovacuum features are compiled 01232 ** into the library. 
01233 */ 01234 #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM) 01235 #ifdef SQLITE_OMIT_MEMORYDB 01236 const int isMemdb = 0; 01237 #else 01238 const int isMemdb = zFilename && !strcmp(zFilename, ":memory:"); 01239 #endif 01240 #endif 01241 01242 assert( db!=0 ); 01243 assert( sqlite3_mutex_held(db->mutex) ); 01244 01245 pVfs = db->pVfs; 01246 p = sqlite3MallocZero(sizeof(Btree)); 01247 if( !p ){ 01248 return SQLITE_NOMEM; 01249 } 01250 p->inTrans = TRANS_NONE; 01251 p->db = db; 01252 01253 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 01254 /* 01255 ** If this Btree is a candidate for shared cache, try to find an 01256 ** existing BtShared object that we can share with 01257 */ 01258 if( isMemdb==0 01259 && (db->flags & SQLITE_Vtab)==0 01260 && zFilename && zFilename[0] 01261 ){ 01262 if( sqlite3GlobalConfig.sharedCacheEnabled ){ 01263 int nFullPathname = pVfs->mxPathname+1; 01264 char *zFullPathname = sqlite3Malloc(nFullPathname); 01265 sqlite3_mutex *mutexShared; 01266 p->sharable = 1; 01267 db->flags |= SQLITE_SharedCache; 01268 if( !zFullPathname ){ 01269 sqlite3_free(p); 01270 return SQLITE_NOMEM; 01271 } 01272 sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname); 01273 mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); 01274 sqlite3_mutex_enter(mutexShared); 01275 for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ 01276 assert( pBt->nRef>0 ); 01277 if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager)) 01278 && sqlite3PagerVfs(pBt->pPager)==pVfs ){ 01279 p->pBt = pBt; 01280 pBt->nRef++; 01281 break; 01282 } 01283 } 01284 sqlite3_mutex_leave(mutexShared); 01285 sqlite3_free(zFullPathname); 01286 } 01287 #ifdef SQLITE_DEBUG 01288 else{ 01289 /* In debug mode, we mark all persistent databases as sharable 01290 ** even when they are not. 
This exercises the locking code and 01291 ** gives more opportunity for asserts(sqlite3_mutex_held()) 01292 ** statements to find locking problems. 01293 */ 01294 p->sharable = 1; 01295 } 01296 #endif 01297 } 01298 #endif 01299 if( pBt==0 ){ 01300 /* 01301 ** The following asserts make sure that structures used by the btree are 01302 ** the right size. This is to guard against size changes that result 01303 ** when compiling on a different architecture. 01304 */ 01305 assert( sizeof(i64)==8 || sizeof(i64)==4 ); 01306 assert( sizeof(u64)==8 || sizeof(u64)==4 ); 01307 assert( sizeof(u32)==4 ); 01308 assert( sizeof(u16)==2 ); 01309 assert( sizeof(Pgno)==4 ); 01310 01311 pBt = sqlite3MallocZero( sizeof(*pBt) ); 01312 if( pBt==0 ){ 01313 rc = SQLITE_NOMEM; 01314 goto btree_open_out; 01315 } 01316 pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler; 01317 pBt->busyHdr.pArg = pBt; 01318 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, 01319 EXTRA_SIZE, flags, vfsFlags); 01320 if( rc==SQLITE_OK ){ 01321 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); 01322 } 01323 if( rc!=SQLITE_OK ){ 01324 goto btree_open_out; 01325 } 01326 sqlite3PagerSetBusyhandler(pBt->pPager, &pBt->busyHdr); 01327 p->pBt = pBt; 01328 01329 sqlite3PagerSetReiniter(pBt->pPager, pageReinit); 01330 pBt->pCursor = 0; 01331 pBt->pPage1 = 0; 01332 pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager); 01333 pBt->pageSize = get2byte(&zDbHeader[16]); 01334 if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE 01335 || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ 01336 pBt->pageSize = 0; 01337 sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); 01338 #ifndef SQLITE_OMIT_AUTOVACUUM 01339 /* If the magic name ":memory:" will create an in-memory database, then 01340 ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if 01341 ** SQLITE_DEFAULT_AUTOVACUUM is true. 
On the other hand, if 01342 ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a 01343 ** regular file-name. In this case the auto-vacuum applies as per normal. 01344 */ 01345 if( zFilename && !isMemdb ){ 01346 pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); 01347 pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); 01348 } 01349 #endif 01350 nReserve = 0; 01351 }else{ 01352 nReserve = zDbHeader[20]; 01353 pBt->pageSizeFixed = 1; 01354 #ifndef SQLITE_OMIT_AUTOVACUUM 01355 pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); 01356 pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); 01357 #endif 01358 } 01359 pBt->usableSize = pBt->pageSize - nReserve; 01360 assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ 01361 sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); 01362 01363 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 01364 /* Add the new BtShared object to the linked list sharable BtShareds. 01365 */ 01366 if( p->sharable ){ 01367 sqlite3_mutex *mutexShared; 01368 pBt->nRef = 1; 01369 mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); 01370 if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ 01371 pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); 01372 if( pBt->mutex==0 ){ 01373 rc = SQLITE_NOMEM; 01374 db->mallocFailed = 0; 01375 goto btree_open_out; 01376 } 01377 } 01378 sqlite3_mutex_enter(mutexShared); 01379 pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); 01380 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; 01381 sqlite3_mutex_leave(mutexShared); 01382 } 01383 #endif 01384 } 01385 01386 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 01387 /* If the new Btree uses a sharable pBtShared, then link the new 01388 ** Btree into the list of all sharable Btrees for the same connection. 01389 ** The list is kept in ascending order by pBt address. 
01390 */ 01391 if( p->sharable ){ 01392 int i; 01393 Btree *pSib; 01394 for(i=0; i<db->nDb; i++){ 01395 if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){ 01396 while( pSib->pPrev ){ pSib = pSib->pPrev; } 01397 if( p->pBt<pSib->pBt ){ 01398 p->pNext = pSib; 01399 p->pPrev = 0; 01400 pSib->pPrev = p; 01401 }else{ 01402 while( pSib->pNext && pSib->pNext->pBt<p->pBt ){ 01403 pSib = pSib->pNext; 01404 } 01405 p->pNext = pSib->pNext; 01406 p->pPrev = pSib; 01407 if( p->pNext ){ 01408 p->pNext->pPrev = p; 01409 } 01410 pSib->pNext = p; 01411 } 01412 break; 01413 } 01414 } 01415 } 01416 #endif 01417 *ppBtree = p; 01418 01419 btree_open_out: 01420 if( rc!=SQLITE_OK ){ 01421 if( pBt && pBt->pPager ){ 01422 sqlite3PagerClose(pBt->pPager); 01423 } 01424 sqlite3_free(pBt); 01425 sqlite3_free(p); 01426 *ppBtree = 0; 01427 } 01428 return rc; 01429 } 01430 01431 /* 01432 ** Decrement the BtShared.nRef counter. When it reaches zero, 01433 ** remove the BtShared structure from the sharing list. Return 01434 ** true if the BtShared.nRef counter reaches zero and return 01435 ** false if it is still positive. 
01436 */ 01437 static int removeFromSharingList(BtShared *pBt){ 01438 #ifndef SQLITE_OMIT_SHARED_CACHE 01439 sqlite3_mutex *pMaster; 01440 BtShared *pList; 01441 int removed = 0; 01442 01443 assert( sqlite3_mutex_notheld(pBt->mutex) ); 01444 pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); 01445 sqlite3_mutex_enter(pMaster); 01446 pBt->nRef--; 01447 if( pBt->nRef<=0 ){ 01448 if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){ 01449 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext; 01450 }else{ 01451 pList = GLOBAL(BtShared*,sqlite3SharedCacheList); 01452 while( ALWAYS(pList) && pList->pNext!=pBt ){ 01453 pList=pList->pNext; 01454 } 01455 if( ALWAYS(pList) ){ 01456 pList->pNext = pBt->pNext; 01457 } 01458 } 01459 if( SQLITE_THREADSAFE ){ 01460 sqlite3_mutex_free(pBt->mutex); 01461 } 01462 removed = 1; 01463 } 01464 sqlite3_mutex_leave(pMaster); 01465 return removed; 01466 #else 01467 return 1; 01468 #endif 01469 } 01470 01471 /* 01472 ** Make sure pBt->pTmpSpace points to an allocation of 01473 ** MX_CELL_SIZE(pBt) bytes. 01474 */ 01475 static void allocateTempSpace(BtShared *pBt){ 01476 if( !pBt->pTmpSpace ){ 01477 pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); 01478 } 01479 } 01480 01481 /* 01482 ** Free the pBt->pTmpSpace allocation 01483 */ 01484 static void freeTempSpace(BtShared *pBt){ 01485 sqlite3PageFree( pBt->pTmpSpace); 01486 pBt->pTmpSpace = 0; 01487 } 01488 01489 /* 01490 ** Close an open database and invalidate all cursors. 01491 */ 01492 int sqlite3BtreeClose(Btree *p){ 01493 BtShared *pBt = p->pBt; 01494 BtCursor *pCur; 01495 01496 /* Close all cursors opened via this handle. 
*/ 01497 assert( sqlite3_mutex_held(p->db->mutex) ); 01498 sqlite3BtreeEnter(p); 01499 pBt->db = p->db; 01500 pCur = pBt->pCursor; 01501 while( pCur ){ 01502 BtCursor *pTmp = pCur; 01503 pCur = pCur->pNext; 01504 if( pTmp->pBtree==p ){ 01505 sqlite3BtreeCloseCursor(pTmp); 01506 } 01507 } 01508 01509 /* Rollback any active transaction and free the handle structure. 01510 ** The call to sqlite3BtreeRollback() drops any table-locks held by 01511 ** this handle. 01512 */ 01513 sqlite3BtreeRollback(p); 01514 sqlite3BtreeLeave(p); 01515 01516 /* If there are still other outstanding references to the shared-btree 01517 ** structure, return now. The remainder of this procedure cleans 01518 ** up the shared-btree. 01519 */ 01520 assert( p->wantToLock==0 && p->locked==0 ); 01521 if( !p->sharable || removeFromSharingList(pBt) ){ 01522 /* The pBt is no longer on the sharing list, so we can access 01523 ** it without having to hold the mutex. 01524 ** 01525 ** Clean out and delete the BtShared object. 01526 */ 01527 assert( !pBt->pCursor ); 01528 sqlite3PagerClose(pBt->pPager); 01529 if( pBt->xFreeSchema && pBt->pSchema ){ 01530 pBt->xFreeSchema(pBt->pSchema); 01531 } 01532 sqlite3_free(pBt->pSchema); 01533 freeTempSpace(pBt); 01534 sqlite3_free(pBt); 01535 } 01536 01537 #ifndef SQLITE_OMIT_SHARED_CACHE 01538 assert( p->wantToLock==0 ); 01539 assert( p->locked==0 ); 01540 if( p->pPrev ) p->pPrev->pNext = p->pNext; 01541 if( p->pNext ) p->pNext->pPrev = p->pPrev; 01542 #endif 01543 01544 sqlite3_free(p); 01545 return SQLITE_OK; 01546 } 01547 01548 /* 01549 ** Change the limit on the number of pages allowed in the cache. 01550 ** 01551 ** The maximum number of cache pages is set to the absolute 01552 ** value of mxPage. If mxPage is negative, the pager will 01553 ** operate asynchronously - it will not stop to do fsync()s 01554 ** to insure data is written to the disk surface before 01555 ** continuing. 
Transactions still work if synchronous is off, 01556 ** and the database cannot be corrupted if this program 01557 ** crashes. But if the operating system crashes or there is 01558 ** an abrupt power failure when synchronous is off, the database 01559 ** could be left in an inconsistent and unrecoverable state. 01560 ** Synchronous is on by default so database corruption is not 01561 ** normally a worry. 01562 */ 01563 int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ 01564 BtShared *pBt = p->pBt; 01565 assert( sqlite3_mutex_held(p->db->mutex) ); 01566 sqlite3BtreeEnter(p); 01567 sqlite3PagerSetCachesize(pBt->pPager, mxPage); 01568 sqlite3BtreeLeave(p); 01569 return SQLITE_OK; 01570 } 01571 01572 /* 01573 ** Change the way data is synced to disk in order to increase or decrease 01574 ** how well the database resists damage due to OS crashes and power 01575 ** failures. Level 1 is the same as asynchronous (no syncs() occur and 01576 ** there is a high probability of damage) Level 2 is the default. There 01577 ** is a very low but non-zero probability of damage. Level 3 reduces the 01578 ** probability of damage to near zero but with a write performance reduction. 01579 */ 01580 #ifndef SQLITE_OMIT_PAGER_PRAGMAS 01581 int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){ 01582 BtShared *pBt = p->pBt; 01583 assert( sqlite3_mutex_held(p->db->mutex) ); 01584 sqlite3BtreeEnter(p); 01585 sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync); 01586 sqlite3BtreeLeave(p); 01587 return SQLITE_OK; 01588 } 01589 #endif 01590 01591 /* 01592 ** Return TRUE if the given btree is set to safety level 1. In other 01593 ** words, return TRUE if no sync() occurs on the disk files. 
01594 */ 01595 int sqlite3BtreeSyncDisabled(Btree *p){ 01596 BtShared *pBt = p->pBt; 01597 int rc; 01598 assert( sqlite3_mutex_held(p->db->mutex) ); 01599 sqlite3BtreeEnter(p); 01600 assert( pBt && pBt->pPager ); 01601 rc = sqlite3PagerNosync(pBt->pPager); 01602 sqlite3BtreeLeave(p); 01603 return rc; 01604 } 01605 01606 #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) 01607 /* 01608 ** Change the default pages size and the number of reserved bytes per page. 01609 ** 01610 ** The page size must be a power of 2 between 512 and 65536. If the page 01611 ** size supplied does not meet this constraint then the page size is not 01612 ** changed. 01613 ** 01614 ** Page sizes are constrained to be a power of two so that the region 01615 ** of the database file used for locking (beginning at PENDING_BYTE, 01616 ** the first byte past the 1GB boundary, 0x40000000) needs to occur 01617 ** at the beginning of a page. 01618 ** 01619 ** If parameter nReserve is less than zero, then the number of reserved 01620 ** bytes per page is left unchanged. 
01621 */ 01622 int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){ 01623 int rc = SQLITE_OK; 01624 BtShared *pBt = p->pBt; 01625 sqlite3BtreeEnter(p); 01626 if( pBt->pageSizeFixed ){ 01627 sqlite3BtreeLeave(p); 01628 return SQLITE_READONLY; 01629 } 01630 if( nReserve<0 ){ 01631 nReserve = pBt->pageSize - pBt->usableSize; 01632 } 01633 if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && 01634 ((pageSize-1)&pageSize)==0 ){ 01635 assert( (pageSize & 7)==0 ); 01636 assert( !pBt->pPage1 && !pBt->pCursor ); 01637 pBt->pageSize = pageSize; 01638 freeTempSpace(pBt); 01639 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); 01640 } 01641 pBt->usableSize = pBt->pageSize - nReserve; 01642 sqlite3BtreeLeave(p); 01643 return rc; 01644 } 01645 01646 /* 01647 ** Return the currently defined page size 01648 */ 01649 int sqlite3BtreeGetPageSize(Btree *p){ 01650 return p->pBt->pageSize; 01651 } 01652 int sqlite3BtreeGetReserve(Btree *p){ 01653 int n; 01654 sqlite3BtreeEnter(p); 01655 n = p->pBt->pageSize - p->pBt->usableSize; 01656 sqlite3BtreeLeave(p); 01657 return n; 01658 } 01659 01660 /* 01661 ** Set the maximum page count for a database if mxPage is positive. 01662 ** No changes are made if mxPage is 0 or negative. 01663 ** Regardless of the value of mxPage, return the maximum page count. 01664 */ 01665 int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ 01666 int n; 01667 sqlite3BtreeEnter(p); 01668 n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); 01669 sqlite3BtreeLeave(p); 01670 return n; 01671 } 01672 #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */ 01673 01674 /* 01675 ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' 01676 ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it 01677 ** is disabled. The default value for the auto-vacuum property is 01678 ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. 
01679 */ 01680 int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ 01681 #ifdef SQLITE_OMIT_AUTOVACUUM 01682 return SQLITE_READONLY; 01683 #else 01684 BtShared *pBt = p->pBt; 01685 int rc = SQLITE_OK; 01686 int av = (autoVacuum?1:0); 01687 01688 sqlite3BtreeEnter(p); 01689 if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){ 01690 rc = SQLITE_READONLY; 01691 }else{ 01692 pBt->autoVacuum = av; 01693 } 01694 sqlite3BtreeLeave(p); 01695 return rc; 01696 #endif 01697 } 01698 01699 /* 01700 ** Return the value of the 'auto-vacuum' property. If auto-vacuum is 01701 ** enabled 1 is returned. Otherwise 0. 01702 */ 01703 int sqlite3BtreeGetAutoVacuum(Btree *p){ 01704 #ifdef SQLITE_OMIT_AUTOVACUUM 01705 return BTREE_AUTOVACUUM_NONE; 01706 #else 01707 int rc; 01708 sqlite3BtreeEnter(p); 01709 rc = ( 01710 (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: 01711 (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: 01712 BTREE_AUTOVACUUM_INCR 01713 ); 01714 sqlite3BtreeLeave(p); 01715 return rc; 01716 #endif 01717 } 01718 01719 01720 /* 01721 ** Get a reference to pPage1 of the database file. This will 01722 ** also acquire a readlock on that file. 01723 ** 01724 ** SQLITE_OK is returned on success. If the file is not a 01725 ** well-formed database file, then SQLITE_CORRUPT is returned. 01726 ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM 01727 ** is returned if we run out of memory. 01728 */ 01729 static int lockBtree(BtShared *pBt){ 01730 int rc; 01731 MemPage *pPage1; 01732 int nPage; 01733 01734 assert( sqlite3_mutex_held(pBt->mutex) ); 01735 if( pBt->pPage1 ) return SQLITE_OK; 01736 rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0); 01737 if( rc!=SQLITE_OK ) return rc; 01738 01739 /* Do some checking to help insure the file we opened really is 01740 ** a valid database file. 
01741 */ 01742 rc = sqlite3PagerPagecount(pBt->pPager, &nPage); 01743 if( rc!=SQLITE_OK ){ 01744 goto page1_init_failed; 01745 }else if( nPage>0 ){ 01746 int pageSize; 01747 int usableSize; 01748 u8 *page1 = pPage1->aData; 01749 rc = SQLITE_NOTADB; 01750 if( memcmp(page1, zMagicHeader, 16)!=0 ){ 01751 goto page1_init_failed; 01752 } 01753 if( page1[18]>1 ){ 01754 pBt->readOnly = 1; 01755 } 01756 if( page1[19]>1 ){ 01757 goto page1_init_failed; 01758 } 01759 01760 /* The maximum embedded fraction must be exactly 25%. And the minimum 01761 ** embedded fraction must be 12.5% for both leaf-data and non-leaf-data. 01762 ** The original design allowed these amounts to vary, but as of 01763 ** version 3.6.0, we require them to be fixed. 01764 */ 01765 if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ 01766 goto page1_init_failed; 01767 } 01768 pageSize = get2byte(&page1[16]); 01769 if( ((pageSize-1)&pageSize)!=0 || pageSize<512 || 01770 (SQLITE_MAX_PAGE_SIZE<32768 && pageSize>SQLITE_MAX_PAGE_SIZE) 01771 ){ 01772 goto page1_init_failed; 01773 } 01774 assert( (pageSize & 7)==0 ); 01775 usableSize = pageSize - page1[20]; 01776 if( pageSize!=pBt->pageSize ){ 01777 /* After reading the first page of the database assuming a page size 01778 ** of BtShared.pageSize, we have discovered that the page-size is 01779 ** actually pageSize. Unlock the database, leave pBt->pPage1 at 01780 ** zero and return SQLITE_OK. The caller will call this function 01781 ** again with the correct page-size. 
01782 */ 01783 releasePage(pPage1); 01784 pBt->usableSize = usableSize; 01785 pBt->pageSize = pageSize; 01786 freeTempSpace(pBt); 01787 sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); 01788 return SQLITE_OK; 01789 } 01790 if( usableSize<500 ){ 01791 goto page1_init_failed; 01792 } 01793 pBt->pageSize = pageSize; 01794 pBt->usableSize = usableSize; 01795 #ifndef SQLITE_OMIT_AUTOVACUUM 01796 pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); 01797 pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); 01798 #endif 01799 } 01800 01801 /* maxLocal is the maximum amount of payload to store locally for 01802 ** a cell. Make sure it is small enough so that at least minFanout 01803 ** cells can will fit on one page. We assume a 10-byte page header. 01804 ** Besides the payload, the cell must store: 01805 ** 2-byte pointer to the cell 01806 ** 4-byte child pointer 01807 ** 9-byte nKey value 01808 ** 4-byte nData value 01809 ** 4-byte overflow page pointer 01810 ** So a cell consists of a 2-byte poiner, a header which is as much as 01811 ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow 01812 ** page pointer. 01813 */ 01814 pBt->maxLocal = (pBt->usableSize-12)*64/255 - 23; 01815 pBt->minLocal = (pBt->usableSize-12)*32/255 - 23; 01816 pBt->maxLeaf = pBt->usableSize - 35; 01817 pBt->minLeaf = (pBt->usableSize-12)*32/255 - 23; 01818 assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); 01819 pBt->pPage1 = pPage1; 01820 return SQLITE_OK; 01821 01822 page1_init_failed: 01823 releasePage(pPage1); 01824 pBt->pPage1 = 0; 01825 return rc; 01826 } 01827 01828 /* 01829 ** This routine works like lockBtree() except that it also invokes the 01830 ** busy callback if there is lock contention. 
01831 */ 01832 static int lockBtreeWithRetry(Btree *pRef){ 01833 int rc = SQLITE_OK; 01834 01835 assert( sqlite3BtreeHoldsMutex(pRef) ); 01836 if( pRef->inTrans==TRANS_NONE ){ 01837 u8 inTransaction = pRef->pBt->inTransaction; 01838 btreeIntegrity(pRef); 01839 rc = sqlite3BtreeBeginTrans(pRef, 0); 01840 pRef->pBt->inTransaction = inTransaction; 01841 pRef->inTrans = TRANS_NONE; 01842 if( rc==SQLITE_OK ){ 01843 pRef->pBt->nTransaction--; 01844 } 01845 btreeIntegrity(pRef); 01846 } 01847 return rc; 01848 } 01849 01850 01851 /* 01852 ** If there are no outstanding cursors and we are not in the middle 01853 ** of a transaction but there is a read lock on the database, then 01854 ** this routine unrefs the first page of the database file which 01855 ** has the effect of releasing the read lock. 01856 ** 01857 ** If there are any outstanding cursors, this routine is a no-op. 01858 ** 01859 ** If there is a transaction in progress, this routine is a no-op. 01860 */ 01861 static void unlockBtreeIfUnused(BtShared *pBt){ 01862 assert( sqlite3_mutex_held(pBt->mutex) ); 01863 if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){ 01864 if( sqlite3PagerRefcount(pBt->pPager)>=1 ){ 01865 assert( pBt->pPage1->aData ); 01866 #if 0 01867 if( pBt->pPage1->aData==0 ){ 01868 MemPage *pPage = pBt->pPage1; 01869 pPage->aData = sqlite3PagerGetData(pPage->pDbPage); 01870 pPage->pBt = pBt; 01871 pPage->pgno = 1; 01872 } 01873 #endif 01874 releasePage(pBt->pPage1); 01875 } 01876 pBt->pPage1 = 0; 01877 pBt->inStmt = 0; 01878 } 01879 } 01880 01881 /* 01882 ** Create a new database by initializing the first page of the 01883 ** file. 
01884 */ 01885 static int newDatabase(BtShared *pBt){ 01886 MemPage *pP1; 01887 unsigned char *data; 01888 int rc; 01889 int nPage; 01890 01891 assert( sqlite3_mutex_held(pBt->mutex) ); 01892 rc = sqlite3PagerPagecount(pBt->pPager, &nPage); 01893 if( rc!=SQLITE_OK || nPage>0 ){ 01894 return rc; 01895 } 01896 pP1 = pBt->pPage1; 01897 assert( pP1!=0 ); 01898 data = pP1->aData; 01899 rc = sqlite3PagerWrite(pP1->pDbPage); 01900 if( rc ) return rc; 01901 memcpy(data, zMagicHeader, sizeof(zMagicHeader)); 01902 assert( sizeof(zMagicHeader)==16 ); 01903 put2byte(&data[16], pBt->pageSize); 01904 data[18] = 1; 01905 data[19] = 1; 01906 data[20] = pBt->pageSize - pBt->usableSize; 01907 data[21] = 64; 01908 data[22] = 32; 01909 data[23] = 32; 01910 memset(&data[24], 0, 100-24); 01911 zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); 01912 pBt->pageSizeFixed = 1; 01913 #ifndef SQLITE_OMIT_AUTOVACUUM 01914 assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); 01915 assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); 01916 put4byte(&data[36 + 4*4], pBt->autoVacuum); 01917 put4byte(&data[36 + 7*4], pBt->incrVacuum); 01918 #endif 01919 return SQLITE_OK; 01920 } 01921 01922 /* 01923 ** Attempt to start a new transaction. A write-transaction 01924 ** is started if the second argument is nonzero, otherwise a read- 01925 ** transaction. If the second argument is 2 or more and exclusive 01926 ** transaction is started, meaning that no other process is allowed 01927 ** to access the database. A preexisting transaction may not be 01928 ** upgraded to exclusive by calling this routine a second time - the 01929 ** exclusivity flag only works for a new transaction. 01930 ** 01931 ** A write-transaction must be started before attempting any 01932 ** changes to the database. 
None of the following routines 01933 ** will work unless a transaction is started first: 01934 ** 01935 ** sqlite3BtreeCreateTable() 01936 ** sqlite3BtreeCreateIndex() 01937 ** sqlite3BtreeClearTable() 01938 ** sqlite3BtreeDropTable() 01939 ** sqlite3BtreeInsert() 01940 ** sqlite3BtreeDelete() 01941 ** sqlite3BtreeUpdateMeta() 01942 ** 01943 ** If an initial attempt to acquire the lock fails because of lock contention 01944 ** and the database was previously unlocked, then invoke the busy handler 01945 ** if there is one. But if there was previously a read-lock, do not 01946 ** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is 01947 ** returned when there is already a read-lock in order to avoid a deadlock. 01948 ** 01949 ** Suppose there are two processes A and B. A has a read lock and B has 01950 ** a reserved lock. B tries to promote to exclusive but is blocked because 01951 ** of A's read lock. A tries to promote to reserved but is blocked by B. 01952 ** One or the other of the two processes must give way or there can be 01953 ** no progress. By returning SQLITE_BUSY and not invoking the busy callback 01954 ** when A already has a read lock, we encourage A to give up and let B 01955 ** proceed. 01956 */ 01957 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ 01958 BtShared *pBt = p->pBt; 01959 int rc = SQLITE_OK; 01960 01961 sqlite3BtreeEnter(p); 01962 pBt->db = p->db; 01963 btreeIntegrity(p); 01964 01965 /* If the btree is already in a write-transaction, or it 01966 ** is already in a read-transaction and a read-transaction 01967 ** is requested, this is a no-op. 
01968 */ 01969 if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ 01970 goto trans_begun; 01971 } 01972 01973 /* Write transactions are not possible on a read-only database */ 01974 if( pBt->readOnly && wrflag ){ 01975 rc = SQLITE_READONLY; 01976 goto trans_begun; 01977 } 01978 01979 /* If another database handle has already opened a write transaction 01980 ** on this shared-btree structure and a second write transaction is 01981 ** requested, return SQLITE_BUSY. 01982 */ 01983 if( pBt->inTransaction==TRANS_WRITE && wrflag ){ 01984 rc = SQLITE_BUSY; 01985 goto trans_begun; 01986 } 01987 01988 #ifndef SQLITE_OMIT_SHARED_CACHE 01989 if( wrflag>1 ){ 01990 BtLock *pIter; 01991 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ 01992 if( pIter->pBtree!=p ){ 01993 rc = SQLITE_BUSY; 01994 goto trans_begun; 01995 } 01996 } 01997 } 01998 #endif 01999 02000 do { 02001 if( pBt->pPage1==0 ){ 02002 do{ 02003 rc = lockBtree(pBt); 02004 }while( pBt->pPage1==0 && rc==SQLITE_OK ); 02005 } 02006 02007 if( rc==SQLITE_OK && wrflag ){ 02008 if( pBt->readOnly ){ 02009 rc = SQLITE_READONLY; 02010 }else{ 02011 rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1); 02012 if( rc==SQLITE_OK ){ 02013 rc = newDatabase(pBt); 02014 } 02015 } 02016 } 02017 02018 if( rc==SQLITE_OK ){ 02019 if( wrflag ) pBt->inStmt = 0; 02020 }else{ 02021 unlockBtreeIfUnused(pBt); 02022 } 02023 }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && 02024 sqlite3BtreeInvokeBusyHandler(pBt, 0) ); 02025 02026 if( rc==SQLITE_OK ){ 02027 if( p->inTrans==TRANS_NONE ){ 02028 pBt->nTransaction++; 02029 } 02030 p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); 02031 if( p->inTrans>pBt->inTransaction ){ 02032 pBt->inTransaction = p->inTrans; 02033 } 02034 #ifndef SQLITE_OMIT_SHARED_CACHE 02035 if( wrflag>1 ){ 02036 assert( !pBt->pExclusive ); 02037 pBt->pExclusive = p; 02038 } 02039 #endif 02040 } 02041 02042 02043 trans_begun: 02044 btreeIntegrity(p); 02045 sqlite3BtreeLeave(p); 02046 return rc; 02047 } 
02048 02049 #ifndef SQLITE_OMIT_AUTOVACUUM 02050 02051 /* 02052 ** Set the pointer-map entries for all children of page pPage. Also, if 02053 ** pPage contains cells that point to overflow pages, set the pointer 02054 ** map entries for the overflow pages as well. 02055 */ 02056 static int setChildPtrmaps(MemPage *pPage){ 02057 int i; /* Counter variable */ 02058 int nCell; /* Number of cells in page pPage */ 02059 int rc; /* Return code */ 02060 BtShared *pBt = pPage->pBt; 02061 int isInitOrig = pPage->isInit; 02062 Pgno pgno = pPage->pgno; 02063 02064 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 02065 rc = sqlite3BtreeInitPage(pPage); 02066 if( rc!=SQLITE_OK ){ 02067 goto set_child_ptrmaps_out; 02068 } 02069 nCell = pPage->nCell; 02070 02071 for(i=0; i<nCell; i++){ 02072 u8 *pCell = findCell(pPage, i); 02073 02074 rc = ptrmapPutOvflPtr(pPage, pCell); 02075 if( rc!=SQLITE_OK ){ 02076 goto set_child_ptrmaps_out; 02077 } 02078 02079 if( !pPage->leaf ){ 02080 Pgno childPgno = get4byte(pCell); 02081 rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno); 02082 if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out; 02083 } 02084 } 02085 02086 if( !pPage->leaf ){ 02087 Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 02088 rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno); 02089 } 02090 02091 set_child_ptrmaps_out: 02092 pPage->isInit = isInitOrig; 02093 return rc; 02094 } 02095 02096 /* 02097 ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow 02098 ** page, is a pointer to page iFrom. Modify this pointer so that it points to 02099 ** iTo. Parameter eType describes the type of pointer to be modified, as 02100 ** follows: 02101 ** 02102 ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child 02103 ** page of pPage. 02104 ** 02105 ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow 02106 ** page pointed to by one of the cells on pPage. 02107 ** 02108 ** PTRMAP_OVERFLOW2: pPage is an overflow-page. 
The pointer points at the next 02109 ** overflow page in the list. 02110 */ 02111 static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ 02112 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 02113 if( eType==PTRMAP_OVERFLOW2 ){ 02114 /* The pointer is always the first 4 bytes of the page in this case. */ 02115 if( get4byte(pPage->aData)!=iFrom ){ 02116 return SQLITE_CORRUPT_BKPT; 02117 } 02118 put4byte(pPage->aData, iTo); 02119 }else{ 02120 int isInitOrig = pPage->isInit; 02121 int i; 02122 int nCell; 02123 02124 sqlite3BtreeInitPage(pPage); 02125 nCell = pPage->nCell; 02126 02127 for(i=0; i<nCell; i++){ 02128 u8 *pCell = findCell(pPage, i); 02129 if( eType==PTRMAP_OVERFLOW1 ){ 02130 CellInfo info; 02131 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 02132 if( info.iOverflow ){ 02133 if( iFrom==get4byte(&pCell[info.iOverflow]) ){ 02134 put4byte(&pCell[info.iOverflow], iTo); 02135 break; 02136 } 02137 } 02138 }else{ 02139 if( get4byte(pCell)==iFrom ){ 02140 put4byte(pCell, iTo); 02141 break; 02142 } 02143 } 02144 } 02145 02146 if( i==nCell ){ 02147 if( eType!=PTRMAP_BTREE || 02148 get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ 02149 return SQLITE_CORRUPT_BKPT; 02150 } 02151 put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); 02152 } 02153 02154 pPage->isInit = isInitOrig; 02155 } 02156 return SQLITE_OK; 02157 } 02158 02159 02160 /* 02161 ** Move the open database page pDbPage to location iFreePage in the 02162 ** database. The pDbPage reference remains valid. 
02163 */ 02164 static int relocatePage( 02165 BtShared *pBt, /* Btree */ 02166 MemPage *pDbPage, /* Open page to move */ 02167 u8 eType, /* Pointer map 'type' entry for pDbPage */ 02168 Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ 02169 Pgno iFreePage, /* The location to move pDbPage to */ 02170 int isCommit 02171 ){ 02172 MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ 02173 Pgno iDbPage = pDbPage->pgno; 02174 Pager *pPager = pBt->pPager; 02175 int rc; 02176 02177 assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || 02178 eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); 02179 assert( sqlite3_mutex_held(pBt->mutex) ); 02180 assert( pDbPage->pBt==pBt ); 02181 02182 /* Move page iDbPage from its current location to page number iFreePage */ 02183 TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", 02184 iDbPage, iFreePage, iPtrPage, eType)); 02185 rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); 02186 if( rc!=SQLITE_OK ){ 02187 return rc; 02188 } 02189 pDbPage->pgno = iFreePage; 02190 02191 /* If pDbPage was a btree-page, then it may have child pages and/or cells 02192 ** that point to overflow pages. The pointer map entries for all these 02193 ** pages need to be changed. 02194 ** 02195 ** If pDbPage is an overflow page, then the first 4 bytes may store a 02196 ** pointer to a subsequent overflow page. If this is the case, then 02197 ** the pointer map needs to be updated for the subsequent overflow page. 
02198 */ 02199 if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ 02200 rc = setChildPtrmaps(pDbPage); 02201 if( rc!=SQLITE_OK ){ 02202 return rc; 02203 } 02204 }else{ 02205 Pgno nextOvfl = get4byte(pDbPage->aData); 02206 if( nextOvfl!=0 ){ 02207 rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage); 02208 if( rc!=SQLITE_OK ){ 02209 return rc; 02210 } 02211 } 02212 } 02213 02214 /* Fix the database pointer on page iPtrPage that pointed at iDbPage so 02215 ** that it points at iFreePage. Also fix the pointer map entry for 02216 ** iPtrPage. 02217 */ 02218 if( eType!=PTRMAP_ROOTPAGE ){ 02219 rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0); 02220 if( rc!=SQLITE_OK ){ 02221 return rc; 02222 } 02223 rc = sqlite3PagerWrite(pPtrPage->pDbPage); 02224 if( rc!=SQLITE_OK ){ 02225 releasePage(pPtrPage); 02226 return rc; 02227 } 02228 rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); 02229 releasePage(pPtrPage); 02230 if( rc==SQLITE_OK ){ 02231 rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage); 02232 } 02233 } 02234 return rc; 02235 } 02236 02237 /* Forward declaration required by incrVacuumStep(). */ 02238 static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); 02239 02240 /* 02241 ** Perform a single step of an incremental-vacuum. If successful, 02242 ** return SQLITE_OK. If there is no work to do (and therefore no 02243 ** point in calling this function again), return SQLITE_DONE. 02244 ** 02245 ** More specificly, this function attempts to re-organize the 02246 ** database so that the last page of the file currently in use 02247 ** is no longer in use. 02248 ** 02249 ** If the nFin parameter is non-zero, the implementation assumes 02250 ** that the caller will keep calling incrVacuumStep() until 02251 ** it returns SQLITE_DONE or an error, and that nFin is the 02252 ** number of pages the database file will contain after this 02253 ** process is complete. 
02254 */ 02255 static int incrVacuumStep(BtShared *pBt, Pgno nFin){ 02256 Pgno iLastPg; /* Last page in the database */ 02257 Pgno nFreeList; /* Number of pages still on the free-list */ 02258 02259 assert( sqlite3_mutex_held(pBt->mutex) ); 02260 iLastPg = pBt->nTrunc; 02261 if( iLastPg==0 ){ 02262 iLastPg = pagerPagecount(pBt->pPager); 02263 } 02264 02265 if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ 02266 int rc; 02267 u8 eType; 02268 Pgno iPtrPage; 02269 02270 nFreeList = get4byte(&pBt->pPage1->aData[36]); 02271 if( nFreeList==0 || nFin==iLastPg ){ 02272 return SQLITE_DONE; 02273 } 02274 02275 rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); 02276 if( rc!=SQLITE_OK ){ 02277 return rc; 02278 } 02279 if( eType==PTRMAP_ROOTPAGE ){ 02280 return SQLITE_CORRUPT_BKPT; 02281 } 02282 02283 if( eType==PTRMAP_FREEPAGE ){ 02284 if( nFin==0 ){ 02285 /* Remove the page from the files free-list. This is not required 02286 ** if nFin is non-zero. In that case, the free-list will be 02287 ** truncated to zero after this function returns, so it doesn't 02288 ** matter if it still contains some garbage entries. 02289 */ 02290 Pgno iFreePg; 02291 MemPage *pFreePg; 02292 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1); 02293 if( rc!=SQLITE_OK ){ 02294 return rc; 02295 } 02296 assert( iFreePg==iLastPg ); 02297 releasePage(pFreePg); 02298 } 02299 } else { 02300 Pgno iFreePg; /* Index of free page to move pLastPg to */ 02301 MemPage *pLastPg; 02302 02303 rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0); 02304 if( rc!=SQLITE_OK ){ 02305 return rc; 02306 } 02307 02308 /* If nFin is zero, this loop runs exactly once and page pLastPg 02309 ** is swapped with the first free page pulled off the free list. 02310 ** 02311 ** On the other hand, if nFin is greater than zero, then keep 02312 ** looping until a free-page located within the first nFin pages 02313 ** of the file is found. 
02314 */ 02315 do { 02316 MemPage *pFreePg; 02317 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0); 02318 if( rc!=SQLITE_OK ){ 02319 releasePage(pLastPg); 02320 return rc; 02321 } 02322 releasePage(pFreePg); 02323 }while( nFin!=0 && iFreePg>nFin ); 02324 assert( iFreePg<iLastPg ); 02325 02326 rc = sqlite3PagerWrite(pLastPg->pDbPage); 02327 if( rc==SQLITE_OK ){ 02328 rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); 02329 } 02330 releasePage(pLastPg); 02331 if( rc!=SQLITE_OK ){ 02332 return rc; 02333 } 02334 } 02335 } 02336 02337 pBt->nTrunc = iLastPg - 1; 02338 while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){ 02339 pBt->nTrunc--; 02340 } 02341 return SQLITE_OK; 02342 } 02343 02344 /* 02345 ** A write-transaction must be opened before calling this function. 02346 ** It performs a single unit of work towards an incremental vacuum. 02347 ** 02348 ** If the incremental vacuum is finished after this function has run, 02349 ** SQLITE_DONE is returned. If it is not finished, but no error occured, 02350 ** SQLITE_OK is returned. Otherwise an SQLite error code. 02351 */ 02352 int sqlite3BtreeIncrVacuum(Btree *p){ 02353 int rc; 02354 BtShared *pBt = p->pBt; 02355 02356 sqlite3BtreeEnter(p); 02357 pBt->db = p->db; 02358 assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); 02359 if( !pBt->autoVacuum ){ 02360 rc = SQLITE_DONE; 02361 }else{ 02362 invalidateAllOverflowCache(pBt); 02363 rc = incrVacuumStep(pBt, 0); 02364 } 02365 sqlite3BtreeLeave(p); 02366 return rc; 02367 } 02368 02369 /* 02370 ** This routine is called prior to sqlite3PagerCommit when a transaction 02371 ** is commited for an auto-vacuum database. 02372 ** 02373 ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages 02374 ** the database file should be truncated to during the commit process. 02375 ** i.e. the database has been reorganized so that only the first *pnTrunc 02376 ** pages are in use. 
02377 */ 02378 static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){ 02379 int rc = SQLITE_OK; 02380 Pager *pPager = pBt->pPager; 02381 VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) ); 02382 02383 assert( sqlite3_mutex_held(pBt->mutex) ); 02384 invalidateAllOverflowCache(pBt); 02385 assert(pBt->autoVacuum); 02386 if( !pBt->incrVacuum ){ 02387 Pgno nFin = 0; 02388 02389 if( pBt->nTrunc==0 ){ 02390 Pgno nFree; 02391 Pgno nPtrmap; 02392 const int pgsz = pBt->pageSize; 02393 int nOrig = pagerPagecount(pBt->pPager); 02394 02395 if( PTRMAP_ISPAGE(pBt, nOrig) ){ 02396 return SQLITE_CORRUPT_BKPT; 02397 } 02398 if( nOrig==PENDING_BYTE_PAGE(pBt) ){ 02399 nOrig--; 02400 } 02401 nFree = get4byte(&pBt->pPage1->aData[36]); 02402 nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5); 02403 nFin = nOrig - nFree - nPtrmap; 02404 if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){ 02405 nFin--; 02406 } 02407 while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ 02408 nFin--; 02409 } 02410 } 02411 02412 while( rc==SQLITE_OK ){ 02413 rc = incrVacuumStep(pBt, nFin); 02414 } 02415 if( rc==SQLITE_DONE ){ 02416 assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc); 02417 rc = SQLITE_OK; 02418 if( pBt->nTrunc && nFin ){ 02419 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); 02420 put4byte(&pBt->pPage1->aData[32], 0); 02421 put4byte(&pBt->pPage1->aData[36], 0); 02422 pBt->nTrunc = nFin; 02423 } 02424 } 02425 if( rc!=SQLITE_OK ){ 02426 sqlite3PagerRollback(pPager); 02427 } 02428 } 02429 02430 if( rc==SQLITE_OK ){ 02431 *pnTrunc = pBt->nTrunc; 02432 pBt->nTrunc = 0; 02433 } 02434 assert( nRef==sqlite3PagerRefcount(pPager) ); 02435 return rc; 02436 } 02437 02438 #endif /* ifndef SQLITE_OMIT_AUTOVACUUM */ 02439 02440 /* 02441 ** This routine does the first phase of a two-phase commit. 
** This routine causes a rollback journal to be created (if it does not
** already exist) and populated with enough information so that if a power
** loss occurs the database can be restored to its original state by
** playing back the journal.  Then the contents of the journal are flushed
** out to the disk.  After the journal is safely on oxide, the changes to
** the database are written into the database file and flushed to oxide.
** At the end of this call, the rollback journal still exists on the
** disk and we are still holding all locks, so the transaction has not
** committed.  See sqlite3BtreeCommit() for the second phase of the
** commit process.
**
** This call is a no-op if no write-transaction is currently active on pBt.
**
** Otherwise, sync the database file for the btree pBt. zMaster points to
** the name of a master journal file that should be written into the
** individual journal file, or is NULL, indicating no master journal file 
** (single database transaction).
**
** When this is called, the master journal should already have been
** created, populated with this journal pointer and synced to disk.
**
** Once this routine has returned, the only thing required to commit
** the write-transaction for this database file is to delete the journal.
02465 */ 02466 int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ 02467 int rc = SQLITE_OK; 02468 if( p->inTrans==TRANS_WRITE ){ 02469 BtShared *pBt = p->pBt; 02470 Pgno nTrunc = 0; 02471 sqlite3BtreeEnter(p); 02472 pBt->db = p->db; 02473 #ifndef SQLITE_OMIT_AUTOVACUUM 02474 if( pBt->autoVacuum ){ 02475 rc = autoVacuumCommit(pBt, &nTrunc); 02476 if( rc!=SQLITE_OK ){ 02477 sqlite3BtreeLeave(p); 02478 return rc; 02479 } 02480 } 02481 #endif 02482 rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc, 0); 02483 sqlite3BtreeLeave(p); 02484 } 02485 return rc; 02486 } 02487 02488 /* 02489 ** Commit the transaction currently in progress. 02490 ** 02491 ** This routine implements the second phase of a 2-phase commit. The 02492 ** sqlite3BtreeSync() routine does the first phase and should be invoked 02493 ** prior to calling this routine. The sqlite3BtreeSync() routine did 02494 ** all the work of writing information out to disk and flushing the 02495 ** contents so that they are written onto the disk platter. All this 02496 ** routine has to do is delete or truncate the rollback journal 02497 ** (which causes the transaction to commit) and drop locks. 02498 ** 02499 ** This will release the write lock on the database file. If there 02500 ** are no active cursors, it also releases the read lock. 02501 */ 02502 int sqlite3BtreeCommitPhaseTwo(Btree *p){ 02503 BtShared *pBt = p->pBt; 02504 02505 sqlite3BtreeEnter(p); 02506 pBt->db = p->db; 02507 btreeIntegrity(p); 02508 02509 /* If the handle has a write-transaction open, commit the shared-btrees 02510 ** transaction and set the shared state to TRANS_READ. 
02511 */ 02512 if( p->inTrans==TRANS_WRITE ){ 02513 int rc; 02514 assert( pBt->inTransaction==TRANS_WRITE ); 02515 assert( pBt->nTransaction>0 ); 02516 rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); 02517 if( rc!=SQLITE_OK ){ 02518 sqlite3BtreeLeave(p); 02519 return rc; 02520 } 02521 pBt->inTransaction = TRANS_READ; 02522 pBt->inStmt = 0; 02523 } 02524 unlockAllTables(p); 02525 02526 /* If the handle has any kind of transaction open, decrement the transaction 02527 ** count of the shared btree. If the transaction count reaches 0, set 02528 ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below 02529 ** will unlock the pager. 02530 */ 02531 if( p->inTrans!=TRANS_NONE ){ 02532 pBt->nTransaction--; 02533 if( 0==pBt->nTransaction ){ 02534 pBt->inTransaction = TRANS_NONE; 02535 } 02536 } 02537 02538 /* Set the handles current transaction state to TRANS_NONE and unlock 02539 ** the pager if this call closed the only read or write transaction. 02540 */ 02541 p->inTrans = TRANS_NONE; 02542 unlockBtreeIfUnused(pBt); 02543 02544 btreeIntegrity(p); 02545 sqlite3BtreeLeave(p); 02546 return SQLITE_OK; 02547 } 02548 02549 /* 02550 ** Do both phases of a commit. 02551 */ 02552 int sqlite3BtreeCommit(Btree *p){ 02553 int rc; 02554 sqlite3BtreeEnter(p); 02555 rc = sqlite3BtreeCommitPhaseOne(p, 0); 02556 if( rc==SQLITE_OK ){ 02557 rc = sqlite3BtreeCommitPhaseTwo(p); 02558 } 02559 sqlite3BtreeLeave(p); 02560 return rc; 02561 } 02562 02563 #ifndef NDEBUG 02564 /* 02565 ** Return the number of write-cursors open on this handle. This is for use 02566 ** in assert() expressions, so it is only compiled if NDEBUG is not 02567 ** defined. 02568 ** 02569 ** For the purposes of this routine, a write-cursor is any cursor that 02570 ** is capable of writing to the databse. That means the cursor was 02571 ** originally opened for writing and the cursor has not be disabled 02572 ** by having its state changed to CURSOR_FAULT. 
02573 */ 02574 static int countWriteCursors(BtShared *pBt){ 02575 BtCursor *pCur; 02576 int r = 0; 02577 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ 02578 if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++; 02579 } 02580 return r; 02581 } 02582 #endif 02583 02584 /* 02585 ** This routine sets the state to CURSOR_FAULT and the error 02586 ** code to errCode for every cursor on BtShared that pBtree 02587 ** references. 02588 ** 02589 ** Every cursor is tripped, including cursors that belong 02590 ** to other database connections that happen to be sharing 02591 ** the cache with pBtree. 02592 ** 02593 ** This routine gets called when a rollback occurs. 02594 ** All cursors using the same cache must be tripped 02595 ** to prevent them from trying to use the btree after 02596 ** the rollback. The rollback may have deleted tables 02597 ** or moved root pages, so it is not sufficient to 02598 ** save the state of the cursor. The cursor must be 02599 ** invalidated. 02600 */ 02601 void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){ 02602 BtCursor *p; 02603 sqlite3BtreeEnter(pBtree); 02604 for(p=pBtree->pBt->pCursor; p; p=p->pNext){ 02605 sqlite3BtreeClearCursor(p); 02606 p->eState = CURSOR_FAULT; 02607 p->skip = errCode; 02608 } 02609 sqlite3BtreeLeave(pBtree); 02610 } 02611 02612 /* 02613 ** Rollback the transaction in progress. All cursors will be 02614 ** invalided by this operation. Any attempt to use a cursor 02615 ** that was open at the beginning of this operation will result 02616 ** in an error. 02617 ** 02618 ** This will release the write lock on the database file. If there 02619 ** are no active cursors, it also releases the read lock. 
02620 */ 02621 int sqlite3BtreeRollback(Btree *p){ 02622 int rc; 02623 BtShared *pBt = p->pBt; 02624 MemPage *pPage1; 02625 02626 sqlite3BtreeEnter(p); 02627 pBt->db = p->db; 02628 rc = saveAllCursors(pBt, 0, 0); 02629 #ifndef SQLITE_OMIT_SHARED_CACHE 02630 if( rc!=SQLITE_OK ){ 02631 /* This is a horrible situation. An IO or malloc() error occured whilst 02632 ** trying to save cursor positions. If this is an automatic rollback (as 02633 ** the result of a constraint, malloc() failure or IO error) then 02634 ** the cache may be internally inconsistent (not contain valid trees) so 02635 ** we cannot simply return the error to the caller. Instead, abort 02636 ** all queries that may be using any of the cursors that failed to save. 02637 */ 02638 sqlite3BtreeTripAllCursors(p, rc); 02639 } 02640 #endif 02641 btreeIntegrity(p); 02642 unlockAllTables(p); 02643 02644 if( p->inTrans==TRANS_WRITE ){ 02645 int rc2; 02646 02647 #ifndef SQLITE_OMIT_AUTOVACUUM 02648 pBt->nTrunc = 0; 02649 #endif 02650 02651 assert( TRANS_WRITE==pBt->inTransaction ); 02652 rc2 = sqlite3PagerRollback(pBt->pPager); 02653 if( rc2!=SQLITE_OK ){ 02654 rc = rc2; 02655 } 02656 02657 /* The rollback may have destroyed the pPage1->aData value. So 02658 ** call sqlite3BtreeGetPage() on page 1 again to make 02659 ** sure pPage1->aData is set correctly. */ 02660 if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ 02661 releasePage(pPage1); 02662 } 02663 assert( countWriteCursors(pBt)==0 ); 02664 pBt->inTransaction = TRANS_READ; 02665 } 02666 02667 if( p->inTrans!=TRANS_NONE ){ 02668 assert( pBt->nTransaction>0 ); 02669 pBt->nTransaction--; 02670 if( 0==pBt->nTransaction ){ 02671 pBt->inTransaction = TRANS_NONE; 02672 } 02673 } 02674 02675 p->inTrans = TRANS_NONE; 02676 pBt->inStmt = 0; 02677 unlockBtreeIfUnused(pBt); 02678 02679 btreeIntegrity(p); 02680 sqlite3BtreeLeave(p); 02681 return rc; 02682 } 02683 02684 /* 02685 ** Start a statement subtransaction. 
The subtransaction can 02686 ** can be rolled back independently of the main transaction. 02687 ** You must start a transaction before starting a subtransaction. 02688 ** The subtransaction is ended automatically if the main transaction 02689 ** commits or rolls back. 02690 ** 02691 ** Only one subtransaction may be active at a time. It is an error to try 02692 ** to start a new subtransaction if another subtransaction is already active. 02693 ** 02694 ** Statement subtransactions are used around individual SQL statements 02695 ** that are contained within a BEGIN...COMMIT block. If a constraint 02696 ** error occurs within the statement, the effect of that one statement 02697 ** can be rolled back without having to rollback the entire transaction. 02698 */ 02699 int sqlite3BtreeBeginStmt(Btree *p){ 02700 int rc; 02701 BtShared *pBt = p->pBt; 02702 sqlite3BtreeEnter(p); 02703 pBt->db = p->db; 02704 if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){ 02705 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; 02706 }else{ 02707 assert( pBt->inTransaction==TRANS_WRITE ); 02708 rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager); 02709 pBt->inStmt = 1; 02710 } 02711 sqlite3BtreeLeave(p); 02712 return rc; 02713 } 02714 02715 02716 /* 02717 ** Commit the statment subtransaction currently in progress. If no 02718 ** subtransaction is active, this is a no-op. 02719 */ 02720 int sqlite3BtreeCommitStmt(Btree *p){ 02721 int rc; 02722 BtShared *pBt = p->pBt; 02723 sqlite3BtreeEnter(p); 02724 pBt->db = p->db; 02725 if( pBt->inStmt && !pBt->readOnly ){ 02726 rc = sqlite3PagerStmtCommit(pBt->pPager); 02727 }else{ 02728 rc = SQLITE_OK; 02729 } 02730 pBt->inStmt = 0; 02731 sqlite3BtreeLeave(p); 02732 return rc; 02733 } 02734 02735 /* 02736 ** Rollback the active statement subtransaction. If no subtransaction 02737 ** is active this routine is a no-op. 02738 ** 02739 ** All cursors will be invalidated by this operation. 
Any attempt 02740 ** to use a cursor that was open at the beginning of this operation 02741 ** will result in an error. 02742 */ 02743 int sqlite3BtreeRollbackStmt(Btree *p){ 02744 int rc = SQLITE_OK; 02745 BtShared *pBt = p->pBt; 02746 sqlite3BtreeEnter(p); 02747 pBt->db = p->db; 02748 if( pBt->inStmt && !pBt->readOnly ){ 02749 rc = sqlite3PagerStmtRollback(pBt->pPager); 02750 pBt->inStmt = 0; 02751 } 02752 sqlite3BtreeLeave(p); 02753 return rc; 02754 } 02755 02756 /* 02757 ** Create a new cursor for the BTree whose root is on the page 02758 ** iTable. The act of acquiring a cursor gets a read lock on 02759 ** the database file. 02760 ** 02761 ** If wrFlag==0, then the cursor can only be used for reading. 02762 ** If wrFlag==1, then the cursor can be used for reading or for 02763 ** writing if other conditions for writing are also met. These 02764 ** are the conditions that must be met in order for writing to 02765 ** be allowed: 02766 ** 02767 ** 1: The cursor must have been opened with wrFlag==1 02768 ** 02769 ** 2: Other database connections that share the same pager cache 02770 ** but which are not in the READ_UNCOMMITTED state may not have 02771 ** cursors open with wrFlag==0 on the same table. Otherwise 02772 ** the changes made by this write cursor would be visible to 02773 ** the read cursors in the other database connection. 02774 ** 02775 ** 3: The database must be writable (not on read-only media) 02776 ** 02777 ** 4: There must be an active transaction. 02778 ** 02779 ** No checking is done to make sure that page iTable really is the 02780 ** root page of a b-tree. If it is not, then the cursor acquired 02781 ** will not work correctly. 02782 ** 02783 ** It is assumed that the sqlite3BtreeCursorSize() bytes of memory 02784 ** pointed to by pCur have been zeroed by the caller. 02785 */ 02786 static int btreeCursor( 02787 Btree *p, /* The btree */ 02788 int iTable, /* Root page of table to open */ 02789 int wrFlag, /* 1 to write. 
0 read-only */ 02790 struct KeyInfo *pKeyInfo, /* First arg to comparison function */ 02791 BtCursor *pCur /* Space for new cursor */ 02792 ){ 02793 int rc; 02794 BtShared *pBt = p->pBt; 02795 02796 assert( sqlite3BtreeHoldsMutex(p) ); 02797 if( wrFlag ){ 02798 if( pBt->readOnly ){ 02799 return SQLITE_READONLY; 02800 } 02801 if( checkReadLocks(p, iTable, 0, 0) ){ 02802 return SQLITE_LOCKED; 02803 } 02804 } 02805 02806 if( pBt->pPage1==0 ){ 02807 rc = lockBtreeWithRetry(p); 02808 if( rc!=SQLITE_OK ){ 02809 return rc; 02810 } 02811 if( pBt->readOnly && wrFlag ){ 02812 return SQLITE_READONLY; 02813 } 02814 } 02815 pCur->pgnoRoot = (Pgno)iTable; 02816 if( iTable==1 && pagerPagecount(pBt->pPager)==0 ){ 02817 rc = SQLITE_EMPTY; 02818 goto create_cursor_exception; 02819 } 02820 rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); 02821 if( rc!=SQLITE_OK ){ 02822 goto create_cursor_exception; 02823 } 02824 02825 /* Now that no other errors can occur, finish filling in the BtCursor 02826 ** variables, link the cursor into the BtShared list and set *ppCur (the 02827 ** output argument to this function). 02828 */ 02829 pCur->pKeyInfo = pKeyInfo; 02830 pCur->pBtree = p; 02831 pCur->pBt = pBt; 02832 pCur->wrFlag = wrFlag; 02833 pCur->pNext = pBt->pCursor; 02834 if( pCur->pNext ){ 02835 pCur->pNext->pPrev = pCur; 02836 } 02837 pBt->pCursor = pCur; 02838 pCur->eState = CURSOR_INVALID; 02839 02840 return SQLITE_OK; 02841 02842 create_cursor_exception: 02843 releasePage(pCur->apPage[0]); 02844 unlockBtreeIfUnused(pBt); 02845 return rc; 02846 } 02847 int sqlite3BtreeCursor( 02848 Btree *p, /* The btree */ 02849 int iTable, /* Root page of table to open */ 02850 int wrFlag, /* 1 to write. 
0 read-only */ 02851 struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ 02852 BtCursor *pCur /* Write new cursor here */ 02853 ){ 02854 int rc; 02855 sqlite3BtreeEnter(p); 02856 p->pBt->db = p->db; 02857 rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); 02858 sqlite3BtreeLeave(p); 02859 return rc; 02860 } 02861 int sqlite3BtreeCursorSize(){ 02862 return sizeof(BtCursor); 02863 } 02864 02865 02866 02867 /* 02868 ** Close a cursor. The read lock on the database file is released 02869 ** when the last cursor is closed. 02870 */ 02871 int sqlite3BtreeCloseCursor(BtCursor *pCur){ 02872 Btree *pBtree = pCur->pBtree; 02873 if( pBtree ){ 02874 int i; 02875 BtShared *pBt = pCur->pBt; 02876 sqlite3BtreeEnter(pBtree); 02877 pBt->db = pBtree->db; 02878 sqlite3BtreeClearCursor(pCur); 02879 if( pCur->pPrev ){ 02880 pCur->pPrev->pNext = pCur->pNext; 02881 }else{ 02882 pBt->pCursor = pCur->pNext; 02883 } 02884 if( pCur->pNext ){ 02885 pCur->pNext->pPrev = pCur->pPrev; 02886 } 02887 for(i=0; i<=pCur->iPage; i++){ 02888 releasePage(pCur->apPage[i]); 02889 } 02890 unlockBtreeIfUnused(pBt); 02891 invalidateOverflowCache(pCur); 02892 /* sqlite3_free(pCur); */ 02893 sqlite3BtreeLeave(pBtree); 02894 } 02895 return SQLITE_OK; 02896 } 02897 02898 /* 02899 ** Make a temporary cursor by filling in the fields of pTempCur. 02900 ** The temporary cursor is not on the cursor list for the Btree. 02901 */ 02902 void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){ 02903 int i; 02904 assert( cursorHoldsMutex(pCur) ); 02905 memcpy(pTempCur, pCur, sizeof(BtCursor)); 02906 pTempCur->pNext = 0; 02907 pTempCur->pPrev = 0; 02908 for(i=0; i<=pTempCur->iPage; i++){ 02909 sqlite3PagerRef(pTempCur->apPage[i]->pDbPage); 02910 } 02911 } 02912 02913 /* 02914 ** Delete a temporary cursor such as was made by the CreateTemporaryCursor() 02915 ** function above. 
02916 */ 02917 void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){ 02918 int i; 02919 assert( cursorHoldsMutex(pCur) ); 02920 for(i=0; i<=pCur->iPage; i++){ 02921 sqlite3PagerUnref(pCur->apPage[i]->pDbPage); 02922 } 02923 } 02924 02925 /* 02926 ** Make sure the BtCursor* given in the argument has a valid 02927 ** BtCursor.info structure. If it is not already valid, call 02928 ** sqlite3BtreeParseCell() to fill it in. 02929 ** 02930 ** BtCursor.info is a cache of the information in the current cell. 02931 ** Using this cache reduces the number of calls to sqlite3BtreeParseCell(). 02932 ** 02933 ** 2007-06-25: There is a bug in some versions of MSVC that cause the 02934 ** compiler to crash when getCellInfo() is implemented as a macro. 02935 ** But there is a measureable speed advantage to using the macro on gcc 02936 ** (when less compiler optimizations like -Os or -O0 are used and the 02937 ** compiler is not doing agressive inlining.) So we use a real function 02938 ** for MSVC and a macro for everything else. Ticket #2457. 02939 */ 02940 #ifndef NDEBUG 02941 static void assertCellInfo(BtCursor *pCur){ 02942 CellInfo info; 02943 int iPage = pCur->iPage; 02944 memset(&info, 0, sizeof(info)); 02945 sqlite3BtreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); 02946 assert( memcmp(&info, &pCur->info, sizeof(info))==0 ); 02947 } 02948 #else 02949 #define assertCellInfo(x) 02950 #endif 02951 #ifdef _MSC_VER 02952 /* Use a real function in MSVC to work around bugs in that compiler. 
*/ 02953 static void getCellInfo(BtCursor *pCur){ 02954 if( pCur->info.nSize==0 ){ 02955 int iPage = pCur->iPage; 02956 sqlite3BtreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); 02957 pCur->validNKey = 1; 02958 }else{ 02959 assertCellInfo(pCur); 02960 } 02961 } 02962 #else /* if not _MSC_VER */ 02963 /* Use a macro in all other compilers so that the function is inlined */ 02964 #define getCellInfo(pCur) \ 02965 if( pCur->info.nSize==0 ){ \ 02966 int iPage = pCur->iPage; \ 02967 sqlite3BtreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ 02968 pCur->validNKey = 1; \ 02969 }else{ \ 02970 assertCellInfo(pCur); \ 02971 } 02972 #endif /* _MSC_VER */ 02973 02974 /* 02975 ** Set *pSize to the size of the buffer needed to hold the value of 02976 ** the key for the current entry. If the cursor is not pointing 02977 ** to a valid entry, *pSize is set to 0. 02978 ** 02979 ** For a table with the INTKEY flag set, this routine returns the key 02980 ** itself, not the number of bytes in the key. 02981 */ 02982 int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ 02983 int rc; 02984 02985 assert( cursorHoldsMutex(pCur) ); 02986 rc = restoreCursorPosition(pCur); 02987 if( rc==SQLITE_OK ){ 02988 assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); 02989 if( pCur->eState==CURSOR_INVALID ){ 02990 *pSize = 0; 02991 }else{ 02992 getCellInfo(pCur); 02993 *pSize = pCur->info.nKey; 02994 } 02995 } 02996 return rc; 02997 } 02998 02999 /* 03000 ** Set *pSize to the number of bytes of data in the entry the 03001 ** cursor currently points to. Always return SQLITE_OK. 03002 ** Failure is not possible. If the cursor is not currently 03003 ** pointing to an entry (which can happen, for example, if 03004 ** the database is empty) then *pSize is set to 0. 
03005 */ 03006 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ 03007 int rc; 03008 03009 assert( cursorHoldsMutex(pCur) ); 03010 rc = restoreCursorPosition(pCur); 03011 if( rc==SQLITE_OK ){ 03012 assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); 03013 if( pCur->eState==CURSOR_INVALID ){ 03014 /* Not pointing at a valid entry - set *pSize to 0. */ 03015 *pSize = 0; 03016 }else{ 03017 getCellInfo(pCur); 03018 *pSize = pCur->info.nData; 03019 } 03020 } 03021 return rc; 03022 } 03023 03024 /* 03025 ** Given the page number of an overflow page in the database (parameter 03026 ** ovfl), this function finds the page number of the next page in the 03027 ** linked list of overflow pages. If possible, it uses the auto-vacuum 03028 ** pointer-map data instead of reading the content of page ovfl to do so. 03029 ** 03030 ** If an error occurs an SQLite error code is returned. Otherwise: 03031 ** 03032 ** Unless pPgnoNext is NULL, the page number of the next overflow 03033 ** page in the linked list is written to *pPgnoNext. If page ovfl 03034 ** is the last page in its linked list, *pPgnoNext is set to zero. 03035 ** 03036 ** If ppPage is not NULL, *ppPage is set to the MemPage* handle 03037 ** for page ovfl. The underlying pager page may have been requested 03038 ** with the noContent flag set, so the page data accessable via 03039 ** this handle may not be trusted. 03040 */ 03041 static int getOverflowPage( 03042 BtShared *pBt, 03043 Pgno ovfl, /* Overflow page */ 03044 MemPage **ppPage, /* OUT: MemPage handle */ 03045 Pgno *pPgnoNext /* OUT: Next overflow page number */ 03046 ){ 03047 Pgno next = 0; 03048 int rc; 03049 03050 assert( sqlite3_mutex_held(pBt->mutex) ); 03051 /* One of these must not be NULL. Otherwise, why call this function? */ 03052 assert(ppPage || pPgnoNext); 03053 03054 /* If pPgnoNext is NULL, then this function is being called to obtain 03055 ** a MemPage* reference only. No page-data is required in this case. 
03056 */ 03057 if( !pPgnoNext ){ 03058 return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1); 03059 } 03060 03061 #ifndef SQLITE_OMIT_AUTOVACUUM 03062 /* Try to find the next page in the overflow list using the 03063 ** autovacuum pointer-map pages. Guess that the next page in 03064 ** the overflow list is page number (ovfl+1). If that guess turns 03065 ** out to be wrong, fall back to loading the data of page 03066 ** number ovfl to determine the next page number. 03067 */ 03068 if( pBt->autoVacuum ){ 03069 Pgno pgno; 03070 Pgno iGuess = ovfl+1; 03071 u8 eType; 03072 03073 while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ 03074 iGuess++; 03075 } 03076 03077 if( iGuess<=pagerPagecount(pBt->pPager) ){ 03078 rc = ptrmapGet(pBt, iGuess, &eType, &pgno); 03079 if( rc!=SQLITE_OK ){ 03080 return rc; 03081 } 03082 if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ 03083 next = iGuess; 03084 } 03085 } 03086 } 03087 #endif 03088 03089 if( next==0 || ppPage ){ 03090 MemPage *pPage = 0; 03091 03092 rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0); 03093 assert(rc==SQLITE_OK || pPage==0); 03094 if( next==0 && rc==SQLITE_OK ){ 03095 next = get4byte(pPage->aData); 03096 } 03097 03098 if( ppPage ){ 03099 *ppPage = pPage; 03100 }else{ 03101 releasePage(pPage); 03102 } 03103 } 03104 *pPgnoNext = next; 03105 03106 return rc; 03107 } 03108 03109 /* 03110 ** Copy data from a buffer to a page, or from a page to a buffer. 03111 ** 03112 ** pPayload is a pointer to data stored on database page pDbPage. 03113 ** If argument eOp is false, then nByte bytes of data are copied 03114 ** from pPayload to the buffer pointed at by pBuf. If eOp is true, 03115 ** then sqlite3PagerWrite() is called on pDbPage and nByte bytes 03116 ** of data are copied from the buffer pBuf to pPayload. 03117 ** 03118 ** SQLITE_OK is returned on success, otherwise an error code. 
03119 */ 03120 static int copyPayload( 03121 void *pPayload, /* Pointer to page data */ 03122 void *pBuf, /* Pointer to buffer */ 03123 int nByte, /* Number of bytes to copy */ 03124 int eOp, /* 0 -> copy from page, 1 -> copy to page */ 03125 DbPage *pDbPage /* Page containing pPayload */ 03126 ){ 03127 if( eOp ){ 03128 /* Copy data from buffer to page (a write operation) */ 03129 int rc = sqlite3PagerWrite(pDbPage); 03130 if( rc!=SQLITE_OK ){ 03131 return rc; 03132 } 03133 memcpy(pPayload, pBuf, nByte); 03134 }else{ 03135 /* Copy data from page to buffer (a read operation) */ 03136 memcpy(pBuf, pPayload, nByte); 03137 } 03138 return SQLITE_OK; 03139 } 03140 03141 /* 03142 ** This function is used to read or overwrite payload information 03143 ** for the entry that the pCur cursor is pointing to. If the eOp 03144 ** parameter is 0, this is a read operation (data copied into 03145 ** buffer pBuf). If it is non-zero, a write (data copied from 03146 ** buffer pBuf). 03147 ** 03148 ** A total of "amt" bytes are read or written beginning at "offset". 03149 ** Data is read to or from the buffer pBuf. 03150 ** 03151 ** This routine does not make a distinction between key and data. 03152 ** It just reads or writes bytes from the payload area. Data might 03153 ** appear on the main page or be scattered out on multiple overflow 03154 ** pages. 03155 ** 03156 ** If the BtCursor.isIncrblobHandle flag is set, and the current 03157 ** cursor entry uses one or more overflow pages, this function 03158 ** allocates space for and lazily popluates the overflow page-list 03159 ** cache array (BtCursor.aOverflow). Subsequent calls use this 03160 ** cache to make seeking to the supplied offset more efficient. 03161 ** 03162 ** Once an overflow page-list cache has been allocated, it may be 03163 ** invalidated if some other cursor writes to the same table, or if 03164 ** the cursor is moved to a different row. 
Additionally, in auto-vacuum
** mode, the following events may invalidate an overflow page-list cache.
**
**   * An incremental vacuum,
**   * A commit in auto_vacuum="full" mode,
**   * Creating a table (may require moving an overflow page).
**
** Returns SQLITE_OK if all "amt" bytes were transferred.  Returns
** SQLITE_CORRUPT_BKPT if the requested range extends past the end of
** the payload, or if the overflow chain ends before the request has
** been satisfied.  Errors from the pager, from getOverflowPage() and
** from memory allocation (SQLITE_NOMEM) are passed through.
*/
static int accessPayload(
  BtCursor *pCur,        /* Cursor pointing to entry to read from */
  int offset,            /* Begin reading this far into payload */
  int amt,               /* Read this many bytes */
  unsigned char *pBuf,   /* Write the bytes into this buffer */
  int skipKey,           /* offset begins at data if this is true */
  int eOp                /* zero to read. non-zero to write. */
){
  unsigned char *aPayload;
  int rc = SQLITE_OK;
  u32 nKey;
  int iIdx = 0;                                /* Index into the overflow chain */
  MemPage *pPage = pCur->apPage[pCur->iPage];  /* Btree page of current entry */
  BtShared *pBt;                               /* Btree this cursor belongs to */

  assert( pPage );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
  assert( offset>=0 );
  assert( cursorHoldsMutex(pCur) );

  getCellInfo(pCur);
  aPayload = pCur->info.pCell + pCur->info.nHeader;
  /* On intKey pages the key is not counted as part of the payload bytes */
  nKey = (pPage->intKey ? 0 : pCur->info.nKey);

  if( skipKey ){
    offset += nKey;
  }
  if( offset+amt > nKey+pCur->info.nData ){
    /* Trying to read or write past the end of the data is an error */
    return SQLITE_CORRUPT_BKPT;
  }

  /* Check if data must be read/written to/from the btree page itself. */
  if( offset<pCur->info.nLocal ){
    int a = amt;
    if( a+offset>pCur->info.nLocal ){
      /* Only the part that is stored locally is handled here */
      a = pCur->info.nLocal - offset;
    }
    rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
    offset = 0;
    pBuf += a;
    amt -= a;
  }else{
    /* From here on, offset is relative to the start of the overflow area */
    offset -= pCur->info.nLocal;
  }

  pBt = pCur->pBt;
  if( rc==SQLITE_OK && amt>0 ){
    const int ovflSize = pBt->usableSize - 4;  /* Bytes content per ovfl page */
    Pgno nextPage;

    /* The 4 bytes following the local payload hold the page number of
    ** the first overflow page. */
    nextPage = get4byte(&aPayload[pCur->info.nLocal]);

#ifndef SQLITE_OMIT_INCRBLOB
    /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[]
    ** has not been allocated, allocate it now. The array is sized at
    ** one entry for each overflow page in the overflow chain. The
    ** page number of the first overflow page is stored in aOverflow[0],
    ** etc. A value of 0 in the aOverflow[] array means "not yet known"
    ** (the cache is lazily populated).
    */
    if( pCur->isIncrblobHandle && !pCur->aOverflow ){
      int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
      pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
      if( nOvfl && !pCur->aOverflow ){
        rc = SQLITE_NOMEM;
      }
    }

    /* If the overflow page-list cache has been allocated and the
    ** entry for the first required overflow page is valid, skip
    ** directly to it.
    */
    if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
      iIdx = (offset/ovflSize);
      nextPage = pCur->aOverflow[iIdx];
      offset = (offset%ovflSize);
    }
#endif

    /* Walk the overflow chain.  Each iteration either skips a whole page
    ** that lies before the requested range, or transfers the part of the
    ** range that is stored on the current page. */
    for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){

#ifndef SQLITE_OMIT_INCRBLOB
      /* If required, populate the overflow page-list cache. */
      if( pCur->aOverflow ){
        assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
        pCur->aOverflow[iIdx] = nextPage;
      }
#endif

      if( offset>=ovflSize ){
        /* The only reason to read this page is to obtain the page
        ** number for the next page in the overflow chain. The page
        ** data is not required. So first try to lookup the overflow
        ** page-list cache, if any, then fall back to the getOverflowPage()
        ** function.
        */
#ifndef SQLITE_OMIT_INCRBLOB
        if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
          nextPage = pCur->aOverflow[iIdx+1];
        } else
#endif
          rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
        offset -= ovflSize;
      }else{
        /* Need to read this page properly. It contains some of the
        ** range of data that is being read (eOp==0) or written (eOp!=0).
        */
        DbPage *pDbPage;
        int a = amt;
        rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
        if( rc==SQLITE_OK ){
          aPayload = sqlite3PagerGetData(pDbPage);
          /* First 4 bytes of an overflow page: number of the next page */
          nextPage = get4byte(aPayload);
          if( a + offset > ovflSize ){
            a = ovflSize - offset;
          }
          rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
          sqlite3PagerUnref(pDbPage);
          offset = 0;
          amt -= a;
          pBuf += a;
        }
      }
    }
  }

  /* The chain ended before the whole request could be satisfied */
  if( rc==SQLITE_OK && amt>0 ){
    return SQLITE_CORRUPT_BKPT;
  }
  return rc;
}

/*
** Read part of the key associated with cursor pCur.  Exactly
** "amt" bytes will be transferred into pBuf[].  The transfer
** begins at "offset".
**
** Return SQLITE_OK on success or an error code if anything goes
** wrong.  An error is returned if "offset+amt" is larger than
** the available payload.
03313 */ 03314 int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ 03315 int rc; 03316 03317 assert( cursorHoldsMutex(pCur) ); 03318 rc = restoreCursorPosition(pCur); 03319 if( rc==SQLITE_OK ){ 03320 assert( pCur->eState==CURSOR_VALID ); 03321 assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); 03322 if( pCur->apPage[0]->intKey ){ 03323 return SQLITE_CORRUPT_BKPT; 03324 } 03325 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 03326 rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0); 03327 } 03328 return rc; 03329 } 03330 03331 /* 03332 ** Read part of the data associated with cursor pCur. Exactly 03333 ** "amt" bytes will be transfered into pBuf[]. The transfer 03334 ** begins at "offset". 03335 ** 03336 ** Return SQLITE_OK on success or an error code if anything goes 03337 ** wrong. An error is returned if "offset+amt" is larger than 03338 ** the available payload. 03339 */ 03340 int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ 03341 int rc; 03342 03343 #ifndef SQLITE_OMIT_INCRBLOB 03344 if ( pCur->eState==CURSOR_INVALID ){ 03345 return SQLITE_ABORT; 03346 } 03347 #endif 03348 03349 assert( cursorHoldsMutex(pCur) ); 03350 rc = restoreCursorPosition(pCur); 03351 if( rc==SQLITE_OK ){ 03352 assert( pCur->eState==CURSOR_VALID ); 03353 assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); 03354 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 03355 rc = accessPayload(pCur, offset, amt, pBuf, 1, 0); 03356 } 03357 return rc; 03358 } 03359 03360 /* 03361 ** Return a pointer to payload information from the entry that the 03362 ** pCur cursor is pointing to. The pointer is to the beginning of 03363 ** the key if skipKey==0 and it points to the beginning of data if 03364 ** skipKey==1. The number of bytes of available key/data is written 03365 ** into *pAmt. If *pAmt==0, then the value returned will not be 03366 ** a valid pointer. 
03367 ** 03368 ** This routine is an optimization. It is common for the entire key 03369 ** and data to fit on the local page and for there to be no overflow 03370 ** pages. When that is so, this routine can be used to access the 03371 ** key and data without making a copy. If the key and/or data spills 03372 ** onto overflow pages, then accessPayload() must be used to reassembly 03373 ** the key/data and copy it into a preallocated buffer. 03374 ** 03375 ** The pointer returned by this routine looks directly into the cached 03376 ** page of the database. The data might change or move the next time 03377 ** any btree routine is called. 03378 */ 03379 static const unsigned char *fetchPayload( 03380 BtCursor *pCur, /* Cursor pointing to entry to read from */ 03381 int *pAmt, /* Write the number of available bytes here */ 03382 int skipKey /* read beginning at data if this is true */ 03383 ){ 03384 unsigned char *aPayload; 03385 MemPage *pPage; 03386 u32 nKey; 03387 int nLocal; 03388 03389 assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); 03390 assert( pCur->eState==CURSOR_VALID ); 03391 assert( cursorHoldsMutex(pCur) ); 03392 pPage = pCur->apPage[pCur->iPage]; 03393 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); 03394 getCellInfo(pCur); 03395 aPayload = pCur->info.pCell; 03396 aPayload += pCur->info.nHeader; 03397 if( pPage->intKey ){ 03398 nKey = 0; 03399 }else{ 03400 nKey = pCur->info.nKey; 03401 } 03402 if( skipKey ){ 03403 aPayload += nKey; 03404 nLocal = pCur->info.nLocal - nKey; 03405 }else{ 03406 nLocal = pCur->info.nLocal; 03407 if( nLocal>nKey ){ 03408 nLocal = nKey; 03409 } 03410 } 03411 *pAmt = nLocal; 03412 return aPayload; 03413 } 03414 03415 03416 /* 03417 ** For the entry that cursor pCur is point to, return as 03418 ** many bytes of the key or data as are available on the local 03419 ** b-tree page. Write the number of available bytes into *pAmt. 03420 ** 03421 ** The pointer returned is ephemeral. 
The key/data may move 03422 ** or be destroyed on the next call to any Btree routine, 03423 ** including calls from other threads against the same cache. 03424 ** Hence, a mutex on the BtShared should be held prior to calling 03425 ** this routine. 03426 ** 03427 ** These routines is used to get quick access to key and data 03428 ** in the common case where no overflow pages are used. 03429 */ 03430 const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){ 03431 assert( cursorHoldsMutex(pCur) ); 03432 if( pCur->eState==CURSOR_VALID ){ 03433 return (const void*)fetchPayload(pCur, pAmt, 0); 03434 } 03435 return 0; 03436 } 03437 const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){ 03438 assert( cursorHoldsMutex(pCur) ); 03439 if( pCur->eState==CURSOR_VALID ){ 03440 return (const void*)fetchPayload(pCur, pAmt, 1); 03441 } 03442 return 0; 03443 } 03444 03445 03446 /* 03447 ** Move the cursor down to a new child page. The newPgno argument is the 03448 ** page number of the child page to move to. 03449 */ 03450 static int moveToChild(BtCursor *pCur, u32 newPgno){ 03451 int rc; 03452 int i = pCur->iPage; 03453 MemPage *pNewPage; 03454 BtShared *pBt = pCur->pBt; 03455 03456 assert( cursorHoldsMutex(pCur) ); 03457 assert( pCur->eState==CURSOR_VALID ); 03458 assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); 03459 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ 03460 return SQLITE_CORRUPT_BKPT; 03461 } 03462 rc = getAndInitPage(pBt, newPgno, &pNewPage); 03463 if( rc ) return rc; 03464 pCur->apPage[i+1] = pNewPage; 03465 pCur->aiIdx[i+1] = 0; 03466 pCur->iPage++; 03467 03468 pCur->info.nSize = 0; 03469 pCur->validNKey = 0; 03470 if( pNewPage->nCell<1 ){ 03471 return SQLITE_CORRUPT_BKPT; 03472 } 03473 return SQLITE_OK; 03474 } 03475 03476 #ifndef NDEBUG 03477 /* 03478 ** Page pParent is an internal (non-leaf) tree page. This function 03479 ** asserts that page number iChild is the left-child if the iIdx'th 03480 ** cell in page pParent. 
Or, if iIdx is equal to the total number of 03481 ** cells in pParent, that page number iChild is the right-child of 03482 ** the page. 03483 */ 03484 static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ 03485 assert( iIdx<=pParent->nCell ); 03486 if( iIdx==pParent->nCell ){ 03487 assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); 03488 }else{ 03489 assert( get4byte(findCell(pParent, iIdx))==iChild ); 03490 } 03491 } 03492 #else 03493 # define assertParentIndex(x,y,z) 03494 #endif 03495 03496 /* 03497 ** Move the cursor up to the parent page. 03498 ** 03499 ** pCur->idx is set to the cell index that contains the pointer 03500 ** to the page we are coming from. If we are coming from the 03501 ** right-most child page then pCur->idx is set to one more than 03502 ** the largest cell index. 03503 */ 03504 void sqlite3BtreeMoveToParent(BtCursor *pCur){ 03505 assert( cursorHoldsMutex(pCur) ); 03506 assert( pCur->eState==CURSOR_VALID ); 03507 assert( pCur->iPage>0 ); 03508 assert( pCur->apPage[pCur->iPage] ); 03509 assertParentIndex( 03510 pCur->apPage[pCur->iPage-1], 03511 pCur->aiIdx[pCur->iPage-1], 03512 pCur->apPage[pCur->iPage]->pgno 03513 ); 03514 releasePage(pCur->apPage[pCur->iPage]); 03515 pCur->iPage--; 03516 pCur->info.nSize = 0; 03517 pCur->validNKey = 0; 03518 } 03519 03520 /* 03521 ** Move the cursor to the root page 03522 */ 03523 static int moveToRoot(BtCursor *pCur){ 03524 MemPage *pRoot; 03525 int rc = SQLITE_OK; 03526 Btree *p = pCur->pBtree; 03527 BtShared *pBt = p->pBt; 03528 03529 assert( cursorHoldsMutex(pCur) ); 03530 assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); 03531 assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); 03532 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); 03533 if( pCur->eState>=CURSOR_REQUIRESEEK ){ 03534 if( pCur->eState==CURSOR_FAULT ){ 03535 return pCur->skip; 03536 } 03537 sqlite3BtreeClearCursor(pCur); 03538 } 03539 03540 if( pCur->iPage>=0 ){ 03541 int i; 03542 for(i=1; i<=pCur->iPage; i++){ 03543 
releasePage(pCur->apPage[i]); 03544 } 03545 }else{ 03546 if( 03547 SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0])) 03548 ){ 03549 pCur->eState = CURSOR_INVALID; 03550 return rc; 03551 } 03552 } 03553 03554 pRoot = pCur->apPage[0]; 03555 assert( pRoot->pgno==pCur->pgnoRoot ); 03556 pCur->iPage = 0; 03557 pCur->aiIdx[0] = 0; 03558 pCur->info.nSize = 0; 03559 pCur->atLast = 0; 03560 pCur->validNKey = 0; 03561 03562 if( pRoot->nCell==0 && !pRoot->leaf ){ 03563 Pgno subpage; 03564 assert( pRoot->pgno==1 ); 03565 subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); 03566 assert( subpage>0 ); 03567 pCur->eState = CURSOR_VALID; 03568 rc = moveToChild(pCur, subpage); 03569 }else{ 03570 pCur->eState = ((pRoot->nCell>0)?CURSOR_VALID:CURSOR_INVALID); 03571 } 03572 return rc; 03573 } 03574 03575 /* 03576 ** Move the cursor down to the left-most leaf entry beneath the 03577 ** entry to which it is currently pointing. 03578 ** 03579 ** The left-most leaf is the one with the smallest key - the first 03580 ** in ascending order. 03581 */ 03582 static int moveToLeftmost(BtCursor *pCur){ 03583 Pgno pgno; 03584 int rc = SQLITE_OK; 03585 MemPage *pPage; 03586 03587 assert( cursorHoldsMutex(pCur) ); 03588 assert( pCur->eState==CURSOR_VALID ); 03589 while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ 03590 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); 03591 pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage])); 03592 rc = moveToChild(pCur, pgno); 03593 } 03594 return rc; 03595 } 03596 03597 /* 03598 ** Move the cursor down to the right-most leaf entry beneath the 03599 ** page to which it is currently pointing. Notice the difference 03600 ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() 03601 ** finds the left-most entry beneath the *entry* whereas moveToRightmost() 03602 ** finds the right-most entry beneath the *page*. 
03603 ** 03604 ** The right-most entry is the one with the largest key - the last 03605 ** key in ascending order. 03606 */ 03607 static int moveToRightmost(BtCursor *pCur){ 03608 Pgno pgno; 03609 int rc = SQLITE_OK; 03610 MemPage *pPage; 03611 03612 assert( cursorHoldsMutex(pCur) ); 03613 assert( pCur->eState==CURSOR_VALID ); 03614 while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ 03615 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 03616 pCur->aiIdx[pCur->iPage] = pPage->nCell; 03617 rc = moveToChild(pCur, pgno); 03618 } 03619 if( rc==SQLITE_OK ){ 03620 pCur->aiIdx[pCur->iPage] = pPage->nCell-1; 03621 pCur->info.nSize = 0; 03622 pCur->validNKey = 0; 03623 } 03624 return rc; 03625 } 03626 03627 /* Move the cursor to the first entry in the table. Return SQLITE_OK 03628 ** on success. Set *pRes to 0 if the cursor actually points to something 03629 ** or set *pRes to 1 if the table is empty. 03630 */ 03631 int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ 03632 int rc; 03633 03634 assert( cursorHoldsMutex(pCur) ); 03635 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 03636 rc = moveToRoot(pCur); 03637 if( rc==SQLITE_OK ){ 03638 if( pCur->eState==CURSOR_INVALID ){ 03639 assert( pCur->apPage[pCur->iPage]->nCell==0 ); 03640 *pRes = 1; 03641 rc = SQLITE_OK; 03642 }else{ 03643 assert( pCur->apPage[pCur->iPage]->nCell>0 ); 03644 *pRes = 0; 03645 rc = moveToLeftmost(pCur); 03646 } 03647 } 03648 return rc; 03649 } 03650 03651 /* Move the cursor to the last entry in the table. Return SQLITE_OK 03652 ** on success. Set *pRes to 0 if the cursor actually points to something 03653 ** or set *pRes to 1 if the table is empty. 
03654 */ 03655 int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ 03656 int rc; 03657 03658 assert( cursorHoldsMutex(pCur) ); 03659 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 03660 rc = moveToRoot(pCur); 03661 if( rc==SQLITE_OK ){ 03662 if( CURSOR_INVALID==pCur->eState ){ 03663 assert( pCur->apPage[pCur->iPage]->nCell==0 ); 03664 *pRes = 1; 03665 }else{ 03666 assert( pCur->eState==CURSOR_VALID ); 03667 *pRes = 0; 03668 rc = moveToRightmost(pCur); 03669 getCellInfo(pCur); 03670 pCur->atLast = rc==SQLITE_OK; 03671 } 03672 } 03673 return rc; 03674 } 03675 03676 /* Move the cursor so that it points to an entry near the key 03677 ** specified by pIdxKey or intKey. Return a success code. 03678 ** 03679 ** For INTKEY tables, the intKey parameter is used. pIdxKey 03680 ** must be NULL. For index tables, pIdxKey is used and intKey 03681 ** is ignored. 03682 ** 03683 ** If an exact match is not found, then the cursor is always 03684 ** left pointing at a leaf page which would hold the entry if it 03685 ** were present. The cursor might point to an entry that comes 03686 ** before or after the key. 03687 ** 03688 ** The result of comparing the key with the entry to which the 03689 ** cursor is written to *pRes if pRes!=NULL. The meaning of 03690 ** this value is as follows: 03691 ** 03692 ** *pRes<0 The cursor is left pointing at an entry that 03693 ** is smaller than pKey or if the table is empty 03694 ** and the cursor is therefore left point to nothing. 03695 ** 03696 ** *pRes==0 The cursor is left pointing at an entry that 03697 ** exactly matches pKey. 03698 ** 03699 ** *pRes>0 The cursor is left pointing at an entry that 03700 ** is larger than pKey. 
03701 ** 03702 */ 03703 int sqlite3BtreeMovetoUnpacked( 03704 BtCursor *pCur, /* The cursor to be moved */ 03705 UnpackedRecord *pIdxKey, /* Unpacked index key */ 03706 i64 intKey, /* The table key */ 03707 int biasRight, /* If true, bias the search to the high end */ 03708 int *pRes /* Write search results here */ 03709 ){ 03710 int rc; 03711 03712 assert( cursorHoldsMutex(pCur) ); 03713 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 03714 03715 /* If the cursor is already positioned at the point we are trying 03716 ** to move to, then just return without doing any work */ 03717 if( pCur->eState==CURSOR_VALID && pCur->validNKey 03718 && pCur->apPage[0]->intKey 03719 ){ 03720 if( pCur->info.nKey==intKey ){ 03721 *pRes = 0; 03722 return SQLITE_OK; 03723 } 03724 if( pCur->atLast && pCur->info.nKey<intKey ){ 03725 *pRes = -1; 03726 return SQLITE_OK; 03727 } 03728 } 03729 03730 rc = moveToRoot(pCur); 03731 if( rc ){ 03732 return rc; 03733 } 03734 assert( pCur->apPage[pCur->iPage] ); 03735 assert( pCur->apPage[pCur->iPage]->isInit ); 03736 if( pCur->eState==CURSOR_INVALID ){ 03737 *pRes = -1; 03738 assert( pCur->apPage[pCur->iPage]->nCell==0 ); 03739 return SQLITE_OK; 03740 } 03741 assert( pCur->apPage[0]->intKey || pIdxKey ); 03742 for(;;){ 03743 int lwr, upr; 03744 Pgno chldPg; 03745 MemPage *pPage = pCur->apPage[pCur->iPage]; 03746 int c = -1; /* pRes return if table is empty must be -1 */ 03747 lwr = 0; 03748 upr = pPage->nCell-1; 03749 if( !pPage->intKey && pIdxKey==0 ){ 03750 rc = SQLITE_CORRUPT_BKPT; 03751 goto moveto_finish; 03752 } 03753 if( biasRight ){ 03754 pCur->aiIdx[pCur->iPage] = upr; 03755 }else{ 03756 pCur->aiIdx[pCur->iPage] = (upr+lwr)/2; 03757 } 03758 if( lwr<=upr ) for(;;){ 03759 void *pCellKey; 03760 i64 nCellKey; 03761 int idx = pCur->aiIdx[pCur->iPage]; 03762 pCur->info.nSize = 0; 03763 pCur->validNKey = 1; 03764 if( pPage->intKey ){ 03765 u8 *pCell; 03766 pCell = findCell(pPage, idx) + pPage->childPtrSize; 03767 if( pPage->hasData ){ 
03768 u32 dummy; 03769 pCell += getVarint32(pCell, dummy); 03770 } 03771 getVarint(pCell, (u64*)&nCellKey); 03772 if( nCellKey==intKey ){ 03773 c = 0; 03774 }else if( nCellKey<intKey ){ 03775 c = -1; 03776 }else{ 03777 assert( nCellKey>intKey ); 03778 c = +1; 03779 } 03780 }else{ 03781 int available; 03782 pCellKey = (void *)fetchPayload(pCur, &available, 0); 03783 nCellKey = pCur->info.nKey; 03784 if( available>=nCellKey ){ 03785 c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pIdxKey); 03786 }else{ 03787 pCellKey = sqlite3Malloc( nCellKey ); 03788 if( pCellKey==0 ){ 03789 rc = SQLITE_NOMEM; 03790 goto moveto_finish; 03791 } 03792 rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey); 03793 c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pIdxKey); 03794 sqlite3_free(pCellKey); 03795 if( rc ) goto moveto_finish; 03796 } 03797 } 03798 if( c==0 ){ 03799 pCur->info.nKey = nCellKey; 03800 if( pPage->intKey && !pPage->leaf ){ 03801 lwr = idx; 03802 upr = lwr - 1; 03803 break; 03804 }else{ 03805 if( pRes ) *pRes = 0; 03806 rc = SQLITE_OK; 03807 goto moveto_finish; 03808 } 03809 } 03810 if( c<0 ){ 03811 lwr = idx+1; 03812 }else{ 03813 upr = idx-1; 03814 } 03815 if( lwr>upr ){ 03816 pCur->info.nKey = nCellKey; 03817 break; 03818 } 03819 pCur->aiIdx[pCur->iPage] = (lwr+upr)/2; 03820 } 03821 assert( lwr==upr+1 ); 03822 assert( pPage->isInit ); 03823 if( pPage->leaf ){ 03824 chldPg = 0; 03825 }else if( lwr>=pPage->nCell ){ 03826 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); 03827 }else{ 03828 chldPg = get4byte(findCell(pPage, lwr)); 03829 } 03830 if( chldPg==0 ){ 03831 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 03832 if( pRes ) *pRes = c; 03833 rc = SQLITE_OK; 03834 goto moveto_finish; 03835 } 03836 pCur->aiIdx[pCur->iPage] = lwr; 03837 pCur->info.nSize = 0; 03838 pCur->validNKey = 0; 03839 rc = moveToChild(pCur, chldPg); 03840 if( rc ) goto moveto_finish; 03841 } 03842 moveto_finish: 03843 return rc; 03844 } 03845 03846 /* 03847 ** In 
this version of BtreeMoveto, pKey is a packed index record 03848 ** such as is generated by the OP_MakeRecord opcode. Unpack the 03849 ** record and then call BtreeMovetoUnpacked() to do the work. 03850 */ 03851 int sqlite3BtreeMoveto( 03852 BtCursor *pCur, /* Cursor open on the btree to be searched */ 03853 const void *pKey, /* Packed key if the btree is an index */ 03854 i64 nKey, /* Integer key for tables. Size of pKey for indices */ 03855 int bias, /* Bias search to the high end */ 03856 int *pRes /* Write search results here */ 03857 ){ 03858 int rc; /* Status code */ 03859 UnpackedRecord *pIdxKey; /* Unpacked index key */ 03860 UnpackedRecord aSpace[16]; /* Temp space for pIdxKey - to avoid a malloc */ 03861 03862 if( pKey ){ 03863 pIdxKey = sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nKey, pKey, 03864 aSpace, sizeof(aSpace)); 03865 if( pIdxKey==0 ) return SQLITE_NOMEM; 03866 }else{ 03867 pIdxKey = 0; 03868 } 03869 rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes); 03870 if( pKey ){ 03871 sqlite3VdbeDeleteUnpackedRecord(pIdxKey); 03872 } 03873 return rc; 03874 } 03875 03876 03877 /* 03878 ** Return TRUE if the cursor is not pointing at an entry of the table. 03879 ** 03880 ** TRUE will be returned after a call to sqlite3BtreeNext() moves 03881 ** past the last entry in the table or sqlite3BtreePrev() moves past 03882 ** the first entry. TRUE is also returned if the table is empty. 03883 */ 03884 int sqlite3BtreeEof(BtCursor *pCur){ 03885 /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries 03886 ** have been deleted? This API will need to change to return an error code 03887 ** as well as the boolean result value. 03888 */ 03889 return (CURSOR_VALID!=pCur->eState); 03890 } 03891 03892 /* 03893 ** Return the database connection handle for a cursor. 
03894 */ 03895 sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){ 03896 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 03897 return pCur->pBtree->db; 03898 } 03899 03900 /* 03901 ** Advance the cursor to the next entry in the database. If 03902 ** successful then set *pRes=0. If the cursor 03903 ** was already pointing to the last entry in the database before 03904 ** this routine was called, then set *pRes=1. 03905 */ 03906 int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ 03907 int rc; 03908 int idx; 03909 MemPage *pPage; 03910 03911 assert( cursorHoldsMutex(pCur) ); 03912 rc = restoreCursorPosition(pCur); 03913 if( rc!=SQLITE_OK ){ 03914 return rc; 03915 } 03916 assert( pRes!=0 ); 03917 if( CURSOR_INVALID==pCur->eState ){ 03918 *pRes = 1; 03919 return SQLITE_OK; 03920 } 03921 if( pCur->skip>0 ){ 03922 pCur->skip = 0; 03923 *pRes = 0; 03924 return SQLITE_OK; 03925 } 03926 pCur->skip = 0; 03927 03928 pPage = pCur->apPage[pCur->iPage]; 03929 idx = ++pCur->aiIdx[pCur->iPage]; 03930 assert( pPage->isInit ); 03931 assert( idx<=pPage->nCell ); 03932 03933 pCur->info.nSize = 0; 03934 pCur->validNKey = 0; 03935 if( idx>=pPage->nCell ){ 03936 if( !pPage->leaf ){ 03937 rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); 03938 if( rc ) return rc; 03939 rc = moveToLeftmost(pCur); 03940 *pRes = 0; 03941 return rc; 03942 } 03943 do{ 03944 if( pCur->iPage==0 ){ 03945 *pRes = 1; 03946 pCur->eState = CURSOR_INVALID; 03947 return SQLITE_OK; 03948 } 03949 sqlite3BtreeMoveToParent(pCur); 03950 pPage = pCur->apPage[pCur->iPage]; 03951 }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell ); 03952 *pRes = 0; 03953 if( pPage->intKey ){ 03954 rc = sqlite3BtreeNext(pCur, pRes); 03955 }else{ 03956 rc = SQLITE_OK; 03957 } 03958 return rc; 03959 } 03960 *pRes = 0; 03961 if( pPage->leaf ){ 03962 return SQLITE_OK; 03963 } 03964 rc = moveToLeftmost(pCur); 03965 return rc; 03966 } 03967 03968 03969 /* 03970 ** Step the cursor to the back to the previous entry in the database. 
If 03971 ** successful then set *pRes=0. If the cursor 03972 ** was already pointing to the first entry in the database before 03973 ** this routine was called, then set *pRes=1. 03974 */ 03975 int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ 03976 int rc; 03977 MemPage *pPage; 03978 03979 assert( cursorHoldsMutex(pCur) ); 03980 rc = restoreCursorPosition(pCur); 03981 if( rc!=SQLITE_OK ){ 03982 return rc; 03983 } 03984 pCur->atLast = 0; 03985 if( CURSOR_INVALID==pCur->eState ){ 03986 *pRes = 1; 03987 return SQLITE_OK; 03988 } 03989 if( pCur->skip<0 ){ 03990 pCur->skip = 0; 03991 *pRes = 0; 03992 return SQLITE_OK; 03993 } 03994 pCur->skip = 0; 03995 03996 pPage = pCur->apPage[pCur->iPage]; 03997 assert( pPage->isInit ); 03998 if( !pPage->leaf ){ 03999 int idx = pCur->aiIdx[pCur->iPage]; 04000 rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); 04001 if( rc ){ 04002 return rc; 04003 } 04004 rc = moveToRightmost(pCur); 04005 }else{ 04006 while( pCur->aiIdx[pCur->iPage]==0 ){ 04007 if( pCur->iPage==0 ){ 04008 pCur->eState = CURSOR_INVALID; 04009 *pRes = 1; 04010 return SQLITE_OK; 04011 } 04012 sqlite3BtreeMoveToParent(pCur); 04013 } 04014 pCur->info.nSize = 0; 04015 pCur->validNKey = 0; 04016 04017 pCur->aiIdx[pCur->iPage]--; 04018 pPage = pCur->apPage[pCur->iPage]; 04019 if( pPage->intKey && !pPage->leaf ){ 04020 rc = sqlite3BtreePrevious(pCur, pRes); 04021 }else{ 04022 rc = SQLITE_OK; 04023 } 04024 } 04025 *pRes = 0; 04026 return rc; 04027 } 04028 04029 /* 04030 ** Allocate a new page from the database file. 04031 ** 04032 ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() 04033 ** has already been called on the new page.) The new page has also 04034 ** been referenced and the calling routine is responsible for calling 04035 ** sqlite3PagerUnref() on the new page when it is done. 04036 ** 04037 ** SQLITE_OK is returned on success. Any other return value indicates 04038 ** an error. 
*ppPage and *pPgno are undefined in the event of an error. 04039 ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned. 04040 ** 04041 ** If the "nearby" parameter is not 0, then a (feeble) effort is made to 04042 ** locate a page close to the page number "nearby". This can be used in an 04043 ** attempt to keep related pages close to each other in the database file, 04044 ** which in turn can make database access faster. 04045 ** 04046 ** If the "exact" parameter is not 0, and the page-number nearby exists 04047 ** anywhere on the free-list, then it is guarenteed to be returned. This 04048 ** is only used by auto-vacuum databases when allocating a new table. 04049 */ 04050 static int allocateBtreePage( 04051 BtShared *pBt, 04052 MemPage **ppPage, 04053 Pgno *pPgno, 04054 Pgno nearby, 04055 u8 exact 04056 ){ 04057 MemPage *pPage1; 04058 int rc; 04059 int n; /* Number of pages on the freelist */ 04060 int k; /* Number of leaves on the trunk of the freelist */ 04061 MemPage *pTrunk = 0; 04062 MemPage *pPrevTrunk = 0; 04063 04064 assert( sqlite3_mutex_held(pBt->mutex) ); 04065 pPage1 = pBt->pPage1; 04066 n = get4byte(&pPage1->aData[36]); 04067 if( n>0 ){ 04068 /* There are pages on the freelist. Reuse one of those pages. */ 04069 Pgno iTrunk; 04070 u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ 04071 04072 /* If the 'exact' parameter was true and a query of the pointer-map 04073 ** shows that the page 'nearby' is somewhere on the free-list, then 04074 ** the entire-list will be searched for that page. 04075 */ 04076 #ifndef SQLITE_OMIT_AUTOVACUUM 04077 if( exact && nearby<=pagerPagecount(pBt->pPager) ){ 04078 u8 eType; 04079 assert( nearby>0 ); 04080 assert( pBt->autoVacuum ); 04081 rc = ptrmapGet(pBt, nearby, &eType, 0); 04082 if( rc ) return rc; 04083 if( eType==PTRMAP_FREEPAGE ){ 04084 searchList = 1; 04085 } 04086 *pPgno = nearby; 04087 } 04088 #endif 04089 04090 /* Decrement the free-list count by 1. 
Set iTrunk to the index of the 04091 ** first free-list trunk page. iPrevTrunk is initially 1. 04092 */ 04093 rc = sqlite3PagerWrite(pPage1->pDbPage); 04094 if( rc ) return rc; 04095 put4byte(&pPage1->aData[36], n-1); 04096 04097 /* The code within this loop is run only once if the 'searchList' variable 04098 ** is not true. Otherwise, it runs once for each trunk-page on the 04099 ** free-list until the page 'nearby' is located. 04100 */ 04101 do { 04102 pPrevTrunk = pTrunk; 04103 if( pPrevTrunk ){ 04104 iTrunk = get4byte(&pPrevTrunk->aData[0]); 04105 }else{ 04106 iTrunk = get4byte(&pPage1->aData[32]); 04107 } 04108 rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0); 04109 if( rc ){ 04110 pTrunk = 0; 04111 goto end_allocate_page; 04112 } 04113 04114 k = get4byte(&pTrunk->aData[4]); 04115 if( k==0 && !searchList ){ 04116 /* The trunk has no leaves and the list is not being searched. 04117 ** So extract the trunk page itself and use it as the newly 04118 ** allocated page */ 04119 assert( pPrevTrunk==0 ); 04120 rc = sqlite3PagerWrite(pTrunk->pDbPage); 04121 if( rc ){ 04122 goto end_allocate_page; 04123 } 04124 *pPgno = iTrunk; 04125 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); 04126 *ppPage = pTrunk; 04127 pTrunk = 0; 04128 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); 04129 }else if( k>pBt->usableSize/4 - 2 ){ 04130 /* Value of k is out of range. Database corruption */ 04131 rc = SQLITE_CORRUPT_BKPT; 04132 goto end_allocate_page; 04133 #ifndef SQLITE_OMIT_AUTOVACUUM 04134 }else if( searchList && nearby==iTrunk ){ 04135 /* The list is being searched and this trunk page is the page 04136 ** to allocate, regardless of whether it has leaves. 
04137 */ 04138 assert( *pPgno==iTrunk ); 04139 *ppPage = pTrunk; 04140 searchList = 0; 04141 rc = sqlite3PagerWrite(pTrunk->pDbPage); 04142 if( rc ){ 04143 goto end_allocate_page; 04144 } 04145 if( k==0 ){ 04146 if( !pPrevTrunk ){ 04147 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); 04148 }else{ 04149 memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); 04150 } 04151 }else{ 04152 /* The trunk page is required by the caller but it contains 04153 ** pointers to free-list leaves. The first leaf becomes a trunk 04154 ** page in this case. 04155 */ 04156 MemPage *pNewTrunk; 04157 Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); 04158 rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); 04159 if( rc!=SQLITE_OK ){ 04160 goto end_allocate_page; 04161 } 04162 rc = sqlite3PagerWrite(pNewTrunk->pDbPage); 04163 if( rc!=SQLITE_OK ){ 04164 releasePage(pNewTrunk); 04165 goto end_allocate_page; 04166 } 04167 memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); 04168 put4byte(&pNewTrunk->aData[4], k-1); 04169 memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); 04170 releasePage(pNewTrunk); 04171 if( !pPrevTrunk ){ 04172 put4byte(&pPage1->aData[32], iNewTrunk); 04173 }else{ 04174 rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); 04175 if( rc ){ 04176 goto end_allocate_page; 04177 } 04178 put4byte(&pPrevTrunk->aData[0], iNewTrunk); 04179 } 04180 } 04181 pTrunk = 0; 04182 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); 04183 #endif 04184 }else{ 04185 /* Extract a leaf from the trunk */ 04186 int closest; 04187 Pgno iPage; 04188 unsigned char *aData = pTrunk->aData; 04189 rc = sqlite3PagerWrite(pTrunk->pDbPage); 04190 if( rc ){ 04191 goto end_allocate_page; 04192 } 04193 if( nearby>0 ){ 04194 int i, dist; 04195 closest = 0; 04196 dist = get4byte(&aData[8]) - nearby; 04197 if( dist<0 ) dist = -dist; 04198 for(i=1; i<k; i++){ 04199 int d2 = get4byte(&aData[8+i*4]) - nearby; 04200 if( d2<0 ) d2 = -d2; 04201 if( d2<dist ){ 04202 closest = i; 04203 dist = d2; 04204 } 
04205 } 04206 }else{ 04207 closest = 0; 04208 } 04209 04210 iPage = get4byte(&aData[8+closest*4]); 04211 if( !searchList || iPage==nearby ){ 04212 int nPage; 04213 *pPgno = iPage; 04214 nPage = pagerPagecount(pBt->pPager); 04215 if( *pPgno>nPage ){ 04216 /* Free page off the end of the file */ 04217 rc = SQLITE_CORRUPT_BKPT; 04218 goto end_allocate_page; 04219 } 04220 TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" 04221 ": %d more free pages\n", 04222 *pPgno, closest+1, k, pTrunk->pgno, n-1)); 04223 if( closest<k-1 ){ 04224 memcpy(&aData[8+closest*4], &aData[4+k*4], 4); 04225 } 04226 put4byte(&aData[4], k-1); 04227 rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1); 04228 if( rc==SQLITE_OK ){ 04229 sqlite3PagerDontRollback((*ppPage)->pDbPage); 04230 rc = sqlite3PagerWrite((*ppPage)->pDbPage); 04231 if( rc!=SQLITE_OK ){ 04232 releasePage(*ppPage); 04233 } 04234 } 04235 searchList = 0; 04236 } 04237 } 04238 releasePage(pPrevTrunk); 04239 pPrevTrunk = 0; 04240 }while( searchList ); 04241 }else{ 04242 /* There are no pages on the freelist, so create a new page at the 04243 ** end of the file */ 04244 int nPage = pagerPagecount(pBt->pPager); 04245 *pPgno = nPage + 1; 04246 04247 #ifndef SQLITE_OMIT_AUTOVACUUM 04248 if( pBt->nTrunc ){ 04249 /* An incr-vacuum has already run within this transaction. So the 04250 ** page to allocate is not from the physical end of the file, but 04251 ** at pBt->nTrunc. 04252 */ 04253 *pPgno = pBt->nTrunc+1; 04254 if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ 04255 (*pPgno)++; 04256 } 04257 } 04258 if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){ 04259 /* If *pPgno refers to a pointer-map page, allocate two new pages 04260 ** at the end of the file instead of one. The first allocated page 04261 ** becomes a new pointer-map page, the second is used by the caller. 
04262 */ 04263 TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno)); 04264 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); 04265 (*pPgno)++; 04266 if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ (*pPgno)++; } 04267 } 04268 if( pBt->nTrunc ){ 04269 pBt->nTrunc = *pPgno; 04270 } 04271 #endif 04272 04273 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); 04274 rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0); 04275 if( rc ) return rc; 04276 rc = sqlite3PagerWrite((*ppPage)->pDbPage); 04277 if( rc!=SQLITE_OK ){ 04278 releasePage(*ppPage); 04279 } 04280 TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); 04281 } 04282 04283 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); 04284 04285 end_allocate_page: 04286 releasePage(pTrunk); 04287 releasePage(pPrevTrunk); 04288 if( rc==SQLITE_OK && sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ 04289 releasePage(*ppPage); 04290 return SQLITE_CORRUPT_BKPT; 04291 } 04292 return rc; 04293 } 04294 04295 /* 04296 ** Add a page of the database file to the freelist. 04297 ** 04298 ** sqlite3PagerUnref() is NOT called for pPage. 04299 */ 04300 static int freePage(MemPage *pPage){ 04301 BtShared *pBt = pPage->pBt; 04302 MemPage *pPage1 = pBt->pPage1; 04303 int rc, n, k; 04304 04305 /* Prepare the page for freeing */ 04306 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04307 assert( pPage->pgno>1 ); 04308 pPage->isInit = 0; 04309 04310 /* Increment the free page count on pPage1 */ 04311 rc = sqlite3PagerWrite(pPage1->pDbPage); 04312 if( rc ) return rc; 04313 n = get4byte(&pPage1->aData[36]); 04314 put4byte(&pPage1->aData[36], n+1); 04315 04316 #ifdef SQLITE_SECURE_DELETE 04317 /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then 04318 ** always fully overwrite deleted information with zeros. 
04319 */ 04320 rc = sqlite3PagerWrite(pPage->pDbPage); 04321 if( rc ) return rc; 04322 memset(pPage->aData, 0, pPage->pBt->pageSize); 04323 #endif 04324 04325 /* If the database supports auto-vacuum, write an entry in the pointer-map 04326 ** to indicate that the page is free. 04327 */ 04328 if( ISAUTOVACUUM ){ 04329 rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0); 04330 if( rc ) return rc; 04331 } 04332 04333 if( n==0 ){ 04334 /* This is the first free page */ 04335 rc = sqlite3PagerWrite(pPage->pDbPage); 04336 if( rc ) return rc; 04337 memset(pPage->aData, 0, 8); 04338 put4byte(&pPage1->aData[32], pPage->pgno); 04339 TRACE(("FREE-PAGE: %d first\n", pPage->pgno)); 04340 }else{ 04341 /* Other free pages already exist. Retrive the first trunk page 04342 ** of the freelist and find out how many leaves it has. */ 04343 MemPage *pTrunk; 04344 rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0); 04345 if( rc ) return rc; 04346 k = get4byte(&pTrunk->aData[4]); 04347 if( k>=pBt->usableSize/4 - 8 ){ 04348 /* The trunk is full. Turn the page being freed into a new 04349 ** trunk page with no leaves. 04350 ** 04351 ** Note that the trunk page is not really full until it contains 04352 ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have 04353 ** coded. But due to a coding error in versions of SQLite prior to 04354 ** 3.6.0, databases with freelist trunk pages holding more than 04355 ** usableSize/4 - 8 entries will be reported as corrupt. In order 04356 ** to maintain backwards compatibility with older versions of SQLite, 04357 ** we will contain to restrict the number of entries to usableSize/4 - 8 04358 ** for now. At some point in the future (once everyone has upgraded 04359 ** to 3.6.0 or later) we should consider fixing the conditional above 04360 ** to read "usableSize/4-2" instead of "usableSize/4-8". 
04361 */ 04362 rc = sqlite3PagerWrite(pPage->pDbPage); 04363 if( rc==SQLITE_OK ){ 04364 put4byte(pPage->aData, pTrunk->pgno); 04365 put4byte(&pPage->aData[4], 0); 04366 put4byte(&pPage1->aData[32], pPage->pgno); 04367 TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", 04368 pPage->pgno, pTrunk->pgno)); 04369 } 04370 }else if( k<0 ){ 04371 rc = SQLITE_CORRUPT; 04372 }else{ 04373 /* Add the newly freed page as a leaf on the current trunk */ 04374 rc = sqlite3PagerWrite(pTrunk->pDbPage); 04375 if( rc==SQLITE_OK ){ 04376 put4byte(&pTrunk->aData[4], k+1); 04377 put4byte(&pTrunk->aData[8+k*4], pPage->pgno); 04378 #ifndef SQLITE_SECURE_DELETE 04379 rc = sqlite3PagerDontWrite(pPage->pDbPage); 04380 #endif 04381 } 04382 TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); 04383 } 04384 releasePage(pTrunk); 04385 } 04386 return rc; 04387 } 04388 04389 /* 04390 ** Free any overflow pages associated with the given Cell. 04391 */ 04392 static int clearCell(MemPage *pPage, unsigned char *pCell){ 04393 BtShared *pBt = pPage->pBt; 04394 CellInfo info; 04395 Pgno ovflPgno; 04396 int rc; 04397 int nOvfl; 04398 int ovflPageSize; 04399 04400 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04401 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 04402 if( info.iOverflow==0 ){ 04403 return SQLITE_OK; /* No overflow pages. 
Return without doing anything */ 04404 } 04405 ovflPgno = get4byte(&pCell[info.iOverflow]); 04406 ovflPageSize = pBt->usableSize - 4; 04407 nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; 04408 assert( ovflPgno==0 || nOvfl>0 ); 04409 while( nOvfl-- ){ 04410 MemPage *pOvfl; 04411 if( ovflPgno==0 || ovflPgno>pagerPagecount(pBt->pPager) ){ 04412 return SQLITE_CORRUPT_BKPT; 04413 } 04414 04415 rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno); 04416 if( rc ) return rc; 04417 rc = freePage(pOvfl); 04418 sqlite3PagerUnref(pOvfl->pDbPage); 04419 if( rc ) return rc; 04420 } 04421 return SQLITE_OK; 04422 } 04423 04424 /* 04425 ** Create the byte sequence used to represent a cell on page pPage 04426 ** and write that byte sequence into pCell[]. Overflow pages are 04427 ** allocated and filled in as necessary. The calling procedure 04428 ** is responsible for making sure sufficient space has been allocated 04429 ** for pCell[]. 04430 ** 04431 ** Note that pCell does not necessary need to point to the pPage->aData 04432 ** area. pCell might point to some temporary storage. The cell will 04433 ** be constructed in this temporary area then copied into pPage->aData 04434 ** later. 04435 */ 04436 static int fillInCell( 04437 MemPage *pPage, /* The page that contains the cell */ 04438 unsigned char *pCell, /* Complete text of the cell */ 04439 const void *pKey, i64 nKey, /* The key */ 04440 const void *pData,int nData, /* The data */ 04441 int nZero, /* Extra zero bytes to append to pData */ 04442 int *pnSize /* Write cell size here */ 04443 ){ 04444 int nPayload; 04445 const u8 *pSrc; 04446 int nSrc, n, rc; 04447 int spaceLeft; 04448 MemPage *pOvfl = 0; 04449 MemPage *pToRelease = 0; 04450 unsigned char *pPrior; 04451 unsigned char *pPayload; 04452 BtShared *pBt = pPage->pBt; 04453 Pgno pgnoOvfl = 0; 04454 int nHeader; 04455 CellInfo info; 04456 04457 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04458 04459 /* Fill in the header. 
*/ 04460 nHeader = 0; 04461 if( !pPage->leaf ){ 04462 nHeader += 4; 04463 } 04464 if( pPage->hasData ){ 04465 nHeader += putVarint(&pCell[nHeader], nData+nZero); 04466 }else{ 04467 nData = nZero = 0; 04468 } 04469 nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); 04470 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 04471 assert( info.nHeader==nHeader ); 04472 assert( info.nKey==nKey ); 04473 assert( info.nData==nData+nZero ); 04474 04475 /* Fill in the payload */ 04476 nPayload = nData + nZero; 04477 if( pPage->intKey ){ 04478 pSrc = pData; 04479 nSrc = nData; 04480 nData = 0; 04481 }else{ 04482 nPayload += nKey; 04483 pSrc = pKey; 04484 nSrc = nKey; 04485 } 04486 *pnSize = info.nSize; 04487 spaceLeft = info.nLocal; 04488 pPayload = &pCell[nHeader]; 04489 pPrior = &pCell[info.iOverflow]; 04490 04491 while( nPayload>0 ){ 04492 if( spaceLeft==0 ){ 04493 int isExact = 0; 04494 #ifndef SQLITE_OMIT_AUTOVACUUM 04495 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ 04496 if( pBt->autoVacuum ){ 04497 do{ 04498 pgnoOvfl++; 04499 } while( 04500 PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) 04501 ); 04502 if( pgnoOvfl>1 ){ 04503 /* isExact = 1; */ 04504 } 04505 } 04506 #endif 04507 rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact); 04508 #ifndef SQLITE_OMIT_AUTOVACUUM 04509 /* If the database supports auto-vacuum, and the second or subsequent 04510 ** overflow page is being allocated, add an entry to the pointer-map 04511 ** for that page now. 04512 ** 04513 ** If this is the first overflow page, then write a partial entry 04514 ** to the pointer-map. If we write nothing to this pointer-map slot, 04515 ** then the optimistic overflow chain processing in clearCell() 04516 ** may misinterpret the uninitialised values and delete the 04517 ** wrong pages from the database. 
04518 */ 04519 if( pBt->autoVacuum && rc==SQLITE_OK ){ 04520 u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); 04521 rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap); 04522 if( rc ){ 04523 releasePage(pOvfl); 04524 } 04525 } 04526 #endif 04527 if( rc ){ 04528 releasePage(pToRelease); 04529 return rc; 04530 } 04531 put4byte(pPrior, pgnoOvfl); 04532 releasePage(pToRelease); 04533 pToRelease = pOvfl; 04534 pPrior = pOvfl->aData; 04535 put4byte(pPrior, 0); 04536 pPayload = &pOvfl->aData[4]; 04537 spaceLeft = pBt->usableSize - 4; 04538 } 04539 n = nPayload; 04540 if( n>spaceLeft ) n = spaceLeft; 04541 if( nSrc>0 ){ 04542 if( n>nSrc ) n = nSrc; 04543 assert( pSrc ); 04544 memcpy(pPayload, pSrc, n); 04545 }else{ 04546 memset(pPayload, 0, n); 04547 } 04548 nPayload -= n; 04549 pPayload += n; 04550 pSrc += n; 04551 nSrc -= n; 04552 spaceLeft -= n; 04553 if( nSrc==0 ){ 04554 nSrc = nData; 04555 pSrc = pData; 04556 } 04557 } 04558 releasePage(pToRelease); 04559 return SQLITE_OK; 04560 } 04561 04562 /* 04563 ** Remove the i-th cell from pPage. This routine effects pPage only. 04564 ** The cell content is not freed or deallocated. It is assumed that 04565 ** the cell content has been copied someplace else. This routine just 04566 ** removes the reference to the cell from pPage. 04567 ** 04568 ** "sz" must be the number of bytes in the cell. 
04569 */ 04570 static int dropCell(MemPage *pPage, int idx, int sz){ 04571 int i; /* Loop counter */ 04572 int pc; /* Offset to cell content of cell being deleted */ 04573 u8 *data; /* pPage->aData */ 04574 u8 *ptr; /* Used to move bytes around within data[] */ 04575 04576 assert( idx>=0 && idx<pPage->nCell ); 04577 assert( sz==cellSize(pPage, idx) ); 04578 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 04579 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04580 data = pPage->aData; 04581 ptr = &data[pPage->cellOffset + 2*idx]; 04582 pc = get2byte(ptr); 04583 if ( pc<=10 || pc+sz>pPage->pBt->usableSize ) { 04584 return SQLITE_CORRUPT_BKPT; 04585 } 04586 freeSpace(pPage, pc, sz); 04587 for(i=idx+1; i<pPage->nCell; i++, ptr+=2){ 04588 ptr[0] = ptr[2]; 04589 ptr[1] = ptr[3]; 04590 } 04591 pPage->nCell--; 04592 put2byte(&data[pPage->hdrOffset+3], pPage->nCell); 04593 pPage->nFree += 2; 04594 return SQLITE_OK; 04595 } 04596 04597 /* 04598 ** Insert a new cell on pPage at cell index "i". pCell points to the 04599 ** content of the cell. 04600 ** 04601 ** If the cell content will fit on the page, then put it there. If it 04602 ** will not fit, then make a copy of the cell content into pTemp if 04603 ** pTemp is not null. Regardless of pTemp, allocate a new entry 04604 ** in pPage->aOvfl[] and make it point to the cell content (either 04605 ** in pTemp or the original pCell) and also record its index. 04606 ** Allocating a new entry in pPage->aCell[] implies that 04607 ** pPage->nOverflow is incremented. 04608 ** 04609 ** If nSkip is non-zero, then do not copy the first nSkip bytes of the 04610 ** cell. The caller will overwrite them after this function returns. If 04611 ** nSkip is non-zero, then pCell may not point to an invalid memory location 04612 ** (but pCell+nSkip is always valid). 
04613 */ 04614 static int insertCell( 04615 MemPage *pPage, /* Page into which we are copying */ 04616 int i, /* New cell becomes the i-th cell of the page */ 04617 u8 *pCell, /* Content of the new cell */ 04618 int sz, /* Bytes of content in pCell */ 04619 u8 *pTemp, /* Temp storage space for pCell, if needed */ 04620 u8 nSkip /* Do not write the first nSkip bytes of the cell */ 04621 ){ 04622 int idx; /* Where to write new cell content in data[] */ 04623 int j; /* Loop counter */ 04624 int top; /* First byte of content for any cell in data[] */ 04625 int end; /* First byte past the last cell pointer in data[] */ 04626 int ins; /* Index in data[] where new cell pointer is inserted */ 04627 int hdr; /* Offset into data[] of the page header */ 04628 int cellOffset; /* Address of first cell pointer in data[] */ 04629 u8 *data; /* The content of the whole page */ 04630 u8 *ptr; /* Used for moving information around in data[] */ 04631 04632 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); 04633 assert( sz==cellSizePtr(pPage, pCell) ); 04634 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04635 if( pPage->nOverflow || sz+2>pPage->nFree ){ 04636 if( pTemp ){ 04637 memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip); 04638 pCell = pTemp; 04639 } 04640 j = pPage->nOverflow++; 04641 assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) ); 04642 pPage->aOvfl[j].pCell = pCell; 04643 pPage->aOvfl[j].idx = i; 04644 pPage->nFree = 0; 04645 }else{ 04646 int rc = sqlite3PagerWrite(pPage->pDbPage); 04647 if( rc!=SQLITE_OK ){ 04648 return rc; 04649 } 04650 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 04651 data = pPage->aData; 04652 hdr = pPage->hdrOffset; 04653 top = get2byte(&data[hdr+5]); 04654 cellOffset = pPage->cellOffset; 04655 end = cellOffset + 2*pPage->nCell + 2; 04656 ins = cellOffset + 2*i; 04657 if( end > top - sz ){ 04658 rc = defragmentPage(pPage); 04659 if( rc!=SQLITE_OK ){ 04660 return rc; 04661 } 04662 top = get2byte(&data[hdr+5]); 04663 assert( end + sz <= top 
); 04664 } 04665 idx = allocateSpace(pPage, sz); 04666 assert( idx>0 ); 04667 assert( end <= get2byte(&data[hdr+5]) ); 04668 if (idx+sz > pPage->pBt->usableSize) { 04669 return SQLITE_CORRUPT_BKPT; 04670 } 04671 pPage->nCell++; 04672 pPage->nFree -= 2; 04673 memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip); 04674 for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){ 04675 ptr[0] = ptr[-2]; 04676 ptr[1] = ptr[-1]; 04677 } 04678 put2byte(&data[ins], idx); 04679 put2byte(&data[hdr+3], pPage->nCell); 04680 #ifndef SQLITE_OMIT_AUTOVACUUM 04681 if( pPage->pBt->autoVacuum ){ 04682 /* The cell may contain a pointer to an overflow page. If so, write 04683 ** the entry for the overflow page into the pointer map. 04684 */ 04685 CellInfo info; 04686 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 04687 assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); 04688 if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){ 04689 Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); 04690 rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno); 04691 if( rc!=SQLITE_OK ) return rc; 04692 } 04693 } 04694 #endif 04695 } 04696 04697 return SQLITE_OK; 04698 } 04699 04700 /* 04701 ** Add a list of cells to a page. The page should be initially empty. 04702 ** The cells are guaranteed to fit on the page. 
04703 */ 04704 static void assemblePage( 04705 MemPage *pPage, /* The page to be assemblied */ 04706 int nCell, /* The number of cells to add to this page */ 04707 u8 **apCell, /* Pointers to cell bodies */ 04708 u16 *aSize /* Sizes of the cells */ 04709 ){ 04710 int i; /* Loop counter */ 04711 int totalSize; /* Total size of all cells */ 04712 int hdr; /* Index of page header */ 04713 int cellptr; /* Address of next cell pointer */ 04714 int cellbody; /* Address of next cell body */ 04715 u8 *data; /* Data for the page */ 04716 04717 assert( pPage->nOverflow==0 ); 04718 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04719 totalSize = 0; 04720 for(i=0; i<nCell; i++){ 04721 totalSize += aSize[i]; 04722 } 04723 assert( totalSize+2*nCell<=pPage->nFree ); 04724 assert( pPage->nCell==0 ); 04725 cellptr = pPage->cellOffset; 04726 data = pPage->aData; 04727 hdr = pPage->hdrOffset; 04728 put2byte(&data[hdr+3], nCell); 04729 if( nCell ){ 04730 cellbody = allocateSpace(pPage, totalSize); 04731 assert( cellbody>0 ); 04732 assert( pPage->nFree >= 2*nCell ); 04733 pPage->nFree -= 2*nCell; 04734 for(i=0; i<nCell; i++){ 04735 put2byte(&data[cellptr], cellbody); 04736 memcpy(&data[cellbody], apCell[i], aSize[i]); 04737 cellptr += 2; 04738 cellbody += aSize[i]; 04739 } 04740 assert( cellbody==pPage->pBt->usableSize ); 04741 } 04742 pPage->nCell = nCell; 04743 } 04744 04745 /* 04746 ** The following parameters determine how many adjacent pages get involved 04747 ** in a balancing operation. NN is the number of neighbors on either side 04748 ** of the page that participate in the balancing operation. NB is the 04749 ** total number of pages that participate, including the target page and 04750 ** NN neighbors on either side. 04751 ** 04752 ** The minimum value of NN is 1 (of course). Increasing NN above 1 04753 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance 04754 ** in exchange for a larger degradation in INSERT and UPDATE performance. 
04755 ** The value of NN appears to give the best results overall. 04756 */ 04757 #define NN 1 /* Number of neighbors on either side of pPage */ 04758 #define NB (NN*2+1) /* Total pages involved in the balance */ 04759 04760 /* Forward reference */ 04761 static int balance(BtCursor*, int); 04762 04763 #ifndef SQLITE_OMIT_QUICKBALANCE 04764 /* 04765 ** This version of balance() handles the common special case where 04766 ** a new entry is being inserted on the extreme right-end of the 04767 ** tree, in other words, when the new entry will become the largest 04768 ** entry in the tree. 04769 ** 04770 ** Instead of trying balance the 3 right-most leaf pages, just add 04771 ** a new page to the right-hand side and put the one new entry in 04772 ** that page. This leaves the right side of the tree somewhat 04773 ** unbalanced. But odds are that we will be inserting new entries 04774 ** at the end soon afterwards so the nearly empty page will quickly 04775 ** fill up. On average. 04776 ** 04777 ** pPage is the leaf page which is the right-most page in the tree. 04778 ** pParent is its parent. pPage must have a single overflow entry 04779 ** which is also the right-most entry on the page. 04780 */ 04781 static int balance_quick(BtCursor *pCur){ 04782 int rc; 04783 MemPage *pNew = 0; 04784 Pgno pgnoNew; 04785 u8 *pCell; 04786 u16 szCell; 04787 CellInfo info; 04788 MemPage *pPage = pCur->apPage[pCur->iPage]; 04789 MemPage *pParent = pCur->apPage[pCur->iPage-1]; 04790 BtShared *pBt = pPage->pBt; 04791 int parentIdx = pParent->nCell; /* pParent new divider cell index */ 04792 int parentSize; /* Size of new divider cell */ 04793 u8 parentCell[64]; /* Space for the new divider cell */ 04794 04795 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 04796 04797 /* Allocate a new page. Insert the overflow cell from pPage 04798 ** into it. Then remove the overflow cell from pPage. 
04799 */ 04800 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); 04801 if( rc==SQLITE_OK ){ 04802 pCell = pPage->aOvfl[0].pCell; 04803 szCell = cellSizePtr(pPage, pCell); 04804 zeroPage(pNew, pPage->aData[0]); 04805 assemblePage(pNew, 1, &pCell, &szCell); 04806 pPage->nOverflow = 0; 04807 04808 /* pPage is currently the right-child of pParent. Change this 04809 ** so that the right-child is the new page allocated above and 04810 ** pPage is the next-to-right child. 04811 ** 04812 ** Ignore the return value of the call to fillInCell(). fillInCell() 04813 ** may only return other than SQLITE_OK if it is required to allocate 04814 ** one or more overflow pages. Since an internal table B-Tree cell 04815 ** may never spill over onto an overflow page (it is a maximum of 04816 ** 13 bytes in size), it is not neccessary to check the return code. 04817 ** 04818 ** Similarly, the insertCell() function cannot fail if the page 04819 ** being inserted into is already writable and the cell does not 04820 ** contain an overflow pointer. So ignore this return code too. 04821 */ 04822 assert( pPage->nCell>0 ); 04823 pCell = findCell(pPage, pPage->nCell-1); 04824 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 04825 fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize); 04826 assert( parentSize<64 ); 04827 assert( sqlite3PagerIswriteable(pParent->pDbPage) ); 04828 insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4); 04829 put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno); 04830 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); 04831 04832 /* If this is an auto-vacuum database, update the pointer map 04833 ** with entries for the new page, and any pointer from the 04834 ** cell on the page to an overflow page. 04835 */ 04836 if( ISAUTOVACUUM ){ 04837 rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno); 04838 if( rc==SQLITE_OK ){ 04839 rc = ptrmapPutOvfl(pNew, 0); 04840 } 04841 } 04842 04843 /* Release the reference to the new page. 
*/ 04844 releasePage(pNew); 04845 } 04846 04847 /* At this point the pPage->nFree variable is not set correctly with 04848 ** respect to the content of the page (because it was set to 0 by 04849 ** insertCell). So call sqlite3BtreeInitPage() to make sure it is 04850 ** correct. 04851 ** 04852 ** This has to be done even if an error will be returned. Normally, if 04853 ** an error occurs during tree balancing, the contents of MemPage are 04854 ** not important, as they will be recalculated when the page is rolled 04855 ** back. But here, in balance_quick(), it is possible that pPage has 04856 ** not yet been marked dirty or written into the journal file. Therefore 04857 ** it will not be rolled back and so it is important to make sure that 04858 ** the page data and contents of MemPage are consistent. 04859 */ 04860 pPage->isInit = 0; 04861 sqlite3BtreeInitPage(pPage); 04862 04863 /* If everything else succeeded, balance the parent page, in 04864 ** case the divider cell inserted caused it to become overfull. 04865 */ 04866 if( rc==SQLITE_OK ){ 04867 releasePage(pPage); 04868 pCur->iPage--; 04869 rc = balance(pCur, 0); 04870 } 04871 return rc; 04872 } 04873 #endif /* SQLITE_OMIT_QUICKBALANCE */ 04874 04875 /* 04876 ** This routine redistributes Cells on pPage and up to NN*2 siblings 04877 ** of pPage so that all pages have about the same amount of free space. 04878 ** Usually NN siblings on either side of pPage is used in the balancing, 04879 ** though more siblings might come from one side if pPage is the first 04880 ** or last child of its parent. If pPage has fewer than 2*NN siblings 04881 ** (something which can only happen if pPage is the root page or a 04882 ** child of root) then all available siblings participate in the balancing. 04883 ** 04884 ** The number of siblings of pPage might be increased or decreased by one or 04885 ** two in an effort to keep pages nearly full but not over full. The root page 04886 ** is special and is allowed to be nearly empty. 
** If pPage is
** the root page, then the depth of the tree might be increased
** or decreased by one, as necessary, to keep the root page from being
** overfull or completely empty.
**
** Note that when this routine is called, some of the Cells on pPage
** might not actually be stored in pPage->aData[].  This can happen
** if the page is overfull.  Part of the job of this routine is to
** make sure all Cells for pPage once again fit in pPage->aData[].
**
** In the course of balancing the siblings of pPage, the parent of pPage
** might become overfull or underfull.  If that happens, then this routine
** is called recursively on the parent.
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state.  So if this routine fails, the database should
** be rolled back.
**
** The page to balance is pCur->apPage[pCur->iPage] and its parent is
** pCur->apPage[pCur->iPage-1]; pCur->iPage must be greater than zero.
** On the success path the reference to pPage is released, pCur->iPage
** is decremented and balance() is invoked on the parent.  The return
** value is SQLITE_OK, SQLITE_NOMEM if a working buffer cannot be
** allocated, or the first error code returned by a subroutine.
*/
static int balance_nonroot(BtCursor *pCur){
  MemPage *pPage;              /* The over or underfull page to balance */
  MemPage *pParent;            /* The parent of pPage */
  BtShared *pBt;               /* The whole database */
  int nCell = 0;               /* Number of cells in apCell[] */
  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
  int nOld;                    /* Number of pages in apOld[] */
  int nNew;                    /* Number of pages in apNew[] */
  int nDiv;                    /* Number of cells in apDiv[] */
  int i, j, k;                 /* Loop counters */
  int idx;                     /* Index of pPage in pParent->aCell[] */
  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
  int rc;                      /* The return code */
  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
  int usableSpace;             /* Bytes in pPage beyond the header */
  int pageFlags;               /* Value of pPage->aData[0] */
  int subtotal;                /* Subtotal of bytes in cells on one page */
  int iSpace1 = 0;             /* First unused byte of aSpace1[] */
  int iSpace2 = 0;             /* First unused byte of aSpace2[] */
  int szScratch;               /* Size of scratch memory requested */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
  Pgno pgnoNew[NB+2];          /* Page numbers for each page in apNew[] */
  u8 *apDiv[NB];               /* Divider cells in pParent */
  int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */
  int szNew[NB+2];             /* Combined size of cells placed on i-th page */
  u8 **apCell = 0;             /* All cells being balanced */
  u16 *szCell;                 /* Local size of all cells in apCell[] */
  u8 *aCopy[NB];               /* Space for holding data of apCopy[] */
  u8 *aSpace1;                 /* Space for copies of divider cells before balance */
  u8 *aSpace2 = 0;             /* Space for overflow divider cells after balance */
  u8 *aFrom = 0;               /* aFrom[n]: index in apCopy[] of the page that
                               ** held cell n, or 0xFF for overflow and divider
                               ** cells.  Only set up for auto-vacuum. */

  pPage = pCur->apPage[pCur->iPage];
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  VVA_ONLY( pCur->pagesShuffled = 1 );

  /* 
  ** Find the parent page.
  */
  assert( pCur->iPage>0 );
  assert( pPage->isInit );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pCur->apPage[pCur->iPage-1];
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }

  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));

#ifndef SQLITE_OMIT_QUICKBALANCE
  /*
  ** A special case:  If a new entry has just been inserted into a
  ** table (that is, a btree with integer keys and all data at the leaves)
  ** and the new entry is the right-most entry in the tree (it has the
  ** largest key) then use the special balance_quick() routine for
  ** balancing.  balance_quick() is much faster and results in a tighter
  ** packing of data in the common case.
  */
  if( pPage->leaf &&
      pPage->intKey &&
      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      pParent->pgno!=1 &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){
    assert( pPage->intKey );
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pCur);
  }
#endif

  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
    return rc;
  }

  /*
  ** Find the cell in the parent page whose left child points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  idx = pCur->aiIdx[pCur->iPage-1];
  assertParentIndex(pParent, idx, pPage->pgno);

  /*
  ** Initialize variables so that it will be safe to jump
  ** directly to balance_cleanup at any moment.
  */
  nOld = nNew = 0;

  /*
  ** Find sibling pages to pPage and the cells in pParent that divide
  ** the siblings.  An attempt is made to find NN siblings on either
  ** side of pPage.  More siblings are taken from one side, however, if
  ** there are fewer than NN siblings on the other side.  If pParent
  ** has NB or fewer children then all children of pParent are taken.
  */
  nxDiv = idx - NN;
  if( nxDiv + NB > pParent->nCell ){
    nxDiv = pParent->nCell - NB + 1;
  }
  if( nxDiv<0 ){
    nxDiv = 0;
  }
  nDiv = 0;
  for(i=0, k=nxDiv; i<NB; i++, k++){
    if( k<pParent->nCell ){
      /* Child k is the left child of divider cell k */
      apDiv[i] = findCell(pParent, k);
      nDiv++;
      assert( !pParent->leaf );
      pgnoOld[i] = get4byte(apDiv[i]);
    }else if( k==pParent->nCell ){
      /* Child k is the right-most child, stored in the page header */
      pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
    }else{
      break;
    }
    rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i]);
    if( rc ) goto balance_cleanup;
    /* apOld[i]->idxParent = k; */
    apCopy[i] = 0;
    assert( i==nOld );
    nOld++;
    /* One slot per cell on the sibling plus one for a divider cell */
    nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
  }

  /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
  ** alignment */
  nMaxCells = (nMaxCells + 3)&~3;

  /*
  ** Allocate space for memory structures.  apCell, szCell, aCopy[],
  ** aSpace1 and (for auto-vacuum) aFrom are all carved out of one
  ** scratch allocation, in that order.  aSpace2 is allocated separately.
  */
  szScratch =
       nMaxCells*sizeof(u8*)                       /* apCell */
     + nMaxCells*sizeof(u16)                       /* szCell */
     + (ROUND8(sizeof(MemPage))+pBt->pageSize)*NB  /* aCopy */
     + pBt->pageSize                               /* aSpace1 */
     + (ISAUTOVACUUM ? nMaxCells : 0);             /* aFrom */
  apCell = sqlite3ScratchMalloc( szScratch ); 
  if( apCell==0 ){
    rc = SQLITE_NOMEM;
    goto balance_cleanup;
  }
  szCell = (u16*)&apCell[nMaxCells];
  aCopy[0] = (u8*)&szCell[nMaxCells];
  assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  for(i=1; i<NB; i++){
    aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
    assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  }
  aSpace1 = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
  assert( ((aSpace1 - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  if( ISAUTOVACUUM ){
    aFrom = &aSpace1[pBt->pageSize];
  }
  aSpace2 = sqlite3PageMalloc(pBt->pageSize);
  if( aSpace2==0 ){
    rc = SQLITE_NOMEM;
    goto balance_cleanup;
  }
  
  /*
  ** Make copies of the content of pPage and its siblings into apCopy[].
  ** The rest of this function will use data from the copies rather
  ** than the original pages since the original pages will be in the
  ** process of being overwritten.
  */
  for(i=0; i<nOld; i++){
    MemPage *p = apCopy[i] = (MemPage*)aCopy[i];
    memcpy(p, apOld[i], sizeof(MemPage));
    /* The copied page image lives directly after the copied MemPage */
    p->aData = (void*)&p[1];
    memcpy(p->aData, apOld[i]->aData, pBt->pageSize);
  }

  /*
  ** Load pointers to all cells on sibling pages and the divider cells
  ** into the local apCell[] array.  Make copies of the divider cells
  ** into space obtained from aSpace1[] and remove the divider cells
  ** from pParent.
  **
  ** If the siblings are on leaf pages, then the child pointers of the
  ** divider cells are stripped from the cells before they are copied
  ** into aSpace1[].  In this way, all cells in apCell[] are without
  ** child pointers.  If siblings are not leaves, then all cells in
  ** apCell[] include child pointers.  Either way, all cells in apCell[]
  ** are alike.
  **
  ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
  **       leafData:  1 if pPage holds key+data and pParent holds only keys.
  */
  nCell = 0;
  leafCorrection = pPage->leaf*4;
  leafData = pPage->hasData;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apCopy[i];
    int limit = pOld->nCell+pOld->nOverflow;
    for(j=0; j<limit; j++){
      assert( nCell<nMaxCells );
      apCell[nCell] = findOverflowCell(pOld, j);
      szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
      if( ISAUTOVACUUM ){
        int a;
        aFrom[nCell] = i;
        /* Overflow cells are not stored on any page image: tag with 0xFF */
        for(a=0; a<pOld->nOverflow; a++){
          if( pOld->aOvfl[a].pCell==apCell[nCell] ){
            aFrom[nCell] = 0xFF;
            break;
          }
        }
      }
      nCell++;
    }
    if( i<nOld-1 ){
      u16 sz = cellSizePtr(pParent, apDiv[i]);
      if( leafData ){
        /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
        ** are duplicates of keys on the child pages.  We need to remove
        ** the divider cells from pParent, but the divider cells are not
        ** added to apCell[] because they are duplicates of child cells.
        */
        dropCell(pParent, nxDiv, sz);
      }else{
        u8 *pTemp;
        assert( nCell<nMaxCells );
        szCell[nCell] = sz;
        pTemp = &aSpace1[iSpace1];
        iSpace1 += sz;
        assert( sz<=pBt->pageSize/4 );
        assert( iSpace1<=pBt->pageSize );
        memcpy(pTemp, apDiv[i], sz);
        apCell[nCell] = pTemp+leafCorrection;
        if( ISAUTOVACUUM ){
          aFrom[nCell] = 0xFF;
        }
        dropCell(pParent, nxDiv, sz);
        szCell[nCell] -= leafCorrection;
        assert( get4byte(pTemp)==pgnoOld[i] );
        if( !pOld->leaf ){
          assert( leafCorrection==0 );
          /* The right pointer of the child page pOld becomes the left
          ** pointer of the divider cell */
          memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
        }else{
          assert( leafCorrection==4 );
          if( szCell[nCell]<4 ){
            /* Do not allow any cells smaller than 4 bytes. */
            szCell[nCell] = 4;
          }
        }
        nCell++;
      }
    }
  }

  /*
  ** Figure out the number of pages needed to hold all nCell cells.
  ** Store this number in "k".  Also compute szNew[] which is the total
  ** size of all cells on the i-th page and cntNew[] which is the index
  ** in apCell[] of the cell that divides page i from page i+1.  
  ** cntNew[k] should equal nCell.
  **
  ** Values computed by this block:
  **
  **           k: The total number of sibling pages
  **    szNew[i]: Space used on the i-th sibling page.
  **   cntNew[i]: Index in apCell[] and szCell[] for the first cell to
  **              the right of the i-th sibling page.
  ** usableSpace: Number of bytes of space available on each sibling.
  ** 
  */
  usableSpace = pBt->usableSize - 12 + leafCorrection;
  for(subtotal=k=i=0; i<nCell; i++){
    assert( i<nMaxCells );
    subtotal += szCell[i] + 2;
    if( subtotal > usableSpace ){
      szNew[k] = subtotal - szCell[i];
      cntNew[k] = i;
      /* For leaf-data trees step back so that cell i is visited again
      ** on the next iteration and counted toward the next page. */
      if( leafData ){ i--; }
      subtotal = 0;
      k++;
    }
  }
  szNew[k] = subtotal;
  cntNew[k] = nCell;
  k++;

  /*
  ** The packing computed by the previous block is biased toward the siblings
  ** on the left side.  The left siblings are always nearly full, while the
  ** right-most sibling might be nearly empty.  This block of code attempts
  ** to adjust the packing of siblings to get a better balance.
  **
  ** This adjustment is more than an optimization.  The packing above might
  ** be so out of balance as to be illegal.  For example, the right-most
  ** sibling might be completely empty.  This adjustment is not optional.
  */
  for(i=k-1; i>0; i--){
    int szRight = szNew[i];  /* Size of sibling on the right */
    int szLeft = szNew[i-1]; /* Size of sibling on the left */
    int r;              /* Index of right-most cell in left sibling */
    int d;              /* Index of first cell to the left of right sibling */

    r = cntNew[i-1] - 1;
    d = r + 1 - leafData;
    assert( d<nMaxCells );
    assert( r<nMaxCells );
    /* Shift cells from the left sibling to the right one for as long as
    ** the right sibling is empty or would not become larger than the left */
    while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
      szRight += szCell[d] + 2;
      szLeft -= szCell[r] + 2;
      cntNew[i-1]--;
      r = cntNew[i-1] - 1;
      d = r + 1 - leafData;
    }
    szNew[i] = szRight;
    szNew[i-1] = szLeft;
  }

  /* Either we found one or more cells (cntNew[0]>0) or we are
  ** a virtual root page.  A virtual root page is when the real root
  ** page is page 1 and we are the only child of that page.
  */
  assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );

  /*
  ** Allocate k new pages.  Reuse old pages where possible.
  */
  assert( pPage->pgno>1 );
  pageFlags = pPage->aData[0];
  for(i=0; i<k; i++){
    MemPage *pNew;
    if( i<nOld ){
      /* Ownership of the page reference moves from apOld[] to apNew[] */
      pNew = apNew[i] = apOld[i];
      pgnoNew[i] = pgnoOld[i];
      apOld[i] = 0;
      rc = sqlite3PagerWrite(pNew->pDbPage);
      nNew++;
      if( rc ) goto balance_cleanup;
    }else{
      assert( i>0 );
      rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
      if( rc ) goto balance_cleanup;
      apNew[i] = pNew;
      nNew++;
    }
  }

  /* Free any old pages that were not reused as new pages.
  */
  while( i<nOld ){
    rc = freePage(apOld[i]);
    if( rc ) goto balance_cleanup;
    releasePage(apOld[i]);
    apOld[i] = 0;
    i++;
  }

  /*
  ** Put the new pages in ascending order.  This helps to
  ** keep entries in the disk file in order so that a scan
  ** of the table is a linear scan through the file.  That
  ** in turn helps the operating system to deliver pages
  ** from the disk more rapidly.
  **
  ** An O(n^2) selection sort is used, but since
  ** n is never more than NB (a small constant), that should
  ** not be a problem.
  **
  ** When NB==3, this one optimization makes the database
  ** about 25% faster for large insertions and deletions.
  */
  for(i=0; i<k-1; i++){
    int minV = pgnoNew[i];
    int minI = i;
    for(j=i+1; j<k; j++){
      if( pgnoNew[j]<(unsigned)minV ){
        minI = j;
        minV = pgnoNew[j];
      }
    }
    if( minI>i ){
      int t;
      MemPage *pT;
      t = pgnoNew[i];
      pT = apNew[i];
      pgnoNew[i] = pgnoNew[minI];
      apNew[i] = apNew[minI];
      pgnoNew[minI] = t;
      apNew[minI] = pT;
    }
  }
  TRACE(("BALANCE: old: %d %d %d  new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
    pgnoOld[0], 
    nOld>=2 ? pgnoOld[1] : 0,
    nOld>=3 ? pgnoOld[2] : 0,
    pgnoNew[0], szNew[0],
    nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
    nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
    nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
    nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));

  /*
  ** Evenly distribute the data in apCell[] across the new pages.
  ** Insert divider cells into pParent as necessary.
  */
  j = 0;
  for(i=0; i<nNew; i++){
    /* Assemble the new sibling page. */
    MemPage *pNew = apNew[i];
    assert( j<nMaxCells );
    assert( pNew->pgno==pgnoNew[i] );
    zeroPage(pNew, pageFlags);
    assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
    assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
    assert( pNew->nOverflow==0 );

    /* If this is an auto-vacuum database, update the pointer map entries
    ** that point to the siblings that were rearranged. These can be: left
    ** children of cells, the right-child of the page, or overflow pages
    ** pointed to by cells.
    */
    if( ISAUTOVACUUM ){
      for(k=j; k<cntNew[i]; k++){
        assert( k<nMaxCells );
        /* Only cells that ended up on a different page need new entries */
        if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
          rc = ptrmapPutOvfl(pNew, k-j);
          if( rc==SQLITE_OK && leafCorrection==0 ){
            rc = ptrmapPut(pBt, get4byte(apCell[k]), PTRMAP_BTREE, pNew->pgno);
          }
          if( rc!=SQLITE_OK ){
            goto balance_cleanup;
          }
        }
      }
    }

    j = cntNew[i];

    /* If the sibling page assembled above was not the right-most sibling,
    ** insert a divider cell into the parent page.
    */
    if( i<nNew-1 && j<nCell ){
      u8 *pCell;
      u8 *pTemp;
      int sz;

      assert( j<nMaxCells );
      pCell = apCell[j];
      sz = szCell[j] + leafCorrection;
      pTemp = &aSpace2[iSpace2];
      if( !pNew->leaf ){
        /* The left-child of the divider becomes the right-child of pNew */
        memcpy(&pNew->aData[8], pCell, 4);
        if( ISAUTOVACUUM 
         && (aFrom[j]==0xFF || apCopy[aFrom[j]]->pgno!=pNew->pgno)
        ){
          rc = ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno);
          if( rc!=SQLITE_OK ){
            goto balance_cleanup;
          }
        }
      }else if( leafData ){
        /* If the tree is a leaf-data tree, and the siblings are leaves, 
        ** then there is no divider cell in apCell[]. Instead, the divider 
        ** cell consists of the integer key for the right-most cell of 
        ** the sibling-page assembled above only.
        */
        CellInfo info;
        j--;
        sqlite3BtreeParseCellPtr(pNew, apCell[j], &info);
        pCell = pTemp;
        fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz);
        pTemp = 0;
      }else{
        pCell -= 4;
        /* Obscure case for non-leaf-data trees: If the cell at pCell was
        ** previously stored on a leaf node, and its reported size was 4
        ** bytes, then it may actually be smaller than this 
        ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of
        ** any cell). But it is important to pass the correct size to 
        ** insertCell(), so reparse the cell now.
        **
        ** Note that this can never happen in an SQLite data file, as all
        ** cells are at least 4 bytes. It only happens in b-trees used
        ** to evaluate "IN (SELECT ...)" and similar clauses.
        */
        if( szCell[j]==4 ){
          assert(leafCorrection==4);
          sz = cellSizePtr(pParent, pCell);
        }
      }
      iSpace2 += sz;
      assert( sz<=pBt->pageSize/4 );
      assert( iSpace2<=pBt->pageSize );
      rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
      if( rc!=SQLITE_OK ) goto balance_cleanup;
      put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);

      /* If this is an auto-vacuum database, and not a leaf-data tree,
      ** then update the pointer map with an entry for the overflow page
      ** that the cell just inserted points to (if any).
      */
      if( ISAUTOVACUUM && !leafData ){
        rc = ptrmapPutOvfl(pParent, nxDiv);
        if( rc!=SQLITE_OK ){
          goto balance_cleanup;
        }
      }
      j++;
      nxDiv++;
    }

    /* Set the pointer-map entry for the new sibling page. */
    if( ISAUTOVACUUM ){
      rc = ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno);
      if( rc!=SQLITE_OK ){
        goto balance_cleanup;
      }
    }
  }
  assert( j==nCell );
  assert( nOld>0 );
  assert( nNew>0 );
  if( (pageFlags & PTF_LEAF)==0 ){
    /* The right-child of the last old sibling becomes the right-child
    ** of the last new sibling */
    u8 *zChild = &apCopy[nOld-1]->aData[8];
    memcpy(&apNew[nNew-1]->aData[8], zChild, 4);
    if( ISAUTOVACUUM ){
      rc = ptrmapPut(pBt, get4byte(zChild), PTRMAP_BTREE, apNew[nNew-1]->pgno);
      if( rc!=SQLITE_OK ){
        goto balance_cleanup;
      }
    }
  }
  if( nxDiv==pParent->nCell+pParent->nOverflow ){
    /* Right-most sibling is the right-most child of pParent */
    put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
  }else{
    /* Right-most sibling is the left child of the first entry in pParent
    ** past the right-most divider entry */
    put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
  }

  /*
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist so it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit );
  /* Free the scratch buffer before the call to balance() on the parent;
  ** clearing apCell makes the second free in balance_cleanup a call on 0. */
  sqlite3ScratchFree(apCell);
  apCell = 0;
  releasePage(pPage);
  pCur->iPage--;
  rc = balance(pCur, 0);
  
  /*
  ** Cleanup before returning.  This code is reached both on success and
  ** via "goto balance_cleanup" on error.  Entries of apOld[] that were
  ** reused or already released have been set to 0 above.
  */
balance_cleanup:
  sqlite3PageFree(aSpace2);
  sqlite3ScratchFree(apCell);
  for(i=0; i<nOld; i++){
    releasePage(apOld[i]);
  }
  for(i=0; i<nNew; i++){
    releasePage(apNew[i]);
  }

  /* releasePage(pParent); */
  TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
          pPage->pgno, nOld, nNew, nCell));

  return rc;
}

/*
** This routine is called for the root page of a btree when the root
** page contains no cells.
This is an opportunity to make the tree 05487 ** shallower by one level. 05488 */ 05489 static int balance_shallower(BtCursor *pCur){ 05490 MemPage *pPage; /* Root page of B-Tree */ 05491 MemPage *pChild; /* The only child page of pPage */ 05492 Pgno pgnoChild; /* Page number for pChild */ 05493 int rc = SQLITE_OK; /* Return code from subprocedures */ 05494 BtShared *pBt; /* The main BTree structure */ 05495 int mxCellPerPage; /* Maximum number of cells per page */ 05496 u8 **apCell; /* All cells from pages being balanced */ 05497 u16 *szCell; /* Local size of all cells */ 05498 05499 assert( pCur->iPage==0 ); 05500 pPage = pCur->apPage[0]; 05501 05502 assert( pPage->nCell==0 ); 05503 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 05504 pBt = pPage->pBt; 05505 mxCellPerPage = MX_CELL(pBt); 05506 apCell = sqlite3Malloc( mxCellPerPage*(sizeof(u8*)+sizeof(u16)) ); 05507 if( apCell==0 ) return SQLITE_NOMEM; 05508 szCell = (u16*)&apCell[mxCellPerPage]; 05509 if( pPage->leaf ){ 05510 /* The table is completely empty */ 05511 TRACE(("BALANCE: empty table %d\n", pPage->pgno)); 05512 }else{ 05513 /* The root page is empty but has one child. Transfer the 05514 ** information from that one child into the root page if it 05515 ** will fit. This reduces the depth of the tree by one. 05516 ** 05517 ** If the root page is page 1, it has less space available than 05518 ** its child (due to the 100 byte header that occurs at the beginning 05519 ** of the database fle), so it might not be able to hold all of the 05520 ** information currently contained in the child. If this is the 05521 ** case, then do not do the transfer. Leave page 1 empty except 05522 ** for the right-pointer to the child page. The child page becomes 05523 ** the virtual root of the tree. 
05524 */ 05525 VVA_ONLY( pCur->pagesShuffled = 1 ); 05526 pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]); 05527 assert( pgnoChild>0 ); 05528 assert( pgnoChild<=pagerPagecount(pPage->pBt->pPager) ); 05529 rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0); 05530 if( rc ) goto end_shallow_balance; 05531 if( pPage->pgno==1 ){ 05532 rc = sqlite3BtreeInitPage(pChild); 05533 if( rc ) goto end_shallow_balance; 05534 assert( pChild->nOverflow==0 ); 05535 if( pChild->nFree>=100 ){ 05536 /* The child information will fit on the root page, so do the 05537 ** copy */ 05538 int i; 05539 zeroPage(pPage, pChild->aData[0]); 05540 for(i=0; i<pChild->nCell; i++){ 05541 apCell[i] = findCell(pChild,i); 05542 szCell[i] = cellSizePtr(pChild, apCell[i]); 05543 } 05544 assemblePage(pPage, pChild->nCell, apCell, szCell); 05545 /* Copy the right-pointer of the child to the parent. */ 05546 put4byte(&pPage->aData[pPage->hdrOffset+8], 05547 get4byte(&pChild->aData[pChild->hdrOffset+8])); 05548 freePage(pChild); 05549 TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno)); 05550 }else{ 05551 /* The child has more information that will fit on the root. 05552 ** The tree is already balanced. Do nothing. 
*/ 05553 TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno)); 05554 } 05555 }else{ 05556 memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize); 05557 pPage->isInit = 0; 05558 rc = sqlite3BtreeInitPage(pPage); 05559 assert( rc==SQLITE_OK ); 05560 freePage(pChild); 05561 TRACE(("BALANCE: transfer child %d into root %d\n", 05562 pChild->pgno, pPage->pgno)); 05563 } 05564 assert( pPage->nOverflow==0 ); 05565 #ifndef SQLITE_OMIT_AUTOVACUUM 05566 if( ISAUTOVACUUM ){ 05567 rc = setChildPtrmaps(pPage); 05568 } 05569 #endif 05570 releasePage(pChild); 05571 } 05572 end_shallow_balance: 05573 sqlite3_free(apCell); 05574 return rc; 05575 } 05576 05577 05578 /* 05579 ** The root page is overfull 05580 ** 05581 ** When this happens, Create a new child page and copy the 05582 ** contents of the root into the child. Then make the root 05583 ** page an empty page with rightChild pointing to the new 05584 ** child. Finally, call balance_internal() on the new child 05585 ** to cause it to split. 
05586 */ 05587 static int balance_deeper(BtCursor *pCur){ 05588 int rc; /* Return value from subprocedures */ 05589 MemPage *pPage; /* Pointer to the root page */ 05590 MemPage *pChild; /* Pointer to a new child page */ 05591 Pgno pgnoChild; /* Page number of the new child page */ 05592 BtShared *pBt; /* The BTree */ 05593 int usableSize; /* Total usable size of a page */ 05594 u8 *data; /* Content of the parent page */ 05595 u8 *cdata; /* Content of the child page */ 05596 int hdr; /* Offset to page header in parent */ 05597 int cbrk; /* Offset to content of first cell in parent */ 05598 05599 assert( pCur->iPage==0 ); 05600 assert( pCur->apPage[0]->nOverflow>0 ); 05601 05602 VVA_ONLY( pCur->pagesShuffled = 1 ); 05603 pPage = pCur->apPage[0]; 05604 pBt = pPage->pBt; 05605 assert( sqlite3_mutex_held(pBt->mutex) ); 05606 rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0); 05607 if( rc ) return rc; 05608 assert( sqlite3PagerIswriteable(pChild->pDbPage) ); 05609 usableSize = pBt->usableSize; 05610 data = pPage->aData; 05611 hdr = pPage->hdrOffset; 05612 cbrk = get2byte(&data[hdr+5]); 05613 cdata = pChild->aData; 05614 memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr); 05615 memcpy(&cdata[cbrk], &data[cbrk], usableSize-cbrk); 05616 05617 rc = sqlite3BtreeInitPage(pChild); 05618 if( rc==SQLITE_OK ){ 05619 int nCopy = pPage->nOverflow*sizeof(pPage->aOvfl[0]); 05620 memcpy(pChild->aOvfl, pPage->aOvfl, nCopy); 05621 pChild->nOverflow = pPage->nOverflow; 05622 if( pChild->nOverflow ){ 05623 pChild->nFree = 0; 05624 } 05625 assert( pChild->nCell==pPage->nCell ); 05626 zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF); 05627 put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild); 05628 TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno)); 05629 if( ISAUTOVACUUM ){ 05630 rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno); 05631 #ifndef SQLITE_OMIT_AUTOVACUUM 05632 if( rc==SQLITE_OK ){ 05633 rc = setChildPtrmaps(pChild); 05634 
} 05635 #endif 05636 } 05637 } 05638 05639 if( rc==SQLITE_OK ){ 05640 pCur->iPage++; 05641 pCur->apPage[1] = pChild; 05642 pCur->aiIdx[0] = 0; 05643 rc = balance_nonroot(pCur); 05644 }else{ 05645 releasePage(pChild); 05646 } 05647 05648 return rc; 05649 } 05650 05651 /* 05652 ** The page that pCur currently points to has just been modified in 05653 ** some way. This function figures out if this modification means the 05654 ** tree needs to be balanced, and if so calls the appropriate balancing 05655 ** routine. 05656 ** 05657 ** Parameter isInsert is true if a new cell was just inserted into the 05658 ** page, or false otherwise. 05659 */ 05660 static int balance(BtCursor *pCur, int isInsert){ 05661 int rc = SQLITE_OK; 05662 MemPage *pPage = pCur->apPage[pCur->iPage]; 05663 05664 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 05665 if( pCur->iPage==0 ){ 05666 rc = sqlite3PagerWrite(pPage->pDbPage); 05667 if( rc==SQLITE_OK && pPage->nOverflow>0 ){ 05668 rc = balance_deeper(pCur); 05669 } 05670 if( rc==SQLITE_OK && pPage->nCell==0 ){ 05671 rc = balance_shallower(pCur); 05672 } 05673 }else{ 05674 if( pPage->nOverflow>0 || 05675 (!isInsert && pPage->nFree>pPage->pBt->usableSize*2/3) ){ 05676 rc = balance_nonroot(pCur); 05677 } 05678 } 05679 return rc; 05680 } 05681 05682 /* 05683 ** This routine checks all cursors that point to table pgnoRoot. 05684 ** If any of those cursors were opened with wrFlag==0 in a different 05685 ** database connection (a database connection that shares the pager 05686 ** cache with the current connection) and that other connection 05687 ** is not in the ReadUncommmitted state, then this routine returns 05688 ** SQLITE_LOCKED. 05689 ** 05690 ** As well as cursors with wrFlag==0, cursors with wrFlag==1 and 05691 ** isIncrblobHandle==1 are also considered 'read' cursors. Incremental 05692 ** blob cursors are used for both reading and writing. 
05693 ** 05694 ** When pgnoRoot is the root page of an intkey table, this function is also 05695 ** responsible for invalidating incremental blob cursors when the table row 05696 ** on which they are opened is deleted or modified. Cursors are invalidated 05697 ** according to the following rules: 05698 ** 05699 ** 1) When BtreeClearTable() is called to completely delete the contents 05700 ** of a B-Tree table, pExclude is set to zero and parameter iRow is 05701 ** set to non-zero. In this case all incremental blob cursors open 05702 ** on the table rooted at pgnoRoot are invalidated. 05703 ** 05704 ** 2) When BtreeInsert(), BtreeDelete() or BtreePutData() is called to 05705 ** modify a table row via an SQL statement, pExclude is set to the 05706 ** write cursor used to do the modification and parameter iRow is set 05707 ** to the integer row id of the B-Tree entry being modified. Unless 05708 ** pExclude is itself an incremental blob cursor, then all incremental 05709 ** blob cursors open on row iRow of the B-Tree are invalidated. 05710 ** 05711 ** 3) If both pExclude and iRow are set to zero, no incremental blob 05712 ** cursors are invalidated. 
05713 */ 05714 static int checkReadLocks( 05715 Btree *pBtree, 05716 Pgno pgnoRoot, 05717 BtCursor *pExclude, 05718 i64 iRow 05719 ){ 05720 BtCursor *p; 05721 BtShared *pBt = pBtree->pBt; 05722 sqlite3 *db = pBtree->db; 05723 assert( sqlite3BtreeHoldsMutex(pBtree) ); 05724 for(p=pBt->pCursor; p; p=p->pNext){ 05725 if( p==pExclude ) continue; 05726 if( p->pgnoRoot!=pgnoRoot ) continue; 05727 #ifndef SQLITE_OMIT_INCRBLOB 05728 if( p->isIncrblobHandle && ( 05729 (!pExclude && iRow) 05730 || (pExclude && !pExclude->isIncrblobHandle && p->info.nKey==iRow) 05731 )){ 05732 p->eState = CURSOR_INVALID; 05733 } 05734 #endif 05735 if( p->eState!=CURSOR_VALID ) continue; 05736 if( p->wrFlag==0 05737 #ifndef SQLITE_OMIT_INCRBLOB 05738 || p->isIncrblobHandle 05739 #endif 05740 ){ 05741 sqlite3 *dbOther = p->pBtree->db; 05742 if( dbOther==0 || 05743 (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){ 05744 return SQLITE_LOCKED; 05745 } 05746 } 05747 } 05748 return SQLITE_OK; 05749 } 05750 05751 /* 05752 ** Insert a new record into the BTree. The key is given by (pKey,nKey) 05753 ** and the data is given by (pData,nData). The cursor is used only to 05754 ** define what table the record should be inserted into. The cursor 05755 ** is left pointing at a random location. 05756 ** 05757 ** For an INTKEY table, only the nKey value of the key is used. pKey is 05758 ** ignored. For a ZERODATA table, the pData and nData are both ignored. 
05759 */ 05760 int sqlite3BtreeInsert( 05761 BtCursor *pCur, /* Insert data into the table of this cursor */ 05762 const void *pKey, i64 nKey, /* The key of the new record */ 05763 const void *pData, int nData, /* The data of the new record */ 05764 int nZero, /* Number of extra 0 bytes to append to data */ 05765 int appendBias /* True if this is likely an append */ 05766 ){ 05767 int rc; 05768 int loc; 05769 int szNew; 05770 int idx; 05771 MemPage *pPage; 05772 Btree *p = pCur->pBtree; 05773 BtShared *pBt = p->pBt; 05774 unsigned char *oldCell; 05775 unsigned char *newCell = 0; 05776 05777 assert( cursorHoldsMutex(pCur) ); 05778 if( pBt->inTransaction!=TRANS_WRITE ){ 05779 /* Must start a transaction before doing an insert */ 05780 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; 05781 return rc; 05782 } 05783 assert( !pBt->readOnly ); 05784 if( !pCur->wrFlag ){ 05785 return SQLITE_PERM; /* Cursor not open for writing */ 05786 } 05787 if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, nKey) ){ 05788 return SQLITE_LOCKED; /* The table pCur points to has a read lock */ 05789 } 05790 if( pCur->eState==CURSOR_FAULT ){ 05791 return pCur->skip; 05792 } 05793 05794 /* Save the positions of any other cursors open on this table */ 05795 sqlite3BtreeClearCursor(pCur); 05796 if( 05797 SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) || 05798 SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc)) 05799 ){ 05800 return rc; 05801 } 05802 05803 pPage = pCur->apPage[pCur->iPage]; 05804 assert( pPage->intKey || nKey>=0 ); 05805 assert( pPage->leaf || !pPage->intKey ); 05806 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", 05807 pCur->pgnoRoot, nKey, nData, pPage->pgno, 05808 loc==0 ? 
"overwrite" : "new entry")); 05809 assert( pPage->isInit ); 05810 allocateTempSpace(pBt); 05811 newCell = pBt->pTmpSpace; 05812 if( newCell==0 ) return SQLITE_NOMEM; 05813 rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); 05814 if( rc ) goto end_insert; 05815 assert( szNew==cellSizePtr(pPage, newCell) ); 05816 assert( szNew<=MX_CELL_SIZE(pBt) ); 05817 idx = pCur->aiIdx[pCur->iPage]; 05818 if( loc==0 && CURSOR_VALID==pCur->eState ){ 05819 u16 szOld; 05820 assert( idx<pPage->nCell ); 05821 rc = sqlite3PagerWrite(pPage->pDbPage); 05822 if( rc ){ 05823 goto end_insert; 05824 } 05825 oldCell = findCell(pPage, idx); 05826 if( !pPage->leaf ){ 05827 memcpy(newCell, oldCell, 4); 05828 } 05829 szOld = cellSizePtr(pPage, oldCell); 05830 rc = clearCell(pPage, oldCell); 05831 if( rc ) goto end_insert; 05832 rc = dropCell(pPage, idx, szOld); 05833 if( rc!=SQLITE_OK ) { 05834 goto end_insert; 05835 } 05836 }else if( loc<0 && pPage->nCell>0 ){ 05837 assert( pPage->leaf ); 05838 idx = ++pCur->aiIdx[pCur->iPage]; 05839 pCur->info.nSize = 0; 05840 pCur->validNKey = 0; 05841 }else{ 05842 assert( pPage->leaf ); 05843 } 05844 rc = insertCell(pPage, idx, newCell, szNew, 0, 0); 05845 if( rc!=SQLITE_OK ) goto end_insert; 05846 rc = balance(pCur, 1); 05847 if( rc==SQLITE_OK ){ 05848 moveToRoot(pCur); 05849 } 05850 end_insert: 05851 return rc; 05852 } 05853 05854 /* 05855 ** Delete the entry that the cursor is pointing to. The cursor 05856 ** is left pointing at a arbitrary location. 05857 */ 05858 int sqlite3BtreeDelete(BtCursor *pCur){ 05859 MemPage *pPage = pCur->apPage[pCur->iPage]; 05860 int idx; 05861 unsigned char *pCell; 05862 int rc; 05863 Pgno pgnoChild = 0; 05864 Btree *p = pCur->pBtree; 05865 BtShared *pBt = p->pBt; 05866 05867 assert( cursorHoldsMutex(pCur) ); 05868 assert( pPage->isInit ); 05869 if( pBt->inTransaction!=TRANS_WRITE ){ 05870 /* Must start a transaction before doing a delete */ 05871 rc = pBt->readOnly ? 
SQLITE_READONLY : SQLITE_ERROR; 05872 return rc; 05873 } 05874 assert( !pBt->readOnly ); 05875 if( pCur->eState==CURSOR_FAULT ){ 05876 return pCur->skip; 05877 } 05878 if( pCur->aiIdx[pCur->iPage]>=pPage->nCell ){ 05879 return SQLITE_ERROR; /* The cursor is not pointing to anything */ 05880 } 05881 if( !pCur->wrFlag ){ 05882 return SQLITE_PERM; /* Did not open this cursor for writing */ 05883 } 05884 if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, pCur->info.nKey) ){ 05885 return SQLITE_LOCKED; /* The table pCur points to has a read lock */ 05886 } 05887 05888 /* Restore the current cursor position (a no-op if the cursor is not in 05889 ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors 05890 ** open on the same table. Then call sqlite3PagerWrite() on the page 05891 ** that the entry will be deleted from. 05892 */ 05893 if( 05894 (rc = restoreCursorPosition(pCur))!=0 || 05895 (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 || 05896 (rc = sqlite3PagerWrite(pPage->pDbPage))!=0 05897 ){ 05898 return rc; 05899 } 05900 05901 /* Locate the cell within its page and leave pCell pointing to the 05902 ** data. The clearCell() call frees any overflow pages associated with the 05903 ** cell. The cell itself is still intact. 05904 */ 05905 idx = pCur->aiIdx[pCur->iPage]; 05906 pCell = findCell(pPage, idx); 05907 if( !pPage->leaf ){ 05908 pgnoChild = get4byte(pCell); 05909 } 05910 rc = clearCell(pPage, pCell); 05911 if( rc ){ 05912 return rc; 05913 } 05914 05915 if( !pPage->leaf ){ 05916 /* 05917 ** The entry we are about to delete is not a leaf so if we do not 05918 ** do something we will leave a hole on an internal page. 05919 ** We have to fill the hole by moving in a cell from a leaf. The 05920 ** next Cell after the one to be deleted is guaranteed to exist and 05921 ** to be a leaf so we can use it. 
05922 */ 05923 BtCursor leafCur; 05924 MemPage *pLeafPage; 05925 05926 unsigned char *pNext; 05927 int notUsed; 05928 unsigned char *tempCell = 0; 05929 assert( !pPage->intKey ); 05930 sqlite3BtreeGetTempCursor(pCur, &leafCur); 05931 rc = sqlite3BtreeNext(&leafCur, ¬Used); 05932 if( rc==SQLITE_OK ){ 05933 assert( leafCur.aiIdx[leafCur.iPage]==0 ); 05934 pLeafPage = leafCur.apPage[leafCur.iPage]; 05935 rc = sqlite3PagerWrite(pLeafPage->pDbPage); 05936 } 05937 if( rc==SQLITE_OK ){ 05938 int leafCursorInvalid = 0; 05939 u16 szNext; 05940 TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n", 05941 pCur->pgnoRoot, pPage->pgno, pLeafPage->pgno)); 05942 dropCell(pPage, idx, cellSizePtr(pPage, pCell)); 05943 pNext = findCell(pLeafPage, 0); 05944 szNext = cellSizePtr(pLeafPage, pNext); 05945 assert( MX_CELL_SIZE(pBt)>=szNext+4 ); 05946 allocateTempSpace(pBt); 05947 tempCell = pBt->pTmpSpace; 05948 if( tempCell==0 ){ 05949 rc = SQLITE_NOMEM; 05950 } 05951 if( rc==SQLITE_OK ){ 05952 rc = insertCell(pPage, idx, pNext-4, szNext+4, tempCell, 0); 05953 } 05954 05955 05956 /* The "if" statement in the next code block is critical. The 05957 ** slightest error in that statement would allow SQLite to operate 05958 ** correctly most of the time but produce very rare failures. To 05959 ** guard against this, the following macros help to verify that 05960 ** the "if" statement is well tested. 
05961 */ 05962 testcase( pPage->nOverflow==0 && pPage->nFree<pBt->usableSize*2/3 05963 && pLeafPage->nFree+2+szNext > pBt->usableSize*2/3 ); 05964 testcase( pPage->nOverflow==0 && pPage->nFree==pBt->usableSize*2/3 05965 && pLeafPage->nFree+2+szNext > pBt->usableSize*2/3 ); 05966 testcase( pPage->nOverflow==0 && pPage->nFree==pBt->usableSize*2/3+1 05967 && pLeafPage->nFree+2+szNext > pBt->usableSize*2/3 ); 05968 testcase( pPage->nOverflow>0 && pPage->nFree<=pBt->usableSize*2/3 05969 && pLeafPage->nFree+2+szNext > pBt->usableSize*2/3 ); 05970 testcase( (pPage->nOverflow>0 || (pPage->nFree > pBt->usableSize*2/3)) 05971 && pLeafPage->nFree+2+szNext == pBt->usableSize*2/3 ); 05972 05973 05974 if( (pPage->nOverflow>0 || (pPage->nFree > pBt->usableSize*2/3)) && 05975 (pLeafPage->nFree+2+szNext > pBt->usableSize*2/3) 05976 ){ 05977 /* This branch is taken if the internal node is now either overflowing 05978 ** or underfull and the leaf node will be underfull after the just cell 05979 ** copied to the internal node is deleted from it. This is a special 05980 ** case because the call to balance() to correct the internal node 05981 ** may change the tree structure and invalidate the contents of 05982 ** the leafCur.apPage[] and leafCur.aiIdx[] arrays, which will be 05983 ** used by the balance() required to correct the underfull leaf 05984 ** node. 05985 ** 05986 ** The formula used in the expression above are based on facets of 05987 ** the SQLite file-format that do not change over time. 05988 */ 05989 testcase( pPage->nFree==pBt->usableSize*2/3+1 ); 05990 testcase( pLeafPage->nFree+2+szNext==pBt->usableSize*2/3+1 ); 05991 leafCursorInvalid = 1; 05992 } 05993 05994 if( rc==SQLITE_OK ){ 05995 put4byte(findOverflowCell(pPage, idx), pgnoChild); 05996 VVA_ONLY( pCur->pagesShuffled = 0 ); 05997 rc = balance(pCur, 0); 05998 } 05999 06000 if( rc==SQLITE_OK && leafCursorInvalid ){ 06001 /* The leaf-node is now underfull and so the tree needs to be 06002 ** rebalanced. 
However, the balance() operation on the internal 06003 ** node above may have modified the structure of the B-Tree and 06004 ** so the current contents of leafCur.apPage[] and leafCur.aiIdx[] 06005 ** may not be trusted. 06006 ** 06007 ** It is not possible to copy the ancestry from pCur, as the same 06008 ** balance() call has invalidated the pCur->apPage[] and aiIdx[] 06009 ** arrays. 06010 ** 06011 ** The call to saveCursorPosition() below internally saves the 06012 ** key that leafCur is currently pointing to. Currently, there 06013 ** are two copies of that key in the tree - one here on the leaf 06014 ** page and one on some internal node in the tree. The copy on 06015 ** the leaf node is always the next key in tree-order after the 06016 ** copy on the internal node. So, the call to sqlite3BtreeNext() 06017 ** calls restoreCursorPosition() to point the cursor to the copy 06018 ** stored on the internal node, then advances to the next entry, 06019 ** which happens to be the copy of the key on the internal node. 06020 ** Net effect: leafCur is pointing back to the duplicate cell 06021 ** that needs to be removed, and the leafCur.apPage[] and 06022 ** leafCur.aiIdx[] arrays are correct. 
06023 */ 06024 VVA_ONLY( Pgno leafPgno = pLeafPage->pgno ); 06025 rc = saveCursorPosition(&leafCur); 06026 if( rc==SQLITE_OK ){ 06027 rc = sqlite3BtreeNext(&leafCur, ¬Used); 06028 } 06029 pLeafPage = leafCur.apPage[leafCur.iPage]; 06030 assert( pLeafPage->pgno==leafPgno ); 06031 assert( leafCur.aiIdx[leafCur.iPage]==0 ); 06032 } 06033 06034 if( rc==SQLITE_OK ){ 06035 dropCell(pLeafPage, 0, szNext); 06036 VVA_ONLY( leafCur.pagesShuffled = 0 ); 06037 rc = balance(&leafCur, 0); 06038 assert( leafCursorInvalid || !leafCur.pagesShuffled 06039 || !pCur->pagesShuffled ); 06040 } 06041 } 06042 sqlite3BtreeReleaseTempCursor(&leafCur); 06043 }else{ 06044 TRACE(("DELETE: table=%d delete from leaf %d\n", 06045 pCur->pgnoRoot, pPage->pgno)); 06046 dropCell(pPage, idx, cellSizePtr(pPage, pCell)); 06047 rc = balance(pCur, 0); 06048 } 06049 if( rc==SQLITE_OK ){ 06050 moveToRoot(pCur); 06051 } 06052 return rc; 06053 } 06054 06055 /* 06056 ** Create a new BTree table. Write into *piTable the page 06057 ** number for the root page of the new table. 06058 ** 06059 ** The type of type is determined by the flags parameter. Only the 06060 ** following values of flags are currently in use. Other values for 06061 ** flags might not work: 06062 ** 06063 ** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys 06064 ** BTREE_ZERODATA Used for SQL indices 06065 */ 06066 static int btreeCreateTable(Btree *p, int *piTable, int flags){ 06067 BtShared *pBt = p->pBt; 06068 MemPage *pRoot; 06069 Pgno pgnoRoot; 06070 int rc; 06071 06072 assert( sqlite3BtreeHoldsMutex(p) ); 06073 if( pBt->inTransaction!=TRANS_WRITE ){ 06074 /* Must start a transaction first */ 06075 rc = pBt->readOnly ? 
SQLITE_READONLY : SQLITE_ERROR; 06076 return rc; 06077 } 06078 assert( !pBt->readOnly ); 06079 06080 #ifdef SQLITE_OMIT_AUTOVACUUM 06081 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); 06082 if( rc ){ 06083 return rc; 06084 } 06085 #else 06086 if( pBt->autoVacuum ){ 06087 Pgno pgnoMove; /* Move a page here to make room for the root-page */ 06088 MemPage *pPageMove; /* The page to move to. */ 06089 06090 /* Creating a new table may probably require moving an existing database 06091 ** to make room for the new tables root page. In case this page turns 06092 ** out to be an overflow page, delete all overflow page-map caches 06093 ** held by open cursors. 06094 */ 06095 invalidateAllOverflowCache(pBt); 06096 06097 /* Read the value of meta[3] from the database to determine where the 06098 ** root page of the new table should go. meta[3] is the largest root-page 06099 ** created so far, so the new root-page is (meta[3]+1). 06100 */ 06101 rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot); 06102 if( rc!=SQLITE_OK ){ 06103 return rc; 06104 } 06105 pgnoRoot++; 06106 06107 /* The new root-page may not be allocated on a pointer-map page, or the 06108 ** PENDING_BYTE page. 06109 */ 06110 while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || 06111 pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ 06112 pgnoRoot++; 06113 } 06114 assert( pgnoRoot>=3 ); 06115 06116 /* Allocate a page. The page that currently resides at pgnoRoot will 06117 ** be moved to the allocated page (unless the allocated page happens 06118 ** to reside at pgnoRoot). 06119 */ 06120 rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1); 06121 if( rc!=SQLITE_OK ){ 06122 return rc; 06123 } 06124 06125 if( pgnoMove!=pgnoRoot ){ 06126 /* pgnoRoot is the page that will be used for the root-page of 06127 ** the new table (assuming an error did not occur). But we were 06128 ** allocated pgnoMove. If required (i.e. 
if it was not allocated 06129 ** by extending the file), the current page at position pgnoMove 06130 ** is already journaled. 06131 */ 06132 u8 eType; 06133 Pgno iPtrPage; 06134 06135 releasePage(pPageMove); 06136 06137 /* Move the page currently at pgnoRoot to pgnoMove. */ 06138 rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0); 06139 if( rc!=SQLITE_OK ){ 06140 return rc; 06141 } 06142 rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); 06143 if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ 06144 releasePage(pRoot); 06145 return rc; 06146 } 06147 assert( eType!=PTRMAP_ROOTPAGE ); 06148 assert( eType!=PTRMAP_FREEPAGE ); 06149 rc = sqlite3PagerWrite(pRoot->pDbPage); 06150 if( rc!=SQLITE_OK ){ 06151 releasePage(pRoot); 06152 return rc; 06153 } 06154 rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0); 06155 releasePage(pRoot); 06156 06157 /* Obtain the page at pgnoRoot */ 06158 if( rc!=SQLITE_OK ){ 06159 return rc; 06160 } 06161 rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0); 06162 if( rc!=SQLITE_OK ){ 06163 return rc; 06164 } 06165 rc = sqlite3PagerWrite(pRoot->pDbPage); 06166 if( rc!=SQLITE_OK ){ 06167 releasePage(pRoot); 06168 return rc; 06169 } 06170 }else{ 06171 pRoot = pPageMove; 06172 } 06173 06174 /* Update the pointer-map and meta-data with the new root-page number. 
*/ 06175 rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0); 06176 if( rc ){ 06177 releasePage(pRoot); 06178 return rc; 06179 } 06180 rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); 06181 if( rc ){ 06182 releasePage(pRoot); 06183 return rc; 06184 } 06185 06186 }else{ 06187 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); 06188 if( rc ) return rc; 06189 } 06190 #endif 06191 assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); 06192 zeroPage(pRoot, flags | PTF_LEAF); 06193 sqlite3PagerUnref(pRoot->pDbPage); 06194 *piTable = (int)pgnoRoot; 06195 return SQLITE_OK; 06196 } 06197 int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){ 06198 int rc; 06199 sqlite3BtreeEnter(p); 06200 p->pBt->db = p->db; 06201 rc = btreeCreateTable(p, piTable, flags); 06202 sqlite3BtreeLeave(p); 06203 return rc; 06204 } 06205 06206 /* 06207 ** Erase the given database page and all its children. Return 06208 ** the page to the freelist. 06209 */ 06210 static int clearDatabasePage( 06211 BtShared *pBt, /* The BTree that contains the table */ 06212 Pgno pgno, /* Page number to clear */ 06213 MemPage *pParent, /* Parent page. 
NULL for the root */ 06214 int freePageFlag, /* Deallocate page if true */ 06215 int *pnChange 06216 ){ 06217 MemPage *pPage = 0; 06218 int rc; 06219 unsigned char *pCell; 06220 int i; 06221 06222 assert( sqlite3_mutex_held(pBt->mutex) ); 06223 if( pgno>pagerPagecount(pBt->pPager) ){ 06224 return SQLITE_CORRUPT_BKPT; 06225 } 06226 06227 rc = getAndInitPage(pBt, pgno, &pPage); 06228 if( rc ) goto cleardatabasepage_out; 06229 for(i=0; i<pPage->nCell; i++){ 06230 pCell = findCell(pPage, i); 06231 if( !pPage->leaf ){ 06232 rc = clearDatabasePage(pBt, get4byte(pCell), pPage, 1, pnChange); 06233 if( rc ) goto cleardatabasepage_out; 06234 } 06235 rc = clearCell(pPage, pCell); 06236 if( rc ) goto cleardatabasepage_out; 06237 } 06238 if( !pPage->leaf ){ 06239 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage, 1, pnChange); 06240 if( rc ) goto cleardatabasepage_out; 06241 }else if( pnChange ){ 06242 assert( pPage->intKey ); 06243 *pnChange += pPage->nCell; 06244 } 06245 if( freePageFlag ){ 06246 rc = freePage(pPage); 06247 }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ 06248 zeroPage(pPage, pPage->aData[0] | PTF_LEAF); 06249 } 06250 06251 cleardatabasepage_out: 06252 releasePage(pPage); 06253 return rc; 06254 } 06255 06256 /* 06257 ** Delete all information from a single table in the database. iTable is 06258 ** the page number of the root of the table. After this routine returns, 06259 ** the root page is empty, but still exists. 06260 ** 06261 ** This routine will fail with SQLITE_LOCKED if there are any open 06262 ** read cursors on the table. Open write cursors are moved to the 06263 ** root of the table. 06264 ** 06265 ** If pnChange is not NULL, then table iTable must be an intkey table. The 06266 ** integer value pointed to by pnChange is incremented by the number of 06267 ** entries in the table. 
06268 */ 06269 int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ 06270 int rc; 06271 BtShared *pBt = p->pBt; 06272 sqlite3BtreeEnter(p); 06273 pBt->db = p->db; 06274 if( p->inTrans!=TRANS_WRITE ){ 06275 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; 06276 }else if( (rc = checkReadLocks(p, iTable, 0, 1))!=SQLITE_OK ){ 06277 /* nothing to do */ 06278 }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){ 06279 /* nothing to do */ 06280 }else{ 06281 rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0, pnChange); 06282 } 06283 sqlite3BtreeLeave(p); 06284 return rc; 06285 } 06286 06287 /* 06288 ** Erase all information in a table and add the root of the table to 06289 ** the freelist. Except, the root of the principle table (the one on 06290 ** page 1) is never added to the freelist. 06291 ** 06292 ** This routine will fail with SQLITE_LOCKED if there are any open 06293 ** cursors on the table. 06294 ** 06295 ** If AUTOVACUUM is enabled and the page at iTable is not the last 06296 ** root page in the database file, then the last root page 06297 ** in the database file is moved into the slot formerly occupied by 06298 ** iTable and that last slot formerly occupied by the last root page 06299 ** is added to the freelist instead of iTable. In this say, all 06300 ** root pages are kept at the beginning of the database file, which 06301 ** is necessary for AUTOVACUUM to work right. *piMoved is set to the 06302 ** page number that used to be the last root page in the file before 06303 ** the move. If no page gets moved, *piMoved is set to 0. 06304 ** The last root page is recorded in meta[3] and the value of 06305 ** meta[3] is updated by this procedure. 06306 */ 06307 static int btreeDropTable(Btree *p, int iTable, int *piMoved){ 06308 int rc; 06309 MemPage *pPage = 0; 06310 BtShared *pBt = p->pBt; 06311 06312 assert( sqlite3BtreeHoldsMutex(p) ); 06313 if( p->inTrans!=TRANS_WRITE ){ 06314 return pBt->readOnly ? 
SQLITE_READONLY : SQLITE_ERROR; 06315 } 06316 06317 /* It is illegal to drop a table if any cursors are open on the 06318 ** database. This is because in auto-vacuum mode the backend may 06319 ** need to move another root-page to fill a gap left by the deleted 06320 ** root page. If an open cursor was using this page a problem would 06321 ** occur. 06322 */ 06323 if( pBt->pCursor ){ 06324 return SQLITE_LOCKED; 06325 } 06326 06327 rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0); 06328 if( rc ) return rc; 06329 rc = sqlite3BtreeClearTable(p, iTable, 0); 06330 if( rc ){ 06331 releasePage(pPage); 06332 return rc; 06333 } 06334 06335 *piMoved = 0; 06336 06337 if( iTable>1 ){ 06338 #ifdef SQLITE_OMIT_AUTOVACUUM 06339 rc = freePage(pPage); 06340 releasePage(pPage); 06341 #else 06342 if( pBt->autoVacuum ){ 06343 Pgno maxRootPgno; 06344 rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno); 06345 if( rc!=SQLITE_OK ){ 06346 releasePage(pPage); 06347 return rc; 06348 } 06349 06350 if( iTable==maxRootPgno ){ 06351 /* If the table being dropped is the table with the largest root-page 06352 ** number in the database, put the root page on the free list. 06353 */ 06354 rc = freePage(pPage); 06355 releasePage(pPage); 06356 if( rc!=SQLITE_OK ){ 06357 return rc; 06358 } 06359 }else{ 06360 /* The table being dropped does not have the largest root-page 06361 ** number in the database. So move the page that does into the 06362 ** gap left by the deleted root-page. 
06363 */ 06364 MemPage *pMove; 06365 releasePage(pPage); 06366 rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0); 06367 if( rc!=SQLITE_OK ){ 06368 return rc; 06369 } 06370 rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); 06371 releasePage(pMove); 06372 if( rc!=SQLITE_OK ){ 06373 return rc; 06374 } 06375 rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0); 06376 if( rc!=SQLITE_OK ){ 06377 return rc; 06378 } 06379 rc = freePage(pMove); 06380 releasePage(pMove); 06381 if( rc!=SQLITE_OK ){ 06382 return rc; 06383 } 06384 *piMoved = maxRootPgno; 06385 } 06386 06387 /* Set the new 'max-root-page' value in the database header. This 06388 ** is the old value less one, less one more if that happens to 06389 ** be a root-page number, less one again if that is the 06390 ** PENDING_BYTE_PAGE. 06391 */ 06392 maxRootPgno--; 06393 if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){ 06394 maxRootPgno--; 06395 } 06396 if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){ 06397 maxRootPgno--; 06398 } 06399 assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); 06400 06401 rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); 06402 }else{ 06403 rc = freePage(pPage); 06404 releasePage(pPage); 06405 } 06406 #endif 06407 }else{ 06408 /* If sqlite3BtreeDropTable was called on page 1. */ 06409 zeroPage(pPage, PTF_INTKEY|PTF_LEAF ); 06410 releasePage(pPage); 06411 } 06412 return rc; 06413 } 06414 int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ 06415 int rc; 06416 sqlite3BtreeEnter(p); 06417 p->pBt->db = p->db; 06418 rc = btreeDropTable(p, iTable, piMoved); 06419 sqlite3BtreeLeave(p); 06420 return rc; 06421 } 06422 06423 06424 /* 06425 ** Read the meta-information out of a database file. Meta[0] 06426 ** is the number of free pages currently in the database. Meta[1] 06427 ** through meta[15] are available for use by higher layers. Meta[0] 06428 ** is read-only, the others are read/write. 06429 ** 06430 ** The schema layer numbers meta values differently. 
At the schema 06431 ** layer (and the SetCookie and ReadCookie opcodes) the number of 06432 ** free pages is not visible. So Cookie[0] is the same as Meta[1]. 06433 */ 06434 int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ 06435 DbPage *pDbPage; 06436 int rc; 06437 unsigned char *pP1; 06438 BtShared *pBt = p->pBt; 06439 06440 sqlite3BtreeEnter(p); 06441 pBt->db = p->db; 06442 06443 /* Reading a meta-data value requires a read-lock on page 1 (and hence 06444 ** the sqlite_master table. We grab this lock regardless of whether or 06445 ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page 06446 ** 1 is treated as a special case by queryTableLock() and lockTable()). 06447 */ 06448 rc = queryTableLock(p, 1, READ_LOCK); 06449 if( rc!=SQLITE_OK ){ 06450 sqlite3BtreeLeave(p); 06451 return rc; 06452 } 06453 06454 assert( idx>=0 && idx<=15 ); 06455 if( pBt->pPage1 ){ 06456 /* The b-tree is already holding a reference to page 1 of the database 06457 ** file. In this case the required meta-data value can be read directly 06458 ** from the page data of this reference. This is slightly faster than 06459 ** requesting a new reference from the pager layer. 06460 */ 06461 pP1 = (unsigned char *)pBt->pPage1->aData; 06462 }else{ 06463 /* The b-tree does not have a reference to page 1 of the database file. 06464 ** Obtain one from the pager layer. 06465 */ 06466 rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage); 06467 if( rc ){ 06468 sqlite3BtreeLeave(p); 06469 return rc; 06470 } 06471 pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage); 06472 } 06473 *pMeta = get4byte(&pP1[36 + idx*4]); 06474 06475 /* If the b-tree is not holding a reference to page 1, then one was 06476 ** requested from the pager layer in the above block. Release it now. 
06477 */ 06478 if( !pBt->pPage1 ){ 06479 sqlite3PagerUnref(pDbPage); 06480 } 06481 06482 /* If autovacuumed is disabled in this build but we are trying to 06483 ** access an autovacuumed database, then make the database readonly. 06484 */ 06485 #ifdef SQLITE_OMIT_AUTOVACUUM 06486 if( idx==4 && *pMeta>0 ) pBt->readOnly = 1; 06487 #endif 06488 06489 /* Grab the read-lock on page 1. */ 06490 rc = lockTable(p, 1, READ_LOCK); 06491 sqlite3BtreeLeave(p); 06492 return rc; 06493 } 06494 06495 /* 06496 ** Write meta-information back into the database. Meta[0] is 06497 ** read-only and may not be written. 06498 */ 06499 int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ 06500 BtShared *pBt = p->pBt; 06501 unsigned char *pP1; 06502 int rc; 06503 assert( idx>=1 && idx<=15 ); 06504 sqlite3BtreeEnter(p); 06505 pBt->db = p->db; 06506 if( p->inTrans!=TRANS_WRITE ){ 06507 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; 06508 }else{ 06509 assert( pBt->pPage1!=0 ); 06510 pP1 = pBt->pPage1->aData; 06511 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); 06512 if( rc==SQLITE_OK ){ 06513 put4byte(&pP1[36 + idx*4], iMeta); 06514 #ifndef SQLITE_OMIT_AUTOVACUUM 06515 if( idx==7 ){ 06516 assert( pBt->autoVacuum || iMeta==0 ); 06517 assert( iMeta==0 || iMeta==1 ); 06518 pBt->incrVacuum = iMeta; 06519 } 06520 #endif 06521 } 06522 } 06523 sqlite3BtreeLeave(p); 06524 return rc; 06525 } 06526 06527 /* 06528 ** Return the flag byte at the beginning of the page that the cursor 06529 ** is currently pointing to. 06530 */ 06531 int sqlite3BtreeFlags(BtCursor *pCur){ 06532 /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call 06533 ** restoreCursorPosition() here. 06534 */ 06535 MemPage *pPage; 06536 restoreCursorPosition(pCur); 06537 pPage = pCur->apPage[pCur->iPage]; 06538 assert( cursorHoldsMutex(pCur) ); 06539 assert( pPage->pBt==pCur->pBt ); 06540 return pPage ? 
pPage->aData[pPage->hdrOffset] : 0; 06541 } 06542 06543 06544 /* 06545 ** Return the pager associated with a BTree. This routine is used for 06546 ** testing and debugging only. 06547 */ 06548 Pager *sqlite3BtreePager(Btree *p){ 06549 return p->pBt->pPager; 06550 } 06551 06552 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 06553 /* 06554 ** Append a message to the error message string. 06555 */ 06556 static void checkAppendMsg( 06557 IntegrityCk *pCheck, 06558 char *zMsg1, 06559 const char *zFormat, 06560 ... 06561 ){ 06562 va_list ap; 06563 if( !pCheck->mxErr ) return; 06564 pCheck->mxErr--; 06565 pCheck->nErr++; 06566 va_start(ap, zFormat); 06567 if( pCheck->errMsg.nChar ){ 06568 sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); 06569 } 06570 if( zMsg1 ){ 06571 sqlite3StrAccumAppend(&pCheck->errMsg, zMsg1, -1); 06572 } 06573 sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); 06574 va_end(ap); 06575 if( pCheck->errMsg.mallocFailed ){ 06576 pCheck->mallocFailed = 1; 06577 } 06578 } 06579 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 06580 06581 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 06582 /* 06583 ** Add 1 to the reference count for page iPage. If this is the second 06584 ** reference to the page, add an error message to pCheck->zErrMsg. 06585 ** Return 1 if there are 2 ore more references to the page and 0 if 06586 ** if this is the first reference to the page. 06587 ** 06588 ** Also check that the page number is in bounds. 
06589 */ 06590 static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){ 06591 if( iPage==0 ) return 1; 06592 if( iPage>pCheck->nPage || iPage<0 ){ 06593 checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage); 06594 return 1; 06595 } 06596 if( pCheck->anRef[iPage]==1 ){ 06597 checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage); 06598 return 1; 06599 } 06600 return (pCheck->anRef[iPage]++)>1; 06601 } 06602 06603 #ifndef SQLITE_OMIT_AUTOVACUUM 06604 /* 06605 ** Check that the entry in the pointer-map for page iChild maps to 06606 ** page iParent, pointer type ptrType. If not, append an error message 06607 ** to pCheck. 06608 */ 06609 static void checkPtrmap( 06610 IntegrityCk *pCheck, /* Integrity check context */ 06611 Pgno iChild, /* Child page number */ 06612 u8 eType, /* Expected pointer map type */ 06613 Pgno iParent, /* Expected pointer map parent page number */ 06614 char *zContext /* Context description (used for error msg) */ 06615 ){ 06616 int rc; 06617 u8 ePtrmapType; 06618 Pgno iPtrmapParent; 06619 06620 rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); 06621 if( rc!=SQLITE_OK ){ 06622 checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild); 06623 return; 06624 } 06625 06626 if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ 06627 checkAppendMsg(pCheck, zContext, 06628 "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", 06629 iChild, eType, iParent, ePtrmapType, iPtrmapParent); 06630 } 06631 } 06632 #endif 06633 06634 /* 06635 ** Check the integrity of the freelist or of an overflow page list. 06636 ** Verify that the number of pages on the list is N. 06637 */ 06638 static void checkList( 06639 IntegrityCk *pCheck, /* Integrity checking context */ 06640 int isFreeList, /* True for a freelist. 
False for overflow page list */ 06641 int iPage, /* Page number for first page in the list */ 06642 int N, /* Expected number of pages in the list */ 06643 char *zContext /* Context for error messages */ 06644 ){ 06645 int i; 06646 int expected = N; 06647 int iFirst = iPage; 06648 while( N-- > 0 && pCheck->mxErr ){ 06649 DbPage *pOvflPage; 06650 unsigned char *pOvflData; 06651 if( iPage<1 ){ 06652 checkAppendMsg(pCheck, zContext, 06653 "%d of %d pages missing from overflow list starting at %d", 06654 N+1, expected, iFirst); 06655 break; 06656 } 06657 if( checkRef(pCheck, iPage, zContext) ) break; 06658 if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ 06659 checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage); 06660 break; 06661 } 06662 pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); 06663 if( isFreeList ){ 06664 int n = get4byte(&pOvflData[4]); 06665 #ifndef SQLITE_OMIT_AUTOVACUUM 06666 if( pCheck->pBt->autoVacuum ){ 06667 checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext); 06668 } 06669 #endif 06670 if( n>pCheck->pBt->usableSize/4-2 ){ 06671 checkAppendMsg(pCheck, zContext, 06672 "freelist leaf count too big on page %d", iPage); 06673 N--; 06674 }else{ 06675 for(i=0; i<n; i++){ 06676 Pgno iFreePage = get4byte(&pOvflData[8+i*4]); 06677 #ifndef SQLITE_OMIT_AUTOVACUUM 06678 if( pCheck->pBt->autoVacuum ){ 06679 checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext); 06680 } 06681 #endif 06682 checkRef(pCheck, iFreePage, zContext); 06683 } 06684 N -= n; 06685 } 06686 } 06687 #ifndef SQLITE_OMIT_AUTOVACUUM 06688 else{ 06689 /* If this database supports auto-vacuum and iPage is not the last 06690 ** page in this overflow list, check that the pointer-map entry for 06691 ** the following page matches iPage. 
06692 */ 06693 if( pCheck->pBt->autoVacuum && N>0 ){ 06694 i = get4byte(pOvflData); 06695 checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext); 06696 } 06697 } 06698 #endif 06699 iPage = get4byte(pOvflData); 06700 sqlite3PagerUnref(pOvflPage); 06701 } 06702 } 06703 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 06704 06705 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 06706 /* 06707 ** Do various sanity checks on a single page of a tree. Return 06708 ** the tree depth. Root pages return 0. Parents of root pages 06709 ** return 1, and so forth. 06710 ** 06711 ** These checks are done: 06712 ** 06713 ** 1. Make sure that cells and freeblocks do not overlap 06714 ** but combine to completely cover the page. 06715 ** NO 2. Make sure cell keys are in order. 06716 ** NO 3. Make sure no key is less than or equal to zLowerBound. 06717 ** NO 4. Make sure no key is greater than or equal to zUpperBound. 06718 ** 5. Check the integrity of overflow pages. 06719 ** 6. Recursively call checkTreePage on all children. 06720 ** 7. Verify that the depth of all children is the same. 06721 ** 8. Make sure this page is at least 33% full or else it is 06722 ** the root of the tree. 
06723 */ 06724 static int checkTreePage( 06725 IntegrityCk *pCheck, /* Context for the sanity check */ 06726 int iPage, /* Page number of the page to check */ 06727 MemPage *pParent, /* Parent page */ 06728 char *zParentContext /* Parent context */ 06729 ){ 06730 MemPage *pPage; 06731 int i, rc, depth, d2, pgno, cnt; 06732 int hdr, cellStart; 06733 int nCell; 06734 u8 *data; 06735 BtShared *pBt; 06736 int usableSize; 06737 char zContext[100]; 06738 char *hit = 0; 06739 06740 sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage); 06741 06742 /* Check that the page exists 06743 */ 06744 pBt = pCheck->pBt; 06745 usableSize = pBt->usableSize; 06746 if( iPage==0 ) return 0; 06747 if( checkRef(pCheck, iPage, zParentContext) ) return 0; 06748 if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ 06749 checkAppendMsg(pCheck, zContext, 06750 "unable to get the page. error code=%d", rc); 06751 return 0; 06752 } 06753 if( (rc = sqlite3BtreeInitPage(pPage))!=0 ){ 06754 checkAppendMsg(pCheck, zContext, 06755 "sqlite3BtreeInitPage() returns error code %d", rc); 06756 releasePage(pPage); 06757 return 0; 06758 } 06759 06760 /* Check out all the cells. 
06761 */ 06762 depth = 0; 06763 for(i=0; i<pPage->nCell && pCheck->mxErr; i++){ 06764 u8 *pCell; 06765 int sz; 06766 CellInfo info; 06767 06768 /* Check payload overflow pages 06769 */ 06770 sqlite3_snprintf(sizeof(zContext), zContext, 06771 "On tree page %d cell %d: ", iPage, i); 06772 pCell = findCell(pPage,i); 06773 sqlite3BtreeParseCellPtr(pPage, pCell, &info); 06774 sz = info.nData; 06775 if( !pPage->intKey ) sz += info.nKey; 06776 assert( sz==info.nPayload ); 06777 if( sz>info.nLocal ){ 06778 int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4); 06779 Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); 06780 #ifndef SQLITE_OMIT_AUTOVACUUM 06781 if( pBt->autoVacuum ){ 06782 checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext); 06783 } 06784 #endif 06785 checkList(pCheck, 0, pgnoOvfl, nPage, zContext); 06786 } 06787 06788 /* Check sanity of left child page. 06789 */ 06790 if( !pPage->leaf ){ 06791 pgno = get4byte(pCell); 06792 #ifndef SQLITE_OMIT_AUTOVACUUM 06793 if( pBt->autoVacuum ){ 06794 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); 06795 } 06796 #endif 06797 d2 = checkTreePage(pCheck,pgno,pPage,zContext); 06798 if( i>0 && d2!=depth ){ 06799 checkAppendMsg(pCheck, zContext, "Child page depth differs"); 06800 } 06801 depth = d2; 06802 } 06803 } 06804 if( !pPage->leaf ){ 06805 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 06806 sqlite3_snprintf(sizeof(zContext), zContext, 06807 "On page %d at right child: ", iPage); 06808 #ifndef SQLITE_OMIT_AUTOVACUUM 06809 if( pBt->autoVacuum ){ 06810 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0); 06811 } 06812 #endif 06813 checkTreePage(pCheck, pgno, pPage, zContext); 06814 } 06815 06816 /* Check for complete coverage of the page 06817 */ 06818 data = pPage->aData; 06819 hdr = pPage->hdrOffset; 06820 hit = sqlite3PageMalloc( pBt->pageSize ); 06821 if( hit==0 ){ 06822 pCheck->mallocFailed = 1; 06823 }else{ 06824 u16 contentOffset = get2byte(&data[hdr+5]); 06825 if (contentOffset > 
usableSize) { 06826 checkAppendMsg(pCheck, 0, 06827 "Corruption detected in header on page %d",iPage,0); 06828 goto check_page_abort; 06829 } 06830 memset(hit+contentOffset, 0, usableSize-contentOffset); 06831 memset(hit, 1, contentOffset); 06832 nCell = get2byte(&data[hdr+3]); 06833 cellStart = hdr + 12 - 4*pPage->leaf; 06834 for(i=0; i<nCell; i++){ 06835 int pc = get2byte(&data[cellStart+i*2]); 06836 u16 size = 1024; 06837 int j; 06838 if( pc<=usableSize ){ 06839 size = cellSizePtr(pPage, &data[pc]); 06840 } 06841 if( (pc+size-1)>=usableSize || pc<0 ){ 06842 checkAppendMsg(pCheck, 0, 06843 "Corruption detected in cell %d on page %d",i,iPage,0); 06844 }else{ 06845 for(j=pc+size-1; j>=pc; j--) hit[j]++; 06846 } 06847 } 06848 for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; 06849 cnt++){ 06850 int size = get2byte(&data[i+2]); 06851 int j; 06852 if( (i+size-1)>=usableSize || i<0 ){ 06853 checkAppendMsg(pCheck, 0, 06854 "Corruption detected in cell %d on page %d",i,iPage,0); 06855 }else{ 06856 for(j=i+size-1; j>=i; j--) hit[j]++; 06857 } 06858 i = get2byte(&data[i]); 06859 } 06860 for(i=cnt=0; i<usableSize; i++){ 06861 if( hit[i]==0 ){ 06862 cnt++; 06863 }else if( hit[i]>1 ){ 06864 checkAppendMsg(pCheck, 0, 06865 "Multiple uses for byte %d of page %d", i, iPage); 06866 break; 06867 } 06868 } 06869 if( cnt!=data[hdr+7] ){ 06870 checkAppendMsg(pCheck, 0, 06871 "Fragmented space is %d byte reported as %d on page %d", 06872 cnt, data[hdr+7], iPage); 06873 } 06874 } 06875 check_page_abort: 06876 if (hit) sqlite3PageFree(hit); 06877 06878 releasePage(pPage); 06879 return depth+1; 06880 } 06881 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 06882 06883 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 06884 /* 06885 ** This routine does a complete check of the given BTree file. aRoot[] is 06886 ** an array of pages numbers were each page number is the root page of 06887 ** a table. nRoot is the number of entries in aRoot. 
06888 ** 06889 ** Write the number of error seen in *pnErr. Except for some memory 06890 ** allocation errors, nn error message is held in memory obtained from 06891 ** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is 06892 ** returned. 06893 */ 06894 char *sqlite3BtreeIntegrityCheck( 06895 Btree *p, /* The btree to be checked */ 06896 int *aRoot, /* An array of root pages numbers for individual trees */ 06897 int nRoot, /* Number of entries in aRoot[] */ 06898 int mxErr, /* Stop reporting errors after this many */ 06899 int *pnErr /* Write number of errors seen to this variable */ 06900 ){ 06901 int i; 06902 int nRef; 06903 IntegrityCk sCheck; 06904 BtShared *pBt = p->pBt; 06905 char zErr[100]; 06906 06907 sqlite3BtreeEnter(p); 06908 pBt->db = p->db; 06909 nRef = sqlite3PagerRefcount(pBt->pPager); 06910 if( lockBtreeWithRetry(p)!=SQLITE_OK ){ 06911 *pnErr = 1; 06912 sqlite3BtreeLeave(p); 06913 return sqlite3DbStrDup(0, "cannot acquire a read lock on the database"); 06914 } 06915 sCheck.pBt = pBt; 06916 sCheck.pPager = pBt->pPager; 06917 sCheck.nPage = pagerPagecount(sCheck.pPager); 06918 sCheck.mxErr = mxErr; 06919 sCheck.nErr = 0; 06920 sCheck.mallocFailed = 0; 06921 *pnErr = 0; 06922 #ifndef SQLITE_OMIT_AUTOVACUUM 06923 if( pBt->nTrunc!=0 ){ 06924 sCheck.nPage = pBt->nTrunc; 06925 } 06926 #endif 06927 if( sCheck.nPage==0 ){ 06928 unlockBtreeIfUnused(pBt); 06929 sqlite3BtreeLeave(p); 06930 return 0; 06931 } 06932 sCheck.anRef = sqlite3Malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) ); 06933 if( !sCheck.anRef ){ 06934 unlockBtreeIfUnused(pBt); 06935 *pnErr = 1; 06936 sqlite3BtreeLeave(p); 06937 return 0; 06938 } 06939 for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; } 06940 i = PENDING_BYTE_PAGE(pBt); 06941 if( i<=sCheck.nPage ){ 06942 sCheck.anRef[i] = 1; 06943 } 06944 sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000); 06945 06946 /* Check the integrity of the freelist 06947 */ 06948 checkList(&sCheck, 1, 
get4byte(&pBt->pPage1->aData[32]), 06949 get4byte(&pBt->pPage1->aData[36]), "Main freelist: "); 06950 06951 /* Check all the tables. 06952 */ 06953 for(i=0; i<nRoot && sCheck.mxErr; i++){ 06954 if( aRoot[i]==0 ) continue; 06955 #ifndef SQLITE_OMIT_AUTOVACUUM 06956 if( pBt->autoVacuum && aRoot[i]>1 ){ 06957 checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0); 06958 } 06959 #endif 06960 checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: "); 06961 } 06962 06963 /* Make sure every page in the file is referenced 06964 */ 06965 for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ 06966 #ifdef SQLITE_OMIT_AUTOVACUUM 06967 if( sCheck.anRef[i]==0 ){ 06968 checkAppendMsg(&sCheck, 0, "Page %d is never used", i); 06969 } 06970 #else 06971 /* If the database supports auto-vacuum, make sure no tables contain 06972 ** references to pointer-map pages. 06973 */ 06974 if( sCheck.anRef[i]==0 && 06975 (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ 06976 checkAppendMsg(&sCheck, 0, "Page %d is never used", i); 06977 } 06978 if( sCheck.anRef[i]!=0 && 06979 (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ 06980 checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i); 06981 } 06982 #endif 06983 } 06984 06985 /* Make sure this analysis did not leave any unref() pages 06986 */ 06987 unlockBtreeIfUnused(pBt); 06988 if( nRef != sqlite3PagerRefcount(pBt->pPager) ){ 06989 checkAppendMsg(&sCheck, 0, 06990 "Outstanding page count goes from %d to %d during this analysis", 06991 nRef, sqlite3PagerRefcount(pBt->pPager) 06992 ); 06993 } 06994 06995 /* Clean up and report errors. 
06996 */ 06997 sqlite3BtreeLeave(p); 06998 sqlite3_free(sCheck.anRef); 06999 if( sCheck.mallocFailed ){ 07000 sqlite3StrAccumReset(&sCheck.errMsg); 07001 *pnErr = sCheck.nErr+1; 07002 return 0; 07003 } 07004 *pnErr = sCheck.nErr; 07005 if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); 07006 return sqlite3StrAccumFinish(&sCheck.errMsg); 07007 } 07008 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 07009 07010 /* 07011 ** Return the full pathname of the underlying database file. 07012 ** 07013 ** The pager filename is invariant as long as the pager is 07014 ** open so it is safe to access without the BtShared mutex. 07015 */ 07016 const char *sqlite3BtreeGetFilename(Btree *p){ 07017 assert( p->pBt->pPager!=0 ); 07018 return sqlite3PagerFilename(p->pBt->pPager); 07019 } 07020 07021 /* 07022 ** Return the pathname of the directory that contains the database file. 07023 ** 07024 ** The pager directory name is invariant as long as the pager is 07025 ** open so it is safe to access without the BtShared mutex. 07026 */ 07027 const char *sqlite3BtreeGetDirname(Btree *p){ 07028 assert( p->pBt->pPager!=0 ); 07029 return sqlite3PagerDirname(p->pBt->pPager); 07030 } 07031 07032 /* 07033 ** Return the pathname of the journal file for this database. The return 07034 ** value of this routine is the same regardless of whether the journal file 07035 ** has been created or not. 07036 ** 07037 ** The pager journal filename is invariant as long as the pager is 07038 ** open so it is safe to access without the BtShared mutex. 07039 */ 07040 const char *sqlite3BtreeGetJournalname(Btree *p){ 07041 assert( p->pBt->pPager!=0 ); 07042 return sqlite3PagerJournalname(p->pBt->pPager); 07043 } 07044 07045 #ifndef SQLITE_OMIT_VACUUM 07046 /* 07047 ** Copy the complete content of pBtFrom into pBtTo. A transaction 07048 ** must be active for both files. 07049 ** 07050 ** The size of file pTo may be reduced by this operation. 07051 ** If anything goes wrong, the transaction on pTo is rolled back. 
07052 ** 07053 ** If successful, CommitPhaseOne() may be called on pTo before returning. 07054 ** The caller should finish committing the transaction on pTo by calling 07055 ** sqlite3BtreeCommit(). 07056 */ 07057 static int btreeCopyFile(Btree *pTo, Btree *pFrom){ 07058 int rc = SQLITE_OK; 07059 Pgno i; 07060 07061 Pgno nFromPage; /* Number of pages in pFrom */ 07062 Pgno nToPage; /* Number of pages in pTo */ 07063 Pgno nNewPage; /* Number of pages in pTo after the copy */ 07064 07065 Pgno iSkip; /* Pending byte page in pTo */ 07066 int nToPageSize; /* Page size of pTo in bytes */ 07067 int nFromPageSize; /* Page size of pFrom in bytes */ 07068 07069 BtShared *pBtTo = pTo->pBt; 07070 BtShared *pBtFrom = pFrom->pBt; 07071 pBtTo->db = pTo->db; 07072 pBtFrom->db = pFrom->db; 07073 07074 nToPageSize = pBtTo->pageSize; 07075 nFromPageSize = pBtFrom->pageSize; 07076 07077 if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){ 07078 return SQLITE_ERROR; 07079 } 07080 if( pBtTo->pCursor ){ 07081 return SQLITE_BUSY; 07082 } 07083 07084 nToPage = pagerPagecount(pBtTo->pPager); 07085 nFromPage = pagerPagecount(pBtFrom->pPager); 07086 iSkip = PENDING_BYTE_PAGE(pBtTo); 07087 07088 /* Variable nNewPage is the number of pages required to store the 07089 ** contents of pFrom using the current page-size of pTo. 07090 */ 07091 nNewPage = ((i64)nFromPage * (i64)nFromPageSize + (i64)nToPageSize - 1) / 07092 (i64)nToPageSize; 07093 07094 for(i=1; rc==SQLITE_OK && (i<=nToPage || i<=nNewPage); i++){ 07095 07096 /* Journal the original page. 07097 ** 07098 ** iSkip is the page number of the locking page (PENDING_BYTE_PAGE) 07099 ** in database *pTo (before the copy). This page is never written 07100 ** into the journal file. Unless i==iSkip or the page was not 07101 ** present in pTo before the copy operation, journal page i from pTo. 
07102 */ 07103 if( i!=iSkip && i<=nToPage ){ 07104 DbPage *pDbPage = 0; 07105 rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage); 07106 if( rc==SQLITE_OK ){ 07107 rc = sqlite3PagerWrite(pDbPage); 07108 if( rc==SQLITE_OK && i>nFromPage ){ 07109 /* Yeah. It seems wierd to call DontWrite() right after Write(). But 07110 ** that is because the names of those procedures do not exactly 07111 ** represent what they do. Write() really means "put this page in the 07112 ** rollback journal and mark it as dirty so that it will be written 07113 ** to the database file later." DontWrite() undoes the second part of 07114 ** that and prevents the page from being written to the database. The 07115 ** page is still on the rollback journal, though. And that is the 07116 ** whole point of this block: to put pages on the rollback journal. 07117 */ 07118 rc = sqlite3PagerDontWrite(pDbPage); 07119 } 07120 sqlite3PagerUnref(pDbPage); 07121 } 07122 } 07123 07124 /* Overwrite the data in page i of the target database */ 07125 if( rc==SQLITE_OK && i!=iSkip && i<=nNewPage ){ 07126 07127 DbPage *pToPage = 0; 07128 sqlite3_int64 iOff; 07129 07130 rc = sqlite3PagerGet(pBtTo->pPager, i, &pToPage); 07131 if( rc==SQLITE_OK ){ 07132 rc = sqlite3PagerWrite(pToPage); 07133 } 07134 07135 for( 07136 iOff=(i-1)*nToPageSize; 07137 rc==SQLITE_OK && iOff<i*nToPageSize; 07138 iOff += nFromPageSize 07139 ){ 07140 DbPage *pFromPage = 0; 07141 Pgno iFrom = (iOff/nFromPageSize)+1; 07142 07143 if( iFrom==PENDING_BYTE_PAGE(pBtFrom) ){ 07144 continue; 07145 } 07146 07147 rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage); 07148 if( rc==SQLITE_OK ){ 07149 char *zTo = sqlite3PagerGetData(pToPage); 07150 char *zFrom = sqlite3PagerGetData(pFromPage); 07151 int nCopy; 07152 07153 if( nFromPageSize>=nToPageSize ){ 07154 zFrom += ((i-1)*nToPageSize - ((iFrom-1)*nFromPageSize)); 07155 nCopy = nToPageSize; 07156 }else{ 07157 zTo += (((iFrom-1)*nFromPageSize) - (i-1)*nToPageSize); 07158 nCopy = nFromPageSize; 07159 } 
07160 07161 memcpy(zTo, zFrom, nCopy); 07162 sqlite3PagerUnref(pFromPage); 07163 } 07164 } 07165 07166 if( pToPage ){ 07167 MemPage *p = (MemPage *)sqlite3PagerGetExtra(pToPage); 07168 p->isInit = 0; 07169 sqlite3PagerUnref(pToPage); 07170 } 07171 } 07172 } 07173 07174 /* If things have worked so far, the database file may need to be 07175 ** truncated. The complex part is that it may need to be truncated to 07176 ** a size that is not an integer multiple of nToPageSize - the current 07177 ** page size used by the pager associated with B-Tree pTo. 07178 ** 07179 ** For example, say the page-size of pTo is 2048 bytes and the original 07180 ** number of pages is 5 (10 KB file). If pFrom has a page size of 1024 07181 ** bytes and 9 pages, then the file needs to be truncated to 9KB. 07182 */ 07183 if( rc==SQLITE_OK ){ 07184 if( nFromPageSize!=nToPageSize ){ 07185 sqlite3_file *pFile = sqlite3PagerFile(pBtTo->pPager); 07186 i64 iSize = (i64)nFromPageSize * (i64)nFromPage; 07187 i64 iNow = (i64)((nToPage>nNewPage)?nToPage:nNewPage) * (i64)nToPageSize; 07188 i64 iPending = ((i64)PENDING_BYTE_PAGE(pBtTo)-1) *(i64)nToPageSize; 07189 07190 assert( iSize<=iNow ); 07191 07192 /* Commit phase one syncs the journal file associated with pTo 07193 ** containing the original data. It does not sync the database file 07194 ** itself. After doing this it is safe to use OsTruncate() and other 07195 ** file APIs on the database file directly. 07196 */ 07197 pBtTo->db = pTo->db; 07198 rc = sqlite3PagerCommitPhaseOne(pBtTo->pPager, 0, 0, 1); 07199 if( iSize<iNow && rc==SQLITE_OK ){ 07200 rc = sqlite3OsTruncate(pFile, iSize); 07201 } 07202 07203 /* The loop that copied data from database pFrom to pTo did not 07204 ** populate the locking page of database pTo. If the page-size of 07205 ** pFrom is smaller than that of pTo, this means some data will 07206 ** not have been copied. 07207 ** 07208 ** This block copies the missing data from database pFrom to pTo 07209 ** using file APIs. 
This is safe because at this point we know that 07210 ** all of the original data from pTo has been synced into the 07211 ** journal file. At this point it would be safe to do anything at 07212 ** all to the database file except truncate it to zero bytes. 07213 */ 07214 if( rc==SQLITE_OK && nFromPageSize<nToPageSize && iSize>iPending){ 07215 i64 iOff; 07216 for( 07217 iOff=iPending; 07218 rc==SQLITE_OK && iOff<(iPending+nToPageSize); 07219 iOff += nFromPageSize 07220 ){ 07221 DbPage *pFromPage = 0; 07222 Pgno iFrom = (iOff/nFromPageSize)+1; 07223 07224 if( iFrom==PENDING_BYTE_PAGE(pBtFrom) || iFrom>nFromPage ){ 07225 continue; 07226 } 07227 07228 rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage); 07229 if( rc==SQLITE_OK ){ 07230 char *zFrom = sqlite3PagerGetData(pFromPage); 07231 rc = sqlite3OsWrite(pFile, zFrom, nFromPageSize, iOff); 07232 sqlite3PagerUnref(pFromPage); 07233 } 07234 } 07235 } 07236 07237 /* Sync the database file */ 07238 if( rc==SQLITE_OK ){ 07239 rc = sqlite3PagerSync(pBtTo->pPager); 07240 } 07241 }else{ 07242 rc = sqlite3PagerTruncate(pBtTo->pPager, nNewPage); 07243 } 07244 if( rc==SQLITE_OK ){ 07245 pBtTo->pageSizeFixed = 0; 07246 } 07247 } 07248 07249 if( rc ){ 07250 sqlite3BtreeRollback(pTo); 07251 } 07252 07253 return rc; 07254 } 07255 int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){ 07256 int rc; 07257 sqlite3BtreeEnter(pTo); 07258 sqlite3BtreeEnter(pFrom); 07259 rc = btreeCopyFile(pTo, pFrom); 07260 sqlite3BtreeLeave(pFrom); 07261 sqlite3BtreeLeave(pTo); 07262 return rc; 07263 } 07264 07265 #endif /* SQLITE_OMIT_VACUUM */ 07266 07267 /* 07268 ** Return non-zero if a transaction is active. 07269 */ 07270 int sqlite3BtreeIsInTrans(Btree *p){ 07271 assert( p==0 || sqlite3_mutex_held(p->db->mutex) ); 07272 return (p && (p->inTrans==TRANS_WRITE)); 07273 } 07274 07275 /* 07276 ** Return non-zero if a statement transaction is active. 
07277 */ 07278 int sqlite3BtreeIsInStmt(Btree *p){ 07279 assert( sqlite3BtreeHoldsMutex(p) ); 07280 return (p->pBt && p->pBt->inStmt); 07281 } 07282 07283 /* 07284 ** Return non-zero if a read (or write) transaction is active. 07285 */ 07286 int sqlite3BtreeIsInReadTrans(Btree *p){ 07287 assert( sqlite3_mutex_held(p->db->mutex) ); 07288 return (p && (p->inTrans!=TRANS_NONE)); 07289 } 07290 07291 /* 07292 ** This function returns a pointer to a blob of memory associated with 07293 ** a single shared-btree. The memory is used by client code for its own 07294 ** purposes (for example, to store a high-level schema associated with 07295 ** the shared-btree). The btree layer manages reference counting issues. 07296 ** 07297 ** The first time this is called on a shared-btree, nBytes bytes of memory 07298 ** are allocated, zeroed, and returned to the caller. For each subsequent 07299 ** call the nBytes parameter is ignored and a pointer to the same blob 07300 ** of memory returned. 07301 ** 07302 ** If the nBytes parameter is 0 and the blob of memory has not yet been 07303 ** allocated, a null pointer is returned. If the blob has already been 07304 ** allocated, it is returned as normal. 07305 ** 07306 ** Just before the shared-btree is closed, the function passed as the 07307 ** xFree argument when the memory allocation was made is invoked on the 07308 ** blob of allocated memory. This function should not call sqlite3_free() 07309 ** on the memory, the btree layer does that. 07310 */ 07311 void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ 07312 BtShared *pBt = p->pBt; 07313 sqlite3BtreeEnter(p); 07314 if( !pBt->pSchema && nBytes ){ 07315 pBt->pSchema = sqlite3MallocZero(nBytes); 07316 pBt->xFreeSchema = xFree; 07317 } 07318 sqlite3BtreeLeave(p); 07319 return pBt->pSchema; 07320 } 07321 07322 /* 07323 ** Return true if another user of the same shared btree as the argument 07324 ** handle holds an exclusive lock on the sqlite_master table. 
07325 */ 07326 int sqlite3BtreeSchemaLocked(Btree *p){ 07327 int rc; 07328 assert( sqlite3_mutex_held(p->db->mutex) ); 07329 sqlite3BtreeEnter(p); 07330 rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK); 07331 sqlite3BtreeLeave(p); 07332 return rc; 07333 } 07334 07335 07336 #ifndef SQLITE_OMIT_SHARED_CACHE 07337 /* 07338 ** Obtain a lock on the table whose root page is iTab. The 07339 ** lock is a write lock if isWritelock is true or a read lock 07340 ** if it is false. 07341 */ 07342 int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ 07343 int rc = SQLITE_OK; 07344 if( p->sharable ){ 07345 u8 lockType = READ_LOCK + isWriteLock; 07346 assert( READ_LOCK+1==WRITE_LOCK ); 07347 assert( isWriteLock==0 || isWriteLock==1 ); 07348 sqlite3BtreeEnter(p); 07349 rc = queryTableLock(p, iTab, lockType); 07350 if( rc==SQLITE_OK ){ 07351 rc = lockTable(p, iTab, lockType); 07352 } 07353 sqlite3BtreeLeave(p); 07354 } 07355 return rc; 07356 } 07357 #endif 07358 07359 #ifndef SQLITE_OMIT_INCRBLOB 07360 /* 07361 ** Argument pCsr must be a cursor opened for writing on an 07362 ** INTKEY table currently pointing at a valid table entry. 07363 ** This function modifies the data stored as part of that entry. 07364 ** Only the data content may only be modified, it is not possible 07365 ** to change the length of the data stored. 07366 */ 07367 int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ 07368 assert( cursorHoldsMutex(pCsr) ); 07369 assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); 07370 assert(pCsr->isIncrblobHandle); 07371 07372 restoreCursorPosition(pCsr); 07373 assert( pCsr->eState!=CURSOR_REQUIRESEEK ); 07374 if( pCsr->eState!=CURSOR_VALID ){ 07375 return SQLITE_ABORT; 07376 } 07377 07378 /* Check some preconditions: 07379 ** (a) the cursor is open for writing, 07380 ** (b) there is no read-lock on the table being modified and 07381 ** (c) the cursor points at a valid row of an intKey table. 
07382 */ 07383 if( !pCsr->wrFlag ){ 07384 return SQLITE_READONLY; 07385 } 07386 assert( !pCsr->pBt->readOnly 07387 && pCsr->pBt->inTransaction==TRANS_WRITE ); 07388 if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr, 0) ){ 07389 return SQLITE_LOCKED; /* The table pCur points to has a read lock */ 07390 } 07391 if( pCsr->eState==CURSOR_INVALID || !pCsr->apPage[pCsr->iPage]->intKey ){ 07392 return SQLITE_ERROR; 07393 } 07394 07395 return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1); 07396 } 07397 07398 /* 07399 ** Set a flag on this cursor to cache the locations of pages from the 07400 ** overflow list for the current row. This is used by cursors opened 07401 ** for incremental blob IO only. 07402 ** 07403 ** This function sets a flag only. The actual page location cache 07404 ** (stored in BtCursor.aOverflow[]) is allocated and used by function 07405 ** accessPayload() (the worker function for sqlite3BtreeData() and 07406 ** sqlite3BtreePutData()). 07407 */ 07408 void sqlite3BtreeCacheOverflow(BtCursor *pCur){ 07409 assert( cursorHoldsMutex(pCur) ); 07410 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 07411 assert(!pCur->isIncrblobHandle); 07412 assert(!pCur->aOverflow); 07413 pCur->isIncrblobHandle = 1; 07414 } 07415 #endif
ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:52 2011 by Doxygen 1.6.1