fts2_hash.c

Go to the documentation of this file.
00001 /*
00002 ** 2001 September 22
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** This is the implementation of generic hash-tables used in SQLite.
00013 ** We've modified it slightly to serve as a standalone hash table
00014 ** implementation for the full-text indexing module.
00015 */
00016 
00017 /*
00018 ** The code in this file is only compiled if:
00019 **
00020 **     * The FTS2 module is being built as an extension
00021 **       (in which case SQLITE_CORE is not defined), or
00022 **
00023 **     * The FTS2 module is being built into the core of
00024 **       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
00025 */
00026 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
00027 
00028 #include <assert.h>
00029 #include <stdlib.h>
00030 #include <string.h>
00031 
00032 #include "sqlite3.h"
00033 #include "fts2_hash.h"
00034 
00035 /*
00036 ** Malloc and Free functions
00037 */
00038 static void *fts2HashMalloc(int n){
00039   void *p = sqlite3_malloc(n);
00040   if( p ){
00041     memset(p, 0, n);
00042   }
00043   return p;
00044 }
00045 static void fts2HashFree(void *p){
00046   sqlite3_free(p);
00047 }
00048 
00049 /* Turn bulk memory into a hash table object by initializing the
00050 ** fields of the Hash structure.
00051 **
00052 ** "pNew" is a pointer to the hash table that is to be initialized.
00053 ** keyClass is one of the constants 
00054 ** FTS2_HASH_BINARY or FTS2_HASH_STRING.  The value of keyClass 
00055 ** determines what kind of key the hash table will use.  "copyKey" is
00056 ** true if the hash table should make its own private copy of keys and
00057 ** false if it should just use the supplied pointer.
00058 */
00059 void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
00060   assert( pNew!=0 );
00061   assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
00062   pNew->keyClass = keyClass;
00063   pNew->copyKey = copyKey;
00064   pNew->first = 0;
00065   pNew->count = 0;
00066   pNew->htsize = 0;
00067   pNew->ht = 0;
00068 }
00069 
00070 /* Remove all entries from a hash table.  Reclaim all memory.
00071 ** Call this routine to delete a hash table or to reset a hash table
00072 ** to the empty state.
00073 */
00074 void sqlite3Fts2HashClear(fts2Hash *pH){
00075   fts2HashElem *elem;         /* For looping over all elements of the table */
00076 
00077   assert( pH!=0 );
00078   elem = pH->first;
00079   pH->first = 0;
00080   fts2HashFree(pH->ht);
00081   pH->ht = 0;
00082   pH->htsize = 0;
00083   while( elem ){
00084     fts2HashElem *next_elem = elem->next;
00085     if( pH->copyKey && elem->pKey ){
00086       fts2HashFree(elem->pKey);
00087     }
00088     fts2HashFree(elem);
00089     elem = next_elem;
00090   }
00091   pH->count = 0;
00092 }
00093 
00094 /*
00095 ** Hash and comparison functions when the mode is FTS2_HASH_STRING
00096 */
00097 static int strHash(const void *pKey, int nKey){
00098   const char *z = (const char *)pKey;
00099   int h = 0;
00100   if( nKey<=0 ) nKey = (int) strlen(z);
00101   while( nKey > 0  ){
00102     h = (h<<3) ^ h ^ *z++;
00103     nKey--;
00104   }
00105   return h & 0x7fffffff;
00106 }
00107 static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
00108   if( n1!=n2 ) return 1;
00109   return strncmp((const char*)pKey1,(const char*)pKey2,n1);
00110 }
00111 
00112 /*
00113 ** Hash and comparison functions when the mode is FTS2_HASH_BINARY
00114 */
00115 static int binHash(const void *pKey, int nKey){
00116   int h = 0;
00117   const char *z = (const char *)pKey;
00118   while( nKey-- > 0 ){
00119     h = (h<<3) ^ h ^ *(z++);
00120   }
00121   return h & 0x7fffffff;
00122 }
00123 static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
00124   if( n1!=n2 ) return 1;
00125   return memcmp(pKey1,pKey2,n1);
00126 }
00127 
00128 /*
00129 ** Return a pointer to the appropriate hash function given the key class.
00130 **
00131 ** The C syntax in this function definition may be unfamilar to some 
00132 ** programmers, so we provide the following additional explanation:
00133 **
00134 ** The name of the function is "hashFunction".  The function takes a
00135 ** single parameter "keyClass".  The return value of hashFunction()
00136 ** is a pointer to another function.  Specifically, the return value
00137 ** of hashFunction() is a pointer to a function that takes two parameters
00138 ** with types "const void*" and "int" and returns an "int".
00139 */
00140 static int (*hashFunction(int keyClass))(const void*,int){
00141   if( keyClass==FTS2_HASH_STRING ){
00142     return &strHash;
00143   }else{
00144     assert( keyClass==FTS2_HASH_BINARY );
00145     return &binHash;
00146   }
00147 }
00148 
00149 /*
00150 ** Return a pointer to the appropriate hash function given the key class.
00151 **
00152 ** For help in interpreted the obscure C code in the function definition,
00153 ** see the header comment on the previous function.
00154 */
00155 static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
00156   if( keyClass==FTS2_HASH_STRING ){
00157     return &strCompare;
00158   }else{
00159     assert( keyClass==FTS2_HASH_BINARY );
00160     return &binCompare;
00161   }
00162 }
00163 
00164 /* Link an element into the hash table
00165 */
00166 static void insertElement(
00167   fts2Hash *pH,            /* The complete hash table */
00168   struct _fts2ht *pEntry,  /* The entry into which pNew is inserted */
00169   fts2HashElem *pNew       /* The element to be inserted */
00170 ){
00171   fts2HashElem *pHead;     /* First element already in pEntry */
00172   pHead = pEntry->chain;
00173   if( pHead ){
00174     pNew->next = pHead;
00175     pNew->prev = pHead->prev;
00176     if( pHead->prev ){ pHead->prev->next = pNew; }
00177     else             { pH->first = pNew; }
00178     pHead->prev = pNew;
00179   }else{
00180     pNew->next = pH->first;
00181     if( pH->first ){ pH->first->prev = pNew; }
00182     pNew->prev = 0;
00183     pH->first = pNew;
00184   }
00185   pEntry->count++;
00186   pEntry->chain = pNew;
00187 }
00188 
00189 
00190 /* Resize the hash table so that it cantains "new_size" buckets.
00191 ** "new_size" must be a power of 2.  The hash table might fail 
00192 ** to resize if sqliteMalloc() fails.
00193 */
00194 static void rehash(fts2Hash *pH, int new_size){
00195   struct _fts2ht *new_ht;          /* The new hash table */
00196   fts2HashElem *elem, *next_elem;  /* For looping over existing elements */
00197   int (*xHash)(const void*,int);   /* The hash function */
00198 
00199   assert( (new_size & (new_size-1))==0 );
00200   new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
00201   if( new_ht==0 ) return;
00202   fts2HashFree(pH->ht);
00203   pH->ht = new_ht;
00204   pH->htsize = new_size;
00205   xHash = hashFunction(pH->keyClass);
00206   for(elem=pH->first, pH->first=0; elem; elem = next_elem){
00207     int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
00208     next_elem = elem->next;
00209     insertElement(pH, &new_ht[h], elem);
00210   }
00211 }
00212 
00213 /* This function (for internal use only) locates an element in an
00214 ** hash table that matches the given key.  The hash for this key has
00215 ** already been computed and is passed as the 4th parameter.
00216 */
00217 static fts2HashElem *findElementGivenHash(
00218   const fts2Hash *pH, /* The pH to be searched */
00219   const void *pKey,   /* The key we are searching for */
00220   int nKey,
00221   int h               /* The hash for this key. */
00222 ){
00223   fts2HashElem *elem;            /* Used to loop thru the element list */
00224   int count;                     /* Number of elements left to test */
00225   int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
00226 
00227   if( pH->ht ){
00228     struct _fts2ht *pEntry = &pH->ht[h];
00229     elem = pEntry->chain;
00230     count = pEntry->count;
00231     xCompare = compareFunction(pH->keyClass);
00232     while( count-- && elem ){
00233       if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 
00234         return elem;
00235       }
00236       elem = elem->next;
00237     }
00238   }
00239   return 0;
00240 }
00241 
00242 /* Remove a single entry from the hash table given a pointer to that
00243 ** element and a hash on the element's key.
00244 */
00245 static void removeElementGivenHash(
00246   fts2Hash *pH,         /* The pH containing "elem" */
00247   fts2HashElem* elem,   /* The element to be removed from the pH */
00248   int h                 /* Hash value for the element */
00249 ){
00250   struct _fts2ht *pEntry;
00251   if( elem->prev ){
00252     elem->prev->next = elem->next; 
00253   }else{
00254     pH->first = elem->next;
00255   }
00256   if( elem->next ){
00257     elem->next->prev = elem->prev;
00258   }
00259   pEntry = &pH->ht[h];
00260   if( pEntry->chain==elem ){
00261     pEntry->chain = elem->next;
00262   }
00263   pEntry->count--;
00264   if( pEntry->count<=0 ){
00265     pEntry->chain = 0;
00266   }
00267   if( pH->copyKey && elem->pKey ){
00268     fts2HashFree(elem->pKey);
00269   }
00270   fts2HashFree( elem );
00271   pH->count--;
00272   if( pH->count<=0 ){
00273     assert( pH->first==0 );
00274     assert( pH->count==0 );
00275     fts2HashClear(pH);
00276   }
00277 }
00278 
00279 /* Attempt to locate an element of the hash table pH with a key
00280 ** that matches pKey,nKey.  Return the data for this element if it is
00281 ** found, or NULL if there is no match.
00282 */
00283 void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
00284   int h;                 /* A hash on key */
00285   fts2HashElem *elem;    /* The element that matches key */
00286   int (*xHash)(const void*,int);  /* The hash function */
00287 
00288   if( pH==0 || pH->ht==0 ) return 0;
00289   xHash = hashFunction(pH->keyClass);
00290   assert( xHash!=0 );
00291   h = (*xHash)(pKey,nKey);
00292   assert( (pH->htsize & (pH->htsize-1))==0 );
00293   elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
00294   return elem ? elem->data : 0;
00295 }
00296 
00297 /* Insert an element into the hash table pH.  The key is pKey,nKey
00298 ** and the data is "data".
00299 **
00300 ** If no element exists with a matching key, then a new
00301 ** element is created.  A copy of the key is made if the copyKey
00302 ** flag is set.  NULL is returned.
00303 **
00304 ** If another element already exists with the same key, then the
00305 ** new data replaces the old data and the old data is returned.
00306 ** The key is not copied in this instance.  If a malloc fails, then
00307 ** the new data is returned and the hash table is unchanged.
00308 **
00309 ** If the "data" parameter to this function is NULL, then the
00310 ** element corresponding to "key" is removed from the hash table.
00311 */
00312 void *sqlite3Fts2HashInsert(
00313   fts2Hash *pH,        /* The hash table to insert into */
00314   const void *pKey,    /* The key */
00315   int nKey,            /* Number of bytes in the key */
00316   void *data           /* The data */
00317 ){
00318   int hraw;                 /* Raw hash value of the key */
00319   int h;                    /* the hash of the key modulo hash table size */
00320   fts2HashElem *elem;       /* Used to loop thru the element list */
00321   fts2HashElem *new_elem;   /* New element added to the pH */
00322   int (*xHash)(const void*,int);  /* The hash function */
00323 
00324   assert( pH!=0 );
00325   xHash = hashFunction(pH->keyClass);
00326   assert( xHash!=0 );
00327   hraw = (*xHash)(pKey, nKey);
00328   assert( (pH->htsize & (pH->htsize-1))==0 );
00329   h = hraw & (pH->htsize-1);
00330   elem = findElementGivenHash(pH,pKey,nKey,h);
00331   if( elem ){
00332     void *old_data = elem->data;
00333     if( data==0 ){
00334       removeElementGivenHash(pH,elem,h);
00335     }else{
00336       elem->data = data;
00337     }
00338     return old_data;
00339   }
00340   if( data==0 ) return 0;
00341   new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
00342   if( new_elem==0 ) return data;
00343   if( pH->copyKey && pKey!=0 ){
00344     new_elem->pKey = fts2HashMalloc( nKey );
00345     if( new_elem->pKey==0 ){
00346       fts2HashFree(new_elem);
00347       return data;
00348     }
00349     memcpy((void*)new_elem->pKey, pKey, nKey);
00350   }else{
00351     new_elem->pKey = (void*)pKey;
00352   }
00353   new_elem->nKey = nKey;
00354   pH->count++;
00355   if( pH->htsize==0 ){
00356     rehash(pH,8);
00357     if( pH->htsize==0 ){
00358       pH->count = 0;
00359       fts2HashFree(new_elem);
00360       return data;
00361     }
00362   }
00363   if( pH->count > pH->htsize ){
00364     rehash(pH,pH->htsize*2);
00365   }
00366   assert( pH->htsize>0 );
00367   assert( (pH->htsize & (pH->htsize-1))==0 );
00368   h = hraw & (pH->htsize-1);
00369   insertElement(pH, &pH->ht[h], new_elem);
00370   new_elem->data = data;
00371   return 0;
00372 }
00373 
00374 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:53 2011 by Doxygen 1.6.1