00001 /* 00002 ** 2007 June 22 00003 ** 00004 ** The author disclaims copyright to this source code. In place of 00005 ** a legal notice, here is a blessing: 00006 ** 00007 ** May you do good and not evil. 00008 ** May you find forgiveness for yourself and forgive others. 00009 ** May you share freely, never taking more than you give. 00010 ** 00011 ****************************************************************************** 00012 ** 00013 ** This is part of an SQLite module implementing full-text search. 00014 ** This particular file implements the generic tokenizer interface. 00015 */ 00016 00017 /* 00018 ** The code in this file is only compiled if: 00019 ** 00020 ** * The FTS2 module is being built as an extension 00021 ** (in which case SQLITE_CORE is not defined), or 00022 ** 00023 ** * The FTS2 module is being built into the core of 00024 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). 00025 */ 00026 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) 00027 00028 00029 #include "sqlite3.h" 00030 #include "sqlite3ext.h" 00031 SQLITE_EXTENSION_INIT1 00032 00033 #include "fts2_hash.h" 00034 #include "fts2_tokenizer.h" 00035 #include <assert.h> 00036 00037 /* 00038 ** Implementation of the SQL scalar function for accessing the underlying 00039 ** hash table. This function may be called as follows: 00040 ** 00041 ** SELECT <function-name>(<key-name>); 00042 ** SELECT <function-name>(<key-name>, <pointer>); 00043 ** 00044 ** where <function-name> is the name passed as the third argument 00045 ** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). 00046 ** 00047 ** If the <pointer> argument is specified, it must be a blob value 00048 ** containing a pointer to be stored as the hash data corresponding 00049 ** to the string <key-name>. If <pointer> is not specified, then 00050 ** the string <key-name> must already exist in the hash table. Otherwise, 00051 ** an error is returned. 
00052 ** 00053 ** Whether or not the <pointer> argument is specified, the value returned 00054 ** is a blob containing the pointer stored as the hash data corresponding 00055 ** to string <key-name> (after the hash-table is updated, if applicable). 00056 */ 00057 static void scalarFunc( 00058 sqlite3_context *context, 00059 int argc, 00060 sqlite3_value **argv 00061 ){ 00062 fts2Hash *pHash; 00063 void *pPtr = 0; 00064 const unsigned char *zName; 00065 int nName; 00066 00067 assert( argc==1 || argc==2 ); 00068 00069 pHash = (fts2Hash *)sqlite3_user_data(context); 00070 00071 zName = sqlite3_value_text(argv[0]); 00072 nName = sqlite3_value_bytes(argv[0])+1; 00073 00074 if( argc==2 ){ 00075 void *pOld; 00076 int n = sqlite3_value_bytes(argv[1]); 00077 if( n!=sizeof(pPtr) ){ 00078 sqlite3_result_error(context, "argument type mismatch", -1); 00079 return; 00080 } 00081 pPtr = *(void **)sqlite3_value_blob(argv[1]); 00082 pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); 00083 if( pOld==pPtr ){ 00084 sqlite3_result_error(context, "out of memory", -1); 00085 return; 00086 } 00087 }else{ 00088 pPtr = sqlite3Fts2HashFind(pHash, zName, nName); 00089 if( !pPtr ){ 00090 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 00091 sqlite3_result_error(context, zErr, -1); 00092 sqlite3_free(zErr); 00093 return; 00094 } 00095 } 00096 00097 sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); 00098 } 00099 00100 #ifdef SQLITE_TEST 00101 00102 #include <tcl.h> 00103 #include <string.h> 00104 00105 /* 00106 ** Implementation of a special SQL scalar function for testing tokenizers 00107 ** designed to be used in concert with the Tcl testing framework. 
This 00108 ** function must be called with two arguments: 00109 ** 00110 ** SELECT <function-name>(<key-name>, <input-string>); 00111 ** SELECT <function-name>(<key-name>, <pointer>); 00112 ** 00113 ** where <function-name> is the name passed as the second argument 00114 ** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') 00115 ** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). 00116 ** 00117 ** The return value is a string that may be interpreted as a Tcl 00118 ** list. For each token in the <input-string>, three elements are 00119 ** added to the returned list. The first is the token position, the 00120 ** second is the token text (folded, stemmed, etc.) and the third is the 00121 ** substring of <input-string> associated with the token. For example, 00122 ** using the built-in "simple" tokenizer: 00123 ** 00124 ** SELECT fts_tokenizer_test('simple', 'I don't see how'); 00125 ** 00126 ** will return the string: 00127 ** 00128 ** "{0 i I 1 dont don't 2 see see 3 how how}" 00129 ** 00130 */ 00131 static void testFunc( 00132 sqlite3_context *context, 00133 int argc, 00134 sqlite3_value **argv 00135 ){ 00136 fts2Hash *pHash; 00137 sqlite3_tokenizer_module *p; 00138 sqlite3_tokenizer *pTokenizer = 0; 00139 sqlite3_tokenizer_cursor *pCsr = 0; 00140 00141 const char *zErr = 0; 00142 00143 const char *zName; 00144 int nName; 00145 const char *zInput; 00146 int nInput; 00147 00148 const char *zArg = 0; 00149 00150 const char *zToken; 00151 int nToken; 00152 int iStart; 00153 int iEnd; 00154 int iPos; 00155 00156 Tcl_Obj *pRet; 00157 00158 assert( argc==2 || argc==3 ); 00159 00160 nName = sqlite3_value_bytes(argv[0]); 00161 zName = (const char *)sqlite3_value_text(argv[0]); 00162 nInput = sqlite3_value_bytes(argv[argc-1]); 00163 zInput = (const char *)sqlite3_value_text(argv[argc-1]); 00164 00165 if( argc==3 ){ 00166 zArg = (const char *)sqlite3_value_text(argv[1]); 00167 } 00168 00169 pHash = (fts2Hash *)sqlite3_user_data(context); 
00170 p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); 00171 00172 if( !p ){ 00173 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 00174 sqlite3_result_error(context, zErr, -1); 00175 sqlite3_free(zErr); 00176 return; 00177 } 00178 00179 pRet = Tcl_NewObj(); 00180 Tcl_IncrRefCount(pRet); 00181 00182 if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ 00183 zErr = "error in xCreate()"; 00184 goto finish; 00185 } 00186 pTokenizer->pModule = p; 00187 if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ 00188 zErr = "error in xOpen()"; 00189 goto finish; 00190 } 00191 pCsr->pTokenizer = pTokenizer; 00192 00193 while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ 00194 Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); 00195 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 00196 zToken = &zInput[iStart]; 00197 nToken = iEnd-iStart; 00198 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 00199 } 00200 00201 if( SQLITE_OK!=p->xClose(pCsr) ){ 00202 zErr = "error in xClose()"; 00203 goto finish; 00204 } 00205 if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ 00206 zErr = "error in xDestroy()"; 00207 goto finish; 00208 } 00209 00210 finish: 00211 if( zErr ){ 00212 sqlite3_result_error(context, zErr, -1); 00213 }else{ 00214 sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); 00215 } 00216 Tcl_DecrRefCount(pRet); 00217 } 00218 00219 static 00220 int registerTokenizer( 00221 sqlite3 *db, 00222 char *zName, 00223 const sqlite3_tokenizer_module *p 00224 ){ 00225 int rc; 00226 sqlite3_stmt *pStmt; 00227 const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; 00228 00229 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 00230 if( rc!=SQLITE_OK ){ 00231 return rc; 00232 } 00233 00234 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 00235 sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); 00236 sqlite3_step(pStmt); 00237 00238 return 
sqlite3_finalize(pStmt); 00239 } 00240 00241 static 00242 int queryTokenizer( 00243 sqlite3 *db, 00244 char *zName, 00245 const sqlite3_tokenizer_module **pp 00246 ){ 00247 int rc; 00248 sqlite3_stmt *pStmt; 00249 const char zSql[] = "SELECT fts2_tokenizer(?)"; 00250 00251 *pp = 0; 00252 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 00253 if( rc!=SQLITE_OK ){ 00254 return rc; 00255 } 00256 00257 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 00258 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 00259 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ 00260 memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); 00261 } 00262 } 00263 00264 return sqlite3_finalize(pStmt); 00265 } 00266 00267 void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); 00268 00269 /* 00270 ** Implementation of the scalar function fts2_tokenizer_internal_test(). 00271 ** This function is used for testing only, it is not included in the 00272 ** build unless SQLITE_TEST is defined. 00273 ** 00274 ** The purpose of this is to test that the fts2_tokenizer() function 00275 ** can be used as designed by the C-code in the queryTokenizer and 00276 ** registerTokenizer() functions above. These two functions are repeated 00277 ** in the README.tokenizer file as an example, so it is important to 00278 ** test them. 00279 ** 00280 ** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar 00281 ** function with no arguments. An assert() will fail if a problem is 00282 ** detected. 
i.e.: 00283 ** 00284 ** SELECT fts2_tokenizer_internal_test(); 00285 ** 00286 */ 00287 static void intTestFunc( 00288 sqlite3_context *context, 00289 int argc, 00290 sqlite3_value **argv 00291 ){ 00292 int rc; 00293 const sqlite3_tokenizer_module *p1; 00294 const sqlite3_tokenizer_module *p2; 00295 sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); 00296 00297 /* Test the query function */ 00298 sqlite3Fts2SimpleTokenizerModule(&p1); 00299 rc = queryTokenizer(db, "simple", &p2); 00300 assert( rc==SQLITE_OK ); 00301 assert( p1==p2 ); 00302 rc = queryTokenizer(db, "nosuchtokenizer", &p2); 00303 assert( rc==SQLITE_ERROR ); 00304 assert( p2==0 ); 00305 assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); 00306 00307 /* Test the storage function */ 00308 rc = registerTokenizer(db, "nosuchtokenizer", p1); 00309 assert( rc==SQLITE_OK ); 00310 rc = queryTokenizer(db, "nosuchtokenizer", &p2); 00311 assert( rc==SQLITE_OK ); 00312 assert( p2==p1 ); 00313 00314 sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); 00315 } 00316 00317 #endif 00318 00319 /* 00320 ** Set up SQL objects in database db used to access the contents of 00321 ** the hash table pointed to by argument pHash. The hash table must 00322 ** been initialised to use string keys, and to take a private copy 00323 ** of the key when a value is inserted. i.e. by a call similar to: 00324 ** 00325 ** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); 00326 ** 00327 ** This function adds a scalar function (see header comment above 00328 ** scalarFunc() in this file for details) and, if ENABLE_TABLE is 00329 ** defined at compilation time, a temporary virtual table (see header 00330 ** comment above struct HashTableVtab) to the database schema. Both 00331 ** provide read/write access to the contents of *pHash. 00332 ** 00333 ** The third argument to this function, zName, is used as the name 00334 ** of both the scalar and, if created, the virtual table. 
00335 */ 00336 int sqlite3Fts2InitHashTable( 00337 sqlite3 *db, 00338 fts2Hash *pHash, 00339 const char *zName 00340 ){ 00341 int rc = SQLITE_OK; 00342 void *p = (void *)pHash; 00343 const int any = SQLITE_ANY; 00344 char *zTest = 0; 00345 char *zTest2 = 0; 00346 00347 #ifdef SQLITE_TEST 00348 void *pdb = (void *)db; 00349 zTest = sqlite3_mprintf("%s_test", zName); 00350 zTest2 = sqlite3_mprintf("%s_internal_test", zName); 00351 if( !zTest || !zTest2 ){ 00352 rc = SQLITE_NOMEM; 00353 } 00354 #endif 00355 00356 if( rc!=SQLITE_OK 00357 || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) 00358 || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) 00359 #ifdef SQLITE_TEST 00360 || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) 00361 || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) 00362 || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) 00363 #endif 00364 ); 00365 00366 sqlite3_free(zTest); 00367 sqlite3_free(zTest2); 00368 return rc; 00369 } 00370 00371 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:53 2011 by Doxygen 1.6.1