00001 /* 00002 ** 2006 July 10 00003 ** 00004 ** The author disclaims copyright to this source code. 00005 ** 00006 ************************************************************************* 00007 ** Defines the interface to tokenizers used by fulltext-search. There 00008 ** are three basic components: 00009 ** 00010 ** sqlite3_tokenizer_module is a singleton defining the tokenizer 00011 ** interface functions. This is essentially the class structure for 00012 ** tokenizers. 00013 ** 00014 ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps 00015 ** including customization information defined at creation time. 00016 ** 00017 ** sqlite3_tokenizer_cursor is generated by a tokenizer to generate 00018 ** tokens from a particular input. 00019 */ 00020 #ifndef _FTS1_TOKENIZER_H_ 00021 #define _FTS1_TOKENIZER_H_ 00022 00023 /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. 00024 ** If tokenizers are to be allowed to call sqlite3_*() functions, then 00025 ** we will need a way to register the API consistently. 00026 */ 00027 #include "sqlite3.h" 00028 00029 /* 00030 ** Structures used by the tokenizer interface. 00031 */ 00032 typedef struct sqlite3_tokenizer sqlite3_tokenizer; 00033 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; 00034 typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; 00035 00036 struct sqlite3_tokenizer_module { 00037 int iVersion; /* currently 0 */ 00038 00039 /* 00040 ** Create and destroy a tokenizer. argc/argv are passed down from 00041 ** the fulltext virtual table creation to allow customization. 00042 */ 00043 int (*xCreate)(int argc, const char *const*argv, 00044 sqlite3_tokenizer **ppTokenizer); 00045 int (*xDestroy)(sqlite3_tokenizer *pTokenizer); 00046 00047 /* 00048 ** Tokenize a particular input. Call xOpen() to prepare to 00049 ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then 00050 ** xClose() to free any internal state. The pInput passed to 00051 ** xOpen() must exist until the cursor is closed. The ppToken 00052 ** result from xNext() is only valid until the next call to xNext() 00053 ** or until xClose() is called. 00054 */ 00055 /* TODO(shess) current implementation requires pInput to be 00056 ** nul-terminated. This should either be fixed, or pInput/nBytes 00057 ** should be converted to zInput. 00058 */ 00059 int (*xOpen)(sqlite3_tokenizer *pTokenizer, 00060 const char *pInput, int nBytes, 00061 sqlite3_tokenizer_cursor **ppCursor); 00062 int (*xClose)(sqlite3_tokenizer_cursor *pCursor); 00063 int (*xNext)(sqlite3_tokenizer_cursor *pCursor, 00064 const char **ppToken, int *pnBytes, 00065 int *piStartOffset, int *piEndOffset, int *piPosition); 00066 }; 00067 00068 struct sqlite3_tokenizer { 00069 const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ 00070 /* Tokenizer implementations will typically add additional fields */ 00071 }; 00072 00073 struct sqlite3_tokenizer_cursor { 00074 sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ 00075 /* Tokenizer implementations will typically add additional fields */ 00076 }; 00077 00078 /* 00079 ** Get the module for a tokenizer which generates tokens based on a 00080 ** set of non-token characters. The default is to break tokens at any 00081 ** non-alnum character, though the set of delimiters can also be 00082 ** specified by the first argv argument to xCreate(). 00083 */ 00084 /* TODO(shess) This doesn't belong here. Need some sort of 00085 ** registration process. 00086 */ 00087 void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); 00088 void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); 00089 00090 #endif /* _FTS1_TOKENIZER_H_ */
ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:53 2011 by Doxygen 1.6.1