/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.152 2008/09/01 15:52:11 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
#include <stdlib.h>

/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
00030 */ 00031 #ifdef SQLITE_ASCII 00032 # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 00033 #endif 00034 #ifdef SQLITE_EBCDIC 00035 # define charMap(X) ebcdicToAscii[(unsigned char)X] 00036 const unsigned char ebcdicToAscii[] = { 00037 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 00038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 00039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 00040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 00041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 00042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 00043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 00044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 00045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 00046 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 00047 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 00048 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 00049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 00050 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 00051 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 00052 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 00053 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 00054 }; 00055 #endif 00056 00057 /* 00058 ** The sqlite3KeywordCode function looks up an identifier to determine if 00059 ** it is a keyword. If it is a keyword, the token code of that keyword is 00060 ** returned. If the input is not a keyword, TK_ID is returned. 00061 ** 00062 ** The implementation of this routine was generated by a program, 00063 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 00064 ** The output of the mkkeywordhash.c program is written into a file 00065 ** named keywordhash.h and then included into this source file by 00066 ** the #include below. 
00067 */ 00068 #include "keywordhash.h" 00069 00070 00071 /* 00072 ** If X is a character that can be used in an identifier then 00073 ** IdChar(X) will be true. Otherwise it is false. 00074 ** 00075 ** For ASCII, any character with the high-order bit set is 00076 ** allowed in an identifier. For 7-bit characters, 00077 ** sqlite3IsIdChar[X] must be 1. 00078 ** 00079 ** For EBCDIC, the rules are more complex but have the same 00080 ** end result. 00081 ** 00082 ** Ticket #1066. the SQL standard does not allow '$' in the 00083 ** middle of identfiers. But many SQL implementations do. 00084 ** SQLite will allow '$' in identifiers for compatibility. 00085 ** But the feature is undocumented. 00086 */ 00087 #ifdef SQLITE_ASCII 00088 const char sqlite3IsAsciiIdChar[] = { 00089 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 00090 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 00091 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 00092 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 00093 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 00094 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 00095 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 00096 }; 00097 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20])) 00098 #endif 00099 #ifdef SQLITE_EBCDIC 00100 const char sqlite3IsEbcdicIdChar[] = { 00101 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 00102 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 00103 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 00104 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 00105 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 00106 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 00107 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 00108 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 00109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 00110 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 
1, 1, 1, 1, /* Cx */ 00111 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 00112 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 00113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 00114 }; 00115 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 00116 #endif 00117 00118 00119 /* 00120 ** Return the length of the token that begins at z[0]. 00121 ** Store the token type in *tokenType before returning. 00122 */ 00123 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 00124 int i, c; 00125 switch( *z ){ 00126 case ' ': case '\t': case '\n': case '\f': case '\r': { 00127 for(i=1; isspace(z[i]); i++){} 00128 *tokenType = TK_SPACE; 00129 return i; 00130 } 00131 case '-': { 00132 if( z[1]=='-' ){ 00133 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 00134 *tokenType = TK_SPACE; 00135 return i; 00136 } 00137 *tokenType = TK_MINUS; 00138 return 1; 00139 } 00140 case '(': { 00141 *tokenType = TK_LP; 00142 return 1; 00143 } 00144 case ')': { 00145 *tokenType = TK_RP; 00146 return 1; 00147 } 00148 case ';': { 00149 *tokenType = TK_SEMI; 00150 return 1; 00151 } 00152 case '+': { 00153 *tokenType = TK_PLUS; 00154 return 1; 00155 } 00156 case '*': { 00157 *tokenType = TK_STAR; 00158 return 1; 00159 } 00160 case '/': { 00161 if( z[1]!='*' || z[2]==0 ){ 00162 *tokenType = TK_SLASH; 00163 return 1; 00164 } 00165 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 00166 if( c ) i++; 00167 *tokenType = TK_SPACE; 00168 return i; 00169 } 00170 case '%': { 00171 *tokenType = TK_REM; 00172 return 1; 00173 } 00174 case '=': { 00175 *tokenType = TK_EQ; 00176 return 1 + (z[1]=='='); 00177 } 00178 case '<': { 00179 if( (c=z[1])=='=' ){ 00180 *tokenType = TK_LE; 00181 return 2; 00182 }else if( c=='>' ){ 00183 *tokenType = TK_NE; 00184 return 2; 00185 }else if( c=='<' ){ 00186 *tokenType = TK_LSHIFT; 00187 return 2; 00188 }else{ 00189 *tokenType = TK_LT; 00190 return 1; 00191 } 00192 } 00193 case '>': { 00194 if( (c=z[1])=='=' ){ 00195 
*tokenType = TK_GE; 00196 return 2; 00197 }else if( c=='>' ){ 00198 *tokenType = TK_RSHIFT; 00199 return 2; 00200 }else{ 00201 *tokenType = TK_GT; 00202 return 1; 00203 } 00204 } 00205 case '!': { 00206 if( z[1]!='=' ){ 00207 *tokenType = TK_ILLEGAL; 00208 return 2; 00209 }else{ 00210 *tokenType = TK_NE; 00211 return 2; 00212 } 00213 } 00214 case '|': { 00215 if( z[1]!='|' ){ 00216 *tokenType = TK_BITOR; 00217 return 1; 00218 }else{ 00219 *tokenType = TK_CONCAT; 00220 return 2; 00221 } 00222 } 00223 case ',': { 00224 *tokenType = TK_COMMA; 00225 return 1; 00226 } 00227 case '&': { 00228 *tokenType = TK_BITAND; 00229 return 1; 00230 } 00231 case '~': { 00232 *tokenType = TK_BITNOT; 00233 return 1; 00234 } 00235 case '`': 00236 case '\'': 00237 case '"': { 00238 int delim = z[0]; 00239 for(i=1; (c=z[i])!=0; i++){ 00240 if( c==delim ){ 00241 if( z[i+1]==delim ){ 00242 i++; 00243 }else{ 00244 break; 00245 } 00246 } 00247 } 00248 if( c=='\'' ){ 00249 *tokenType = TK_STRING; 00250 return i+1; 00251 }else if( c!=0 ){ 00252 *tokenType = TK_ID; 00253 return i+1; 00254 }else{ 00255 *tokenType = TK_ILLEGAL; 00256 return i; 00257 } 00258 } 00259 case '.': { 00260 #ifndef SQLITE_OMIT_FLOATING_POINT 00261 if( !isdigit(z[1]) ) 00262 #endif 00263 { 00264 *tokenType = TK_DOT; 00265 return 1; 00266 } 00267 /* If the next character is a digit, this is a floating point 00268 ** number that begins with ".". Fall thru into the next case */ 00269 } 00270 case '0': case '1': case '2': case '3': case '4': 00271 case '5': case '6': case '7': case '8': case '9': { 00272 *tokenType = TK_INTEGER; 00273 for(i=0; isdigit(z[i]); i++){} 00274 #ifndef SQLITE_OMIT_FLOATING_POINT 00275 if( z[i]=='.' 
){ 00276 i++; 00277 while( isdigit(z[i]) ){ i++; } 00278 *tokenType = TK_FLOAT; 00279 } 00280 if( (z[i]=='e' || z[i]=='E') && 00281 ( isdigit(z[i+1]) 00282 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 00283 ) 00284 ){ 00285 i += 2; 00286 while( isdigit(z[i]) ){ i++; } 00287 *tokenType = TK_FLOAT; 00288 } 00289 #endif 00290 while( IdChar(z[i]) ){ 00291 *tokenType = TK_ILLEGAL; 00292 i++; 00293 } 00294 return i; 00295 } 00296 case '[': { 00297 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 00298 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 00299 return i; 00300 } 00301 case '?': { 00302 *tokenType = TK_VARIABLE; 00303 for(i=1; isdigit(z[i]); i++){} 00304 return i; 00305 } 00306 case '#': { 00307 for(i=1; isdigit(z[i]); i++){} 00308 if( i>1 ){ 00309 /* Parameters of the form #NNN (where NNN is a number) are used 00310 ** internally by sqlite3NestedParse. */ 00311 *tokenType = TK_REGISTER; 00312 return i; 00313 } 00314 /* Fall through into the next case if the '#' is not followed by 00315 ** a digit. Try to match #AAAA where AAAA is a parameter name. 
*/ 00316 } 00317 #ifndef SQLITE_OMIT_TCL_VARIABLE 00318 case '$': 00319 #endif 00320 case '@': /* For compatibility with MS SQL Server */ 00321 case ':': { 00322 int n = 0; 00323 *tokenType = TK_VARIABLE; 00324 for(i=1; (c=z[i])!=0; i++){ 00325 if( IdChar(c) ){ 00326 n++; 00327 #ifndef SQLITE_OMIT_TCL_VARIABLE 00328 }else if( c=='(' && n>0 ){ 00329 do{ 00330 i++; 00331 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); 00332 if( c==')' ){ 00333 i++; 00334 }else{ 00335 *tokenType = TK_ILLEGAL; 00336 } 00337 break; 00338 }else if( c==':' && z[i+1]==':' ){ 00339 i++; 00340 #endif 00341 }else{ 00342 break; 00343 } 00344 } 00345 if( n==0 ) *tokenType = TK_ILLEGAL; 00346 return i; 00347 } 00348 #ifndef SQLITE_OMIT_BLOB_LITERAL 00349 case 'x': case 'X': { 00350 if( z[1]=='\'' ){ 00351 *tokenType = TK_BLOB; 00352 for(i=2; (c=z[i])!=0 && c!='\''; i++){ 00353 if( !isxdigit(c) ){ 00354 *tokenType = TK_ILLEGAL; 00355 } 00356 } 00357 if( i%2 || !c ) *tokenType = TK_ILLEGAL; 00358 if( c ) i++; 00359 return i; 00360 } 00361 /* Otherwise fall through to the next case */ 00362 } 00363 #endif 00364 default: { 00365 if( !IdChar(*z) ){ 00366 break; 00367 } 00368 for(i=1; IdChar(z[i]); i++){} 00369 *tokenType = keywordCode((char*)z, i); 00370 return i; 00371 } 00372 } 00373 *tokenType = TK_ILLEGAL; 00374 return 1; 00375 } 00376 00377 /* 00378 ** Run the parser on the given SQL string. The parser structure is 00379 ** passed in. An SQLITE_ status code is returned. If an error occurs 00380 ** then an and attempt is made to write an error message into 00381 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that 00382 ** error message. 
00383 */ 00384 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 00385 int nErr = 0; 00386 int i; 00387 void *pEngine; 00388 int tokenType; 00389 int lastTokenParsed = -1; 00390 sqlite3 *db = pParse->db; 00391 int mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; 00392 00393 if( db->activeVdbeCnt==0 ){ 00394 db->u1.isInterrupted = 0; 00395 } 00396 pParse->rc = SQLITE_OK; 00397 pParse->zTail = pParse->zSql = zSql; 00398 i = 0; 00399 assert( pzErrMsg!=0 ); 00400 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc); 00401 if( pEngine==0 ){ 00402 db->mallocFailed = 1; 00403 return SQLITE_NOMEM; 00404 } 00405 assert( pParse->sLastToken.dyn==0 ); 00406 assert( pParse->pNewTable==0 ); 00407 assert( pParse->pNewTrigger==0 ); 00408 assert( pParse->nVar==0 ); 00409 assert( pParse->nVarExpr==0 ); 00410 assert( pParse->nVarExprAlloc==0 ); 00411 assert( pParse->apVarExpr==0 ); 00412 while( !db->mallocFailed && zSql[i]!=0 ){ 00413 assert( i>=0 ); 00414 pParse->sLastToken.z = (u8*)&zSql[i]; 00415 assert( pParse->sLastToken.dyn==0 ); 00416 pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); 00417 i += pParse->sLastToken.n; 00418 if( i>mxSqlLen ){ 00419 pParse->rc = SQLITE_TOOBIG; 00420 break; 00421 } 00422 switch( tokenType ){ 00423 case TK_SPACE: { 00424 if( db->u1.isInterrupted ){ 00425 pParse->rc = SQLITE_INTERRUPT; 00426 sqlite3SetString(pzErrMsg, db, "interrupt"); 00427 goto abort_parse; 00428 } 00429 break; 00430 } 00431 case TK_ILLEGAL: { 00432 sqlite3DbFree(db, *pzErrMsg); 00433 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", 00434 &pParse->sLastToken); 00435 nErr++; 00436 goto abort_parse; 00437 } 00438 case TK_SEMI: { 00439 pParse->zTail = &zSql[i]; 00440 /* Fall thru into the default case */ 00441 } 00442 default: { 00443 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 00444 lastTokenParsed = tokenType; 00445 if( pParse->rc!=SQLITE_OK ){ 00446 goto abort_parse; 00447 } 00448 break; 00449 } 
00450 } 00451 } 00452 abort_parse: 00453 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 00454 if( lastTokenParsed!=TK_SEMI ){ 00455 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 00456 pParse->zTail = &zSql[i]; 00457 } 00458 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 00459 } 00460 #ifdef YYTRACKMAXSTACKDEPTH 00461 sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, 00462 sqlite3ParserStackPeak(pEngine) 00463 ); 00464 #endif /* YYDEBUG */ 00465 sqlite3ParserFree(pEngine, sqlite3_free); 00466 if( db->mallocFailed ){ 00467 pParse->rc = SQLITE_NOMEM; 00468 } 00469 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 00470 sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); 00471 } 00472 if( pParse->zErrMsg ){ 00473 if( *pzErrMsg==0 ){ 00474 *pzErrMsg = pParse->zErrMsg; 00475 }else{ 00476 sqlite3DbFree(db, pParse->zErrMsg); 00477 } 00478 pParse->zErrMsg = 0; 00479 nErr++; 00480 } 00481 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 00482 sqlite3VdbeDelete(pParse->pVdbe); 00483 pParse->pVdbe = 0; 00484 } 00485 #ifndef SQLITE_OMIT_SHARED_CACHE 00486 if( pParse->nested==0 ){ 00487 sqlite3DbFree(db, pParse->aTableLock); 00488 pParse->aTableLock = 0; 00489 pParse->nTableLock = 0; 00490 } 00491 #endif 00492 #ifndef SQLITE_OMIT_VIRTUALTABLE 00493 sqlite3DbFree(db, pParse->apVtabLock); 00494 #endif 00495 00496 if( !IN_DECLARE_VTAB ){ 00497 /* If the pParse->declareVtab flag is set, do not delete any table 00498 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 00499 ** will take responsibility for freeing the Table structure. 
00500 */ 00501 sqlite3DeleteTable(pParse->pNewTable); 00502 } 00503 00504 sqlite3DeleteTrigger(db, pParse->pNewTrigger); 00505 sqlite3DbFree(db, pParse->apVarExpr); 00506 sqlite3DbFree(db, pParse->aAlias); 00507 while( pParse->pZombieTab ){ 00508 Table *p = pParse->pZombieTab; 00509 pParse->pZombieTab = p->pNextZombie; 00510 sqlite3DeleteTable(p); 00511 } 00512 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ 00513 pParse->rc = SQLITE_ERROR; 00514 } 00515 return nErr; 00516 }
/* ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:56 2011 by Doxygen 1.6.1 */