00001 /* 00002 ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ 00003 ** Lexical Analyzer 00004 ** See Copyright Notice in lua.h 00005 */ 00006 00007 00008 #include <ctype.h> 00009 #include <locale.h> 00010 #include <string.h> 00011 00012 #define llex_c 00013 #define LUA_CORE 00014 00015 #include "lua.h" 00016 00017 #include "ldo.h" 00018 #include "llex.h" 00019 #include "lobject.h" 00020 #include "lparser.h" 00021 #include "lstate.h" 00022 #include "lstring.h" 00023 #include "ltable.h" 00024 #include "lzio.h" 00025 00026 00027 00028 #define next(ls) (ls->current = zgetc(ls->z)) 00029 00030 00031 00032 00033 #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 00034 00035 00036 /* ORDER RESERVED */ 00037 const char *const luaX_tokens [] = { 00038 "and", "break", "do", "else", "elseif", 00039 "end", "false", "for", "function", "if", 00040 "in", "local", "nil", "not", "or", "repeat", 00041 "return", "then", "true", "until", "while", 00042 "..", "...", "==", ">=", "<=", "~=", 00043 "<number>", "<name>", "<string>", "<eof>", 00044 NULL 00045 }; 00046 00047 00048 #define save_and_next(ls) (save(ls, ls->current), next(ls)) 00049 00050 00051 static void save (LexState *ls, int c) { 00052 Mbuffer *b = ls->buff; 00053 if (b->n + 1 > b->buffsize) { 00054 size_t newsize; 00055 if (b->buffsize >= MAX_SIZET/2) 00056 luaX_lexerror(ls, "lexical element too long", 0); 00057 newsize = b->buffsize * 2; 00058 luaZ_resizebuffer(ls->L, b, newsize); 00059 } 00060 b->buffer[b->n++] = cast(char, c); 00061 } 00062 00063 00064 void luaX_init (lua_State *L) { 00065 int i; 00066 for (i=0; i<NUM_RESERVED; i++) { 00067 TString *ts = luaS_new(L, luaX_tokens[i]); 00068 luaS_fix(ts); /* reserved words are never collected */ 00069 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); 00070 ts->tsv.reserved = cast_byte(i+1); /* reserved word */ 00071 } 00072 } 00073 00074 00075 #define MAXSRC 80 00076 00077 00078 const char *luaX_token2str (LexState *ls, int token) { 00079 
if (token < FIRST_RESERVED) { 00080 lua_assert(token == cast(unsigned char, token)); 00081 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : 00082 luaO_pushfstring(ls->L, "%c", token); 00083 } 00084 else 00085 return luaX_tokens[token-FIRST_RESERVED]; 00086 } 00087 00088 00089 static const char *txtToken (LexState *ls, int token) { 00090 switch (token) { 00091 case TK_NAME: 00092 case TK_STRING: 00093 case TK_NUMBER: 00094 save(ls, '\0'); 00095 return luaZ_buffer(ls->buff); 00096 default: 00097 return luaX_token2str(ls, token); 00098 } 00099 } 00100 00101 00102 void luaX_lexerror (LexState *ls, const char *msg, int token) { 00103 char buff[MAXSRC]; 00104 luaO_chunkid(buff, getstr(ls->source), MAXSRC); 00105 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); 00106 if (token) 00107 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); 00108 luaD_throw(ls->L, LUA_ERRSYNTAX); 00109 } 00110 00111 00112 void luaX_syntaxerror (LexState *ls, const char *msg) { 00113 luaX_lexerror(ls, msg, ls->t.token); 00114 } 00115 00116 00117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 00118 lua_State *L = ls->L; 00119 TString *ts = luaS_newlstr(L, str, l); 00120 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ 00121 if (ttisnil(o)) 00122 setbvalue(o, 1); /* make sure `str' will not be collected */ 00123 return ts; 00124 } 00125 00126 00127 static void inclinenumber (LexState *ls) { 00128 int old = ls->current; 00129 lua_assert(currIsNewline(ls)); 00130 next(ls); /* skip `\n' or `\r' */ 00131 if (currIsNewline(ls) && ls->current != old) 00132 next(ls); /* skip `\n\r' or `\r\n' */ 00133 if (++ls->linenumber >= MAX_INT) 00134 luaX_syntaxerror(ls, "chunk has too many lines"); 00135 } 00136 00137 00138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { 00139 ls->decpoint = '.'; 00140 ls->L = L; 00141 ls->lookahead.token = TK_EOS; /* no look-ahead token */ 00142 ls->z = z; 
00143 ls->fs = NULL; 00144 ls->linenumber = 1; 00145 ls->lastline = 1; 00146 ls->source = source; 00147 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 00148 next(ls); /* read first char */ 00149 } 00150 00151 00152 00153 /* 00154 ** ======================================================= 00155 ** LEXICAL ANALYZER 00156 ** ======================================================= 00157 */ 00158 00159 00160 00161 static int check_next (LexState *ls, const char *set) { 00162 if (!strchr(set, ls->current)) 00163 return 0; 00164 save_and_next(ls); 00165 return 1; 00166 } 00167 00168 00169 static void buffreplace (LexState *ls, char from, char to) { 00170 size_t n = luaZ_bufflen(ls->buff); 00171 char *p = luaZ_buffer(ls->buff); 00172 while (n--) 00173 if (p[n] == from) p[n] = to; 00174 } 00175 00176 00177 static void trydecpoint (LexState *ls, SemInfo *seminfo) { 00178 /* format error: try to update decimal point separator */ 00179 struct lconv *cv = localeconv(); 00180 char old = ls->decpoint; 00181 ls->decpoint = (cv ? cv->decimal_point[0] : '.'); 00182 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ 00183 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { 00184 /* format error with correct decimal point: no more options */ 00185 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ 00186 luaX_lexerror(ls, "malformed number", TK_NUMBER); 00187 } 00188 } 00189 00190 00191 /* LUA_NUMBER */ 00192 static void read_numeral (LexState *ls, SemInfo *seminfo) { 00193 lua_assert(isdigit(ls->current)); 00194 do { 00195 save_and_next(ls); 00196 } while (isdigit(ls->current) || ls->current == '.'); 00197 if (check_next(ls, "Ee")) /* `E'? 
*/ 00198 check_next(ls, "+-"); /* optional exponent sign */ 00199 while (isalnum(ls->current) || ls->current == '_') 00200 save_and_next(ls); 00201 save(ls, '\0'); 00202 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ 00203 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ 00204 trydecpoint(ls, seminfo); /* try to update decimal point separator */ 00205 } 00206 00207 00208 static int skip_sep (LexState *ls) { 00209 int count = 0; 00210 int s = ls->current; 00211 lua_assert(s == '[' || s == ']'); 00212 save_and_next(ls); 00213 while (ls->current == '=') { 00214 save_and_next(ls); 00215 count++; 00216 } 00217 return (ls->current == s) ? count : (-count) - 1; 00218 } 00219 00220 00221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 00222 int cont = 0; 00223 (void)(cont); /* avoid warnings when `cont' is not used */ 00224 save_and_next(ls); /* skip 2nd `[' */ 00225 if (currIsNewline(ls)) /* string starts with a newline? */ 00226 inclinenumber(ls); /* skip it */ 00227 for (;;) { 00228 switch (ls->current) { 00229 case EOZ: 00230 luaX_lexerror(ls, (seminfo) ? 
"unfinished long string" : 00231 "unfinished long comment", TK_EOS); 00232 break; /* to avoid warnings */ 00233 #if defined(LUA_COMPAT_LSTR) 00234 case '[': { 00235 if (skip_sep(ls) == sep) { 00236 save_and_next(ls); /* skip 2nd `[' */ 00237 cont++; 00238 #if LUA_COMPAT_LSTR == 1 00239 if (sep == 0) 00240 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); 00241 #endif 00242 } 00243 break; 00244 } 00245 #endif 00246 case ']': { 00247 if (skip_sep(ls) == sep) { 00248 save_and_next(ls); /* skip 2nd `]' */ 00249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 00250 cont--; 00251 if (sep == 0 && cont >= 0) break; 00252 #endif 00253 goto endloop; 00254 } 00255 break; 00256 } 00257 case '\n': 00258 case '\r': { 00259 save(ls, '\n'); 00260 inclinenumber(ls); 00261 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 00262 break; 00263 } 00264 default: { 00265 if (seminfo) save_and_next(ls); 00266 else next(ls); 00267 } 00268 } 00269 } endloop: 00270 if (seminfo) 00271 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 00272 luaZ_bufflen(ls->buff) - 2*(2 + sep)); 00273 } 00274 00275 00276 static void read_string (LexState *ls, int del, SemInfo *seminfo) { 00277 save_and_next(ls); 00278 while (ls->current != del) { 00279 switch (ls->current) { 00280 case EOZ: 00281 luaX_lexerror(ls, "unfinished string", TK_EOS); 00282 continue; /* to avoid warnings */ 00283 case '\n': 00284 case '\r': 00285 luaX_lexerror(ls, "unfinished string", TK_STRING); 00286 continue; /* to avoid warnings */ 00287 case '\\': { 00288 int c; 00289 next(ls); /* do not save the `\' */ 00290 switch (ls->current) { 00291 case 'a': c = '\a'; break; 00292 case 'b': c = '\b'; break; 00293 case 'f': c = '\f'; break; 00294 case 'n': c = '\n'; break; 00295 case 'r': c = '\r'; break; 00296 case 't': c = '\t'; break; 00297 case 'v': c = '\v'; break; 00298 case '\n': /* go through */ 00299 case '\r': save(ls, '\n'); inclinenumber(ls); continue; 00300 case EOZ: continue; /* 
will raise an error next loop */ 00301 default: { 00302 if (!isdigit(ls->current)) 00303 save_and_next(ls); /* handles \\, \", \', and \? */ 00304 else { /* \xxx */ 00305 int i = 0; 00306 c = 0; 00307 do { 00308 c = 10*c + (ls->current-'0'); 00309 next(ls); 00310 } while (++i<3 && isdigit(ls->current)); 00311 if (c > UCHAR_MAX) 00312 luaX_lexerror(ls, "escape sequence too large", TK_STRING); 00313 save(ls, c); 00314 } 00315 continue; 00316 } 00317 } 00318 save(ls, c); 00319 next(ls); 00320 continue; 00321 } 00322 default: 00323 save_and_next(ls); 00324 } 00325 } 00326 save_and_next(ls); /* skip delimiter */ 00327 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 00328 luaZ_bufflen(ls->buff) - 2); 00329 } 00330 00331 00332 static int llex (LexState *ls, SemInfo *seminfo) { 00333 luaZ_resetbuffer(ls->buff); 00334 for (;;) { 00335 switch (ls->current) { 00336 case '\n': 00337 case '\r': { 00338 inclinenumber(ls); 00339 continue; 00340 } 00341 case '-': { 00342 next(ls); 00343 if (ls->current != '-') return '-'; 00344 /* else is a comment */ 00345 next(ls); 00346 if (ls->current == '[') { 00347 int sep = skip_sep(ls); 00348 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ 00349 if (sep >= 0) { 00350 read_long_string(ls, NULL, sep); /* long comment */ 00351 luaZ_resetbuffer(ls->buff); 00352 continue; 00353 } 00354 } 00355 /* else short comment */ 00356 while (!currIsNewline(ls) && ls->current != EOZ) 00357 next(ls); 00358 continue; 00359 } 00360 case '[': { 00361 int sep = skip_sep(ls); 00362 if (sep >= 0) { 00363 read_long_string(ls, seminfo, sep); 00364 return TK_STRING; 00365 } 00366 else if (sep == -1) return '['; 00367 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); 00368 } 00369 case '=': { 00370 next(ls); 00371 if (ls->current != '=') return '='; 00372 else { next(ls); return TK_EQ; } 00373 } 00374 case '<': { 00375 next(ls); 00376 if (ls->current != '=') return '<'; 00377 else { next(ls); return TK_LE; } 00378 } 
00379 case '>': { 00380 next(ls); 00381 if (ls->current != '=') return '>'; 00382 else { next(ls); return TK_GE; } 00383 } 00384 case '~': { 00385 next(ls); 00386 if (ls->current != '=') return '~'; 00387 else { next(ls); return TK_NE; } 00388 } 00389 case '"': 00390 case '\'': { 00391 read_string(ls, ls->current, seminfo); 00392 return TK_STRING; 00393 } 00394 case '.': { 00395 save_and_next(ls); 00396 if (check_next(ls, ".")) { 00397 if (check_next(ls, ".")) 00398 return TK_DOTS; /* ... */ 00399 else return TK_CONCAT; /* .. */ 00400 } 00401 else if (!isdigit(ls->current)) return '.'; 00402 else { 00403 read_numeral(ls, seminfo); 00404 return TK_NUMBER; 00405 } 00406 } 00407 case EOZ: { 00408 return TK_EOS; 00409 } 00410 default: { 00411 if (isspace(ls->current)) { 00412 lua_assert(!currIsNewline(ls)); 00413 next(ls); 00414 continue; 00415 } 00416 else if (isdigit(ls->current)) { 00417 read_numeral(ls, seminfo); 00418 return TK_NUMBER; 00419 } 00420 else if (isalpha(ls->current) || ls->current == '_') { 00421 /* identifier or reserved word */ 00422 TString *ts; 00423 do { 00424 save_and_next(ls); 00425 } while (isalnum(ls->current) || ls->current == '_'); 00426 ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 00427 luaZ_bufflen(ls->buff)); 00428 if (ts->tsv.reserved > 0) /* reserved word? */ 00429 return ts->tsv.reserved - 1 + FIRST_RESERVED; 00430 else { 00431 seminfo->ts = ts; 00432 return TK_NAME; 00433 } 00434 } 00435 else { 00436 int c = ls->current; 00437 next(ls); 00438 return c; /* single-char tokens (+ - / ...) */ 00439 } 00440 } 00441 } 00442 } 00443 } 00444 00445 00446 void luaX_next (LexState *ls) { 00447 ls->lastline = ls->linenumber; 00448 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? 
*/ 00449 ls->t = ls->lookahead; /* use this one */ 00450 ls->lookahead.token = TK_EOS; /* and discharge it */ 00451 } 00452 else 00453 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 00454 } 00455 00456 00457 void luaX_lookahead (LexState *ls) { 00458 lua_assert(ls->lookahead.token == TK_EOS); 00459 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 00460 } 00461
/* ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:54 2011 by Doxygen 1.6.1 */