llex.c

Go to the documentation of this file.
00001 /*
00002 ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
00003 ** Lexical Analyzer
00004 ** See Copyright Notice in lua.h
00005 */
00006 
00007 
00008 #include <ctype.h>
00009 #include <locale.h>
00010 #include <string.h>
00011 
00012 #define llex_c
00013 #define LUA_CORE
00014 
00015 #include "lua.h"
00016 
00017 #include "ldo.h"
00018 #include "llex.h"
00019 #include "lobject.h"
00020 #include "lparser.h"
00021 #include "lstate.h"
00022 #include "lstring.h"
00023 #include "ltable.h"
00024 #include "lzio.h"
00025 
00026 
00027 
/*
** Advance the lexer by one character: read the next character from the
** input stream `ls->z' with zgetc and store it in `ls->current'.
** The parameter is parenthesized so that any pointer expression (not just
** a plain identifier) may be passed. Note that `ls' is evaluated twice.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))




/*
** True when the current character is a line terminator (`\n' or `\r').
** `ls' is evaluated twice; do not pass an expression with side effects.
*/
#define currIsNewline(ls) \
  ((ls)->current == '\n' || (ls)->current == '\r')
00034 
00035 
/*
** Printable text for every token, indexed by (token - FIRST_RESERVED).
** The first NUM_RESERVED entries are the reserved words registered by
** luaX_init. The array ends with a NULL sentinel.
** ORDER RESERVED
*/
const char *const luaX_tokens [] = {
    /* reserved words */
    "and",
    "break",
    "do",
    "else",
    "elseif",
    "end",
    "false",
    "for",
    "function",
    "if",
    "in",
    "local",
    "nil",
    "not",
    "or",
    "repeat",
    "return",
    "then",
    "true",
    "until",
    "while",
    /* multi-character operators */
    "..",
    "...",
    "==",
    ">=",
    "<=",
    "~=",
    /* token classes (shown in messages) */
    "<number>",
    "<name>",
    "<string>",
    "<eof>",
    NULL
};
00046 
00047 
/*
** Append the current character to the token buffer (save), then advance
** the input by one character (next). The parameter is parenthesized at
** every use so that any pointer expression may be passed; `ls' is
** evaluated more than once.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next((ls)))
00049 
00050 
00051 static void save (LexState *ls, int c) {
00052   Mbuffer *b = ls->buff;
00053   if (b->n + 1 > b->buffsize) {
00054     size_t newsize;
00055     if (b->buffsize >= MAX_SIZET/2)
00056       luaX_lexerror(ls, "lexical element too long", 0);
00057     newsize = b->buffsize * 2;
00058     luaZ_resizebuffer(ls->L, b, newsize);
00059   }
00060   b->buffer[b->n++] = cast(char, c);
00061 }
00062 
00063 
00064 void luaX_init (lua_State *L) {
00065   int i;
00066   for (i=0; i<NUM_RESERVED; i++) {
00067     TString *ts = luaS_new(L, luaX_tokens[i]);
00068     luaS_fix(ts);  /* reserved words are never collected */
00069     lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
00070     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
00071   }
00072 }
00073 
00074 
/* size of the buffer that holds the chunk id shown in error messages */
#define MAXSRC 80
00076 
00077 
00078 const char *luaX_token2str (LexState *ls, int token) {
00079   if (token < FIRST_RESERVED) {
00080     lua_assert(token == cast(unsigned char, token));
00081     return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
00082                               luaO_pushfstring(ls->L, "%c", token);
00083   }
00084   else
00085     return luaX_tokens[token-FIRST_RESERVED];
00086 }
00087 
00088 
00089 static const char *txtToken (LexState *ls, int token) {
00090   switch (token) {
00091     case TK_NAME:
00092     case TK_STRING:
00093     case TK_NUMBER:
00094       save(ls, '\0');
00095       return luaZ_buffer(ls->buff);
00096     default:
00097       return luaX_token2str(ls, token);
00098   }
00099 }
00100 
00101 
00102 void luaX_lexerror (LexState *ls, const char *msg, int token) {
00103   char buff[MAXSRC];
00104   luaO_chunkid(buff, getstr(ls->source), MAXSRC);
00105   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
00106   if (token)
00107     luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
00108   luaD_throw(ls->L, LUA_ERRSYNTAX);
00109 }
00110 
00111 
00112 void luaX_syntaxerror (LexState *ls, const char *msg) {
00113   luaX_lexerror(ls, msg, ls->t.token);
00114 }
00115 
00116 
00117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
00118   lua_State *L = ls->L;
00119   TString *ts = luaS_newlstr(L, str, l);
00120   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
00121   if (ttisnil(o))
00122     setbvalue(o, 1);  /* make sure `str' will not be collected */
00123   return ts;
00124 }
00125 
00126 
00127 static void inclinenumber (LexState *ls) {
00128   int old = ls->current;
00129   lua_assert(currIsNewline(ls));
00130   next(ls);  /* skip `\n' or `\r' */
00131   if (currIsNewline(ls) && ls->current != old)
00132     next(ls);  /* skip `\n\r' or `\r\n' */
00133   if (++ls->linenumber >= MAX_INT)
00134     luaX_syntaxerror(ls, "chunk has too many lines");
00135 }
00136 
00137 
00138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
00139   ls->decpoint = '.';
00140   ls->L = L;
00141   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
00142   ls->z = z;
00143   ls->fs = NULL;
00144   ls->linenumber = 1;
00145   ls->lastline = 1;
00146   ls->source = source;
00147   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
00148   next(ls);  /* read first char */
00149 }
00150 
00151 
00152 
00153 /*
00154 ** =======================================================
00155 ** LEXICAL ANALYZER
00156 ** =======================================================
00157 */
00158 
00159 
00160 
00161 static int check_next (LexState *ls, const char *set) {
00162   if (!strchr(set, ls->current))
00163     return 0;
00164   save_and_next(ls);
00165   return 1;
00166 }
00167 
00168 
00169 static void buffreplace (LexState *ls, char from, char to) {
00170   size_t n = luaZ_bufflen(ls->buff);
00171   char *p = luaZ_buffer(ls->buff);
00172   while (n--)
00173     if (p[n] == from) p[n] = to;
00174 }
00175 
00176 
00177 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
00178   /* format error: try to update decimal point separator */
00179   struct lconv *cv = localeconv();
00180   char old = ls->decpoint;
00181   ls->decpoint = (cv ? cv->decimal_point[0] : '.');
00182   buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
00183   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
00184     /* format error with correct decimal point: no more options */
00185     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
00186     luaX_lexerror(ls, "malformed number", TK_NUMBER);
00187   }
00188 }
00189 
00190 
00191 /* LUA_NUMBER */
00192 static void read_numeral (LexState *ls, SemInfo *seminfo) {
00193   lua_assert(isdigit(ls->current));
00194   do {
00195     save_and_next(ls);
00196   } while (isdigit(ls->current) || ls->current == '.');
00197   if (check_next(ls, "Ee"))  /* `E'? */
00198     check_next(ls, "+-");  /* optional exponent sign */
00199   while (isalnum(ls->current) || ls->current == '_')
00200     save_and_next(ls);
00201   save(ls, '\0');
00202   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
00203   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
00204     trydecpoint(ls, seminfo); /* try to update decimal point separator */
00205 }
00206 
00207 
00208 static int skip_sep (LexState *ls) {
00209   int count = 0;
00210   int s = ls->current;
00211   lua_assert(s == '[' || s == ']');
00212   save_and_next(ls);
00213   while (ls->current == '=') {
00214     save_and_next(ls);
00215     count++;
00216   }
00217   return (ls->current == s) ? count : (-count) - 1;
00218 }
00219 
00220 
00221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
00222   int cont = 0;
00223   (void)(cont);  /* avoid warnings when `cont' is not used */
00224   save_and_next(ls);  /* skip 2nd `[' */
00225   if (currIsNewline(ls))  /* string starts with a newline? */
00226     inclinenumber(ls);  /* skip it */
00227   for (;;) {
00228     switch (ls->current) {
00229       case EOZ:
00230         luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
00231                                    "unfinished long comment", TK_EOS);
00232         break;  /* to avoid warnings */
00233 #if defined(LUA_COMPAT_LSTR)
00234       case '[': {
00235         if (skip_sep(ls) == sep) {
00236           save_and_next(ls);  /* skip 2nd `[' */
00237           cont++;
00238 #if LUA_COMPAT_LSTR == 1
00239           if (sep == 0)
00240             luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
00241 #endif
00242         }
00243         break;
00244       }
00245 #endif
00246       case ']': {
00247         if (skip_sep(ls) == sep) {
00248           save_and_next(ls);  /* skip 2nd `]' */
00249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
00250           cont--;
00251           if (sep == 0 && cont >= 0) break;
00252 #endif
00253           goto endloop;
00254         }
00255         break;
00256       }
00257       case '\n':
00258       case '\r': {
00259         save(ls, '\n');
00260         inclinenumber(ls);
00261         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
00262         break;
00263       }
00264       default: {
00265         if (seminfo) save_and_next(ls);
00266         else next(ls);
00267       }
00268     }
00269   } endloop:
00270   if (seminfo)
00271     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
00272                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
00273 }
00274 
00275 
00276 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
00277   save_and_next(ls);
00278   while (ls->current != del) {
00279     switch (ls->current) {
00280       case EOZ:
00281         luaX_lexerror(ls, "unfinished string", TK_EOS);
00282         continue;  /* to avoid warnings */
00283       case '\n':
00284       case '\r':
00285         luaX_lexerror(ls, "unfinished string", TK_STRING);
00286         continue;  /* to avoid warnings */
00287       case '\\': {
00288         int c;
00289         next(ls);  /* do not save the `\' */
00290         switch (ls->current) {
00291           case 'a': c = '\a'; break;
00292           case 'b': c = '\b'; break;
00293           case 'f': c = '\f'; break;
00294           case 'n': c = '\n'; break;
00295           case 'r': c = '\r'; break;
00296           case 't': c = '\t'; break;
00297           case 'v': c = '\v'; break;
00298           case '\n':  /* go through */
00299           case '\r': save(ls, '\n'); inclinenumber(ls); continue;
00300           case EOZ: continue;  /* will raise an error next loop */
00301           default: {
00302             if (!isdigit(ls->current))
00303               save_and_next(ls);  /* handles \\, \", \', and \? */
00304             else {  /* \xxx */
00305               int i = 0;
00306               c = 0;
00307               do {
00308                 c = 10*c + (ls->current-'0');
00309                 next(ls);
00310               } while (++i<3 && isdigit(ls->current));
00311               if (c > UCHAR_MAX)
00312                 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
00313               save(ls, c);
00314             }
00315             continue;
00316           }
00317         }
00318         save(ls, c);
00319         next(ls);
00320         continue;
00321       }
00322       default:
00323         save_and_next(ls);
00324     }
00325   }
00326   save_and_next(ls);  /* skip delimiter */
00327   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
00328                                    luaZ_bufflen(ls->buff) - 2);
00329 }
00330 
00331 
00332 static int llex (LexState *ls, SemInfo *seminfo) {
00333   luaZ_resetbuffer(ls->buff);
00334   for (;;) {
00335     switch (ls->current) {
00336       case '\n':
00337       case '\r': {
00338         inclinenumber(ls);
00339         continue;
00340       }
00341       case '-': {
00342         next(ls);
00343         if (ls->current != '-') return '-';
00344         /* else is a comment */
00345         next(ls);
00346         if (ls->current == '[') {
00347           int sep = skip_sep(ls);
00348           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
00349           if (sep >= 0) {
00350             read_long_string(ls, NULL, sep);  /* long comment */
00351             luaZ_resetbuffer(ls->buff);
00352             continue;
00353           }
00354         }
00355         /* else short comment */
00356         while (!currIsNewline(ls) && ls->current != EOZ)
00357           next(ls);
00358         continue;
00359       }
00360       case '[': {
00361         int sep = skip_sep(ls);
00362         if (sep >= 0) {
00363           read_long_string(ls, seminfo, sep);
00364           return TK_STRING;
00365         }
00366         else if (sep == -1) return '[';
00367         else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
00368       }
00369       case '=': {
00370         next(ls);
00371         if (ls->current != '=') return '=';
00372         else { next(ls); return TK_EQ; }
00373       }
00374       case '<': {
00375         next(ls);
00376         if (ls->current != '=') return '<';
00377         else { next(ls); return TK_LE; }
00378       }
00379       case '>': {
00380         next(ls);
00381         if (ls->current != '=') return '>';
00382         else { next(ls); return TK_GE; }
00383       }
00384       case '~': {
00385         next(ls);
00386         if (ls->current != '=') return '~';
00387         else { next(ls); return TK_NE; }
00388       }
00389       case '"':
00390       case '\'': {
00391         read_string(ls, ls->current, seminfo);
00392         return TK_STRING;
00393       }
00394       case '.': {
00395         save_and_next(ls);
00396         if (check_next(ls, ".")) {
00397           if (check_next(ls, "."))
00398             return TK_DOTS;   /* ... */
00399           else return TK_CONCAT;   /* .. */
00400         }
00401         else if (!isdigit(ls->current)) return '.';
00402         else {
00403           read_numeral(ls, seminfo);
00404           return TK_NUMBER;
00405         }
00406       }
00407       case EOZ: {
00408         return TK_EOS;
00409       }
00410       default: {
00411         if (isspace(ls->current)) {
00412           lua_assert(!currIsNewline(ls));
00413           next(ls);
00414           continue;
00415         }
00416         else if (isdigit(ls->current)) {
00417           read_numeral(ls, seminfo);
00418           return TK_NUMBER;
00419         }
00420         else if (isalpha(ls->current) || ls->current == '_') {
00421           /* identifier or reserved word */
00422           TString *ts;
00423           do {
00424             save_and_next(ls);
00425           } while (isalnum(ls->current) || ls->current == '_');
00426           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
00427                                   luaZ_bufflen(ls->buff));
00428           if (ts->tsv.reserved > 0)  /* reserved word? */
00429             return ts->tsv.reserved - 1 + FIRST_RESERVED;
00430           else {
00431             seminfo->ts = ts;
00432             return TK_NAME;
00433           }
00434         }
00435         else {
00436           int c = ls->current;
00437           next(ls);
00438           return c;  /* single-char tokens (+ - / ...) */
00439         }
00440       }
00441     }
00442   }
00443 }
00444 
00445 
00446 void luaX_next (LexState *ls) {
00447   ls->lastline = ls->linenumber;
00448   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
00449     ls->t = ls->lookahead;  /* use this one */
00450     ls->lookahead.token = TK_EOS;  /* and discharge it */
00451   }
00452   else
00453     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
00454 }
00455 
00456 
00457 void luaX_lookahead (LexState *ls) {
00458   lua_assert(ls->lookahead.token == TK_EOS);
00459   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
00460 }
00461 

ContextLogger2—ContextLogger2 Logger Daemon Internals—Generated on Mon May 2 13:49:54 2011 by Doxygen 1.6.1