misc/liblua/llex.c
changeset 2812 0a24853de796
child 6039 d75329716a02
equal deleted inserted replaced
2811:4cad87e11bf6 2812:0a24853de796
       
     1 /*
       
     2 ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
       
     3 ** Lexical Analyzer
       
     4 ** See Copyright Notice in lua.h
       
     5 */
       
     6 
       
     7 
       
     8 #include <ctype.h>
       
     9 #include <locale.h>
       
    10 #include <string.h>
       
    11 
       
    12 #define llex_c
       
    13 #define LUA_CORE
       
    14 
       
    15 #include "lua.h"
       
    16 
       
    17 #include "ldo.h"
       
    18 #include "llex.h"
       
    19 #include "lobject.h"
       
    20 #include "lparser.h"
       
    21 #include "lstate.h"
       
    22 #include "lstring.h"
       
    23 #include "ltable.h"
       
    24 #include "lzio.h"
       
    25 
       
    26 
       
    27 
       
    28 #define next(ls) (ls->current = zgetc(ls->z))
       
    29 
       
    30 
       
    31 
       
    32 
       
    33 #define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
       
    34 
       
    35 
       
    36 /* ORDER RESERVED */
       
    37 const char *const luaX_tokens [] = {
       
    38     "and", "break", "do", "else", "elseif",
       
    39     "end", "false", "for", "function", "if",
       
    40     "in", "local", "nil", "not", "or", "repeat",
       
    41     "return", "then", "true", "until", "while",
       
    42     "..", "...", "==", ">=", "<=", "~=",
       
    43     "<number>", "<name>", "<string>", "<eof>",
       
    44     NULL
       
    45 };
       
    46 
       
    47 
       
    48 #define save_and_next(ls) (save(ls, ls->current), next(ls))
       
    49 
       
    50 
       
    51 static void save (LexState *ls, int c) {
       
    52   Mbuffer *b = ls->buff;
       
    53   if (b->n + 1 > b->buffsize) {
       
    54     size_t newsize;
       
    55     if (b->buffsize >= MAX_SIZET/2)
       
    56       luaX_lexerror(ls, "lexical element too long", 0);
       
    57     newsize = b->buffsize * 2;
       
    58     luaZ_resizebuffer(ls->L, b, newsize);
       
    59   }
       
    60   b->buffer[b->n++] = cast(char, c);
       
    61 }
       
    62 
       
    63 
       
    64 void luaX_init (lua_State *L) {
       
    65   int i;
       
    66   for (i=0; i<NUM_RESERVED; i++) {
       
    67     TString *ts = luaS_new(L, luaX_tokens[i]);
       
    68     luaS_fix(ts);  /* reserved words are never collected */
       
    69     lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
       
    70     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
       
    71   }
       
    72 }
       
    73 
       
    74 
       
    75 #define MAXSRC          80
       
    76 
       
    77 
       
    78 const char *luaX_token2str (LexState *ls, int token) {
       
    79   if (token < FIRST_RESERVED) {
       
    80     lua_assert(token == cast(unsigned char, token));
       
    81     return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
       
    82                               luaO_pushfstring(ls->L, "%c", token);
       
    83   }
       
    84   else
       
    85     return luaX_tokens[token-FIRST_RESERVED];
       
    86 }
       
    87 
       
    88 
       
    89 static const char *txtToken (LexState *ls, int token) {
       
    90   switch (token) {
       
    91     case TK_NAME:
       
    92     case TK_STRING:
       
    93     case TK_NUMBER:
       
    94       save(ls, '\0');
       
    95       return luaZ_buffer(ls->buff);
       
    96     default:
       
    97       return luaX_token2str(ls, token);
       
    98   }
       
    99 }
       
   100 
       
   101 
       
   102 void luaX_lexerror (LexState *ls, const char *msg, int token) {
       
   103   char buff[MAXSRC];
       
   104   luaO_chunkid(buff, getstr(ls->source), MAXSRC);
       
   105   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
       
   106   if (token)
       
   107     luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
       
   108   luaD_throw(ls->L, LUA_ERRSYNTAX);
       
   109 }
       
   110 
       
   111 
       
   112 void luaX_syntaxerror (LexState *ls, const char *msg) {
       
   113   luaX_lexerror(ls, msg, ls->t.token);
       
   114 }
       
   115 
       
   116 
       
   117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
       
   118   lua_State *L = ls->L;
       
   119   TString *ts = luaS_newlstr(L, str, l);
       
   120   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
       
   121   if (ttisnil(o))
       
   122     setbvalue(o, 1);  /* make sure `str' will not be collected */
       
   123   return ts;
       
   124 }
       
   125 
       
   126 
       
   127 static void inclinenumber (LexState *ls) {
       
   128   int old = ls->current;
       
   129   lua_assert(currIsNewline(ls));
       
   130   next(ls);  /* skip `\n' or `\r' */
       
   131   if (currIsNewline(ls) && ls->current != old)
       
   132     next(ls);  /* skip `\n\r' or `\r\n' */
       
   133   if (++ls->linenumber >= MAX_INT)
       
   134     luaX_syntaxerror(ls, "chunk has too many lines");
       
   135 }
       
   136 
       
   137 
       
   138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
       
   139   ls->decpoint = '.';
       
   140   ls->L = L;
       
   141   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
       
   142   ls->z = z;
       
   143   ls->fs = NULL;
       
   144   ls->linenumber = 1;
       
   145   ls->lastline = 1;
       
   146   ls->source = source;
       
   147   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
       
   148   next(ls);  /* read first char */
       
   149 }
       
   150 
       
   151 
       
   152 
       
   153 /*
       
   154 ** =======================================================
       
   155 ** LEXICAL ANALYZER
       
   156 ** =======================================================
       
   157 */
       
   158 
       
   159 
       
   160 
       
   161 static int check_next (LexState *ls, const char *set) {
       
   162   if (!strchr(set, ls->current))
       
   163     return 0;
       
   164   save_and_next(ls);
       
   165   return 1;
       
   166 }
       
   167 
       
   168 
       
   169 static void buffreplace (LexState *ls, char from, char to) {
       
   170   size_t n = luaZ_bufflen(ls->buff);
       
   171   char *p = luaZ_buffer(ls->buff);
       
   172   while (n--)
       
   173     if (p[n] == from) p[n] = to;
       
   174 }
       
   175 
       
   176 
       
   177 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
       
   178   /* format error: try to update decimal point separator */
       
   179   struct lconv *cv = localeconv();
       
   180   char old = ls->decpoint;
       
   181   ls->decpoint = (cv ? cv->decimal_point[0] : '.');
       
   182   buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
       
   183   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
       
   184     /* format error with correct decimal point: no more options */
       
   185     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
       
   186     luaX_lexerror(ls, "malformed number", TK_NUMBER);
       
   187   }
       
   188 }
       
   189 
       
   190 
       
   191 /* LUA_NUMBER */
       
   192 static void read_numeral (LexState *ls, SemInfo *seminfo) {
       
   193   lua_assert(isdigit(ls->current));
       
   194   do {
       
   195     save_and_next(ls);
       
   196   } while (isdigit(ls->current) || ls->current == '.');
       
   197   if (check_next(ls, "Ee"))  /* `E'? */
       
   198     check_next(ls, "+-");  /* optional exponent sign */
       
   199   while (isalnum(ls->current) || ls->current == '_')
       
   200     save_and_next(ls);
       
   201   save(ls, '\0');
       
   202   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
       
   203   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
       
   204     trydecpoint(ls, seminfo); /* try to update decimal point separator */
       
   205 }
       
   206 
       
   207 
       
   208 static int skip_sep (LexState *ls) {
       
   209   int count = 0;
       
   210   int s = ls->current;
       
   211   lua_assert(s == '[' || s == ']');
       
   212   save_and_next(ls);
       
   213   while (ls->current == '=') {
       
   214     save_and_next(ls);
       
   215     count++;
       
   216   }
       
   217   return (ls->current == s) ? count : (-count) - 1;
       
   218 }
       
   219 
       
   220 
       
   221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
       
   222   int cont = 0;
       
   223   (void)(cont);  /* avoid warnings when `cont' is not used */
       
   224   save_and_next(ls);  /* skip 2nd `[' */
       
   225   if (currIsNewline(ls))  /* string starts with a newline? */
       
   226     inclinenumber(ls);  /* skip it */
       
   227   for (;;) {
       
   228     switch (ls->current) {
       
   229       case EOZ:
       
   230         luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
       
   231                                    "unfinished long comment", TK_EOS);
       
   232         break;  /* to avoid warnings */
       
   233 #if defined(LUA_COMPAT_LSTR)
       
   234       case '[': {
       
   235         if (skip_sep(ls) == sep) {
       
   236           save_and_next(ls);  /* skip 2nd `[' */
       
   237           cont++;
       
   238 #if LUA_COMPAT_LSTR == 1
       
   239           if (sep == 0)
       
   240             luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
       
   241 #endif
       
   242         }
       
   243         break;
       
   244       }
       
   245 #endif
       
   246       case ']': {
       
   247         if (skip_sep(ls) == sep) {
       
   248           save_and_next(ls);  /* skip 2nd `]' */
       
   249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
       
   250           cont--;
       
   251           if (sep == 0 && cont >= 0) break;
       
   252 #endif
       
   253           goto endloop;
       
   254         }
       
   255         break;
       
   256       }
       
   257       case '\n':
       
   258       case '\r': {
       
   259         save(ls, '\n');
       
   260         inclinenumber(ls);
       
   261         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
       
   262         break;
       
   263       }
       
   264       default: {
       
   265         if (seminfo) save_and_next(ls);
       
   266         else next(ls);
       
   267       }
       
   268     }
       
   269   } endloop:
       
   270   if (seminfo)
       
   271     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
       
   272                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
       
   273 }
       
   274 
       
   275 
       
   276 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
       
   277   save_and_next(ls);
       
   278   while (ls->current != del) {
       
   279     switch (ls->current) {
       
   280       case EOZ:
       
   281         luaX_lexerror(ls, "unfinished string", TK_EOS);
       
   282         continue;  /* to avoid warnings */
       
   283       case '\n':
       
   284       case '\r':
       
   285         luaX_lexerror(ls, "unfinished string", TK_STRING);
       
   286         continue;  /* to avoid warnings */
       
   287       case '\\': {
       
   288         int c;
       
   289         next(ls);  /* do not save the `\' */
       
   290         switch (ls->current) {
       
   291           case 'a': c = '\a'; break;
       
   292           case 'b': c = '\b'; break;
       
   293           case 'f': c = '\f'; break;
       
   294           case 'n': c = '\n'; break;
       
   295           case 'r': c = '\r'; break;
       
   296           case 't': c = '\t'; break;
       
   297           case 'v': c = '\v'; break;
       
   298           case '\n':  /* go through */
       
   299           case '\r': save(ls, '\n'); inclinenumber(ls); continue;
       
   300           case EOZ: continue;  /* will raise an error next loop */
       
   301           default: {
       
   302             if (!isdigit(ls->current))
       
   303               save_and_next(ls);  /* handles \\, \", \', and \? */
       
   304             else {  /* \xxx */
       
   305               int i = 0;
       
   306               c = 0;
       
   307               do {
       
   308                 c = 10*c + (ls->current-'0');
       
   309                 next(ls);
       
   310               } while (++i<3 && isdigit(ls->current));
       
   311               if (c > UCHAR_MAX)
       
   312                 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
       
   313               save(ls, c);
       
   314             }
       
   315             continue;
       
   316           }
       
   317         }
       
   318         save(ls, c);
       
   319         next(ls);
       
   320         continue;
       
   321       }
       
   322       default:
       
   323         save_and_next(ls);
       
   324     }
       
   325   }
       
   326   save_and_next(ls);  /* skip delimiter */
       
   327   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
       
   328                                    luaZ_bufflen(ls->buff) - 2);
       
   329 }
       
   330 
       
   331 
       
   332 static int llex (LexState *ls, SemInfo *seminfo) {
       
   333   luaZ_resetbuffer(ls->buff);
       
   334   for (;;) {
       
   335     switch (ls->current) {
       
   336       case '\n':
       
   337       case '\r': {
       
   338         inclinenumber(ls);
       
   339         continue;
       
   340       }
       
   341       case '-': {
       
   342         next(ls);
       
   343         if (ls->current != '-') return '-';
       
   344         /* else is a comment */
       
   345         next(ls);
       
   346         if (ls->current == '[') {
       
   347           int sep = skip_sep(ls);
       
   348           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
       
   349           if (sep >= 0) {
       
   350             read_long_string(ls, NULL, sep);  /* long comment */
       
   351             luaZ_resetbuffer(ls->buff);
       
   352             continue;
       
   353           }
       
   354         }
       
   355         /* else short comment */
       
   356         while (!currIsNewline(ls) && ls->current != EOZ)
       
   357           next(ls);
       
   358         continue;
       
   359       }
       
   360       case '[': {
       
   361         int sep = skip_sep(ls);
       
   362         if (sep >= 0) {
       
   363           read_long_string(ls, seminfo, sep);
       
   364           return TK_STRING;
       
   365         }
       
   366         else if (sep == -1) return '[';
       
   367         else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
       
   368       }
       
   369       case '=': {
       
   370         next(ls);
       
   371         if (ls->current != '=') return '=';
       
   372         else { next(ls); return TK_EQ; }
       
   373       }
       
   374       case '<': {
       
   375         next(ls);
       
   376         if (ls->current != '=') return '<';
       
   377         else { next(ls); return TK_LE; }
       
   378       }
       
   379       case '>': {
       
   380         next(ls);
       
   381         if (ls->current != '=') return '>';
       
   382         else { next(ls); return TK_GE; }
       
   383       }
       
   384       case '~': {
       
   385         next(ls);
       
   386         if (ls->current != '=') return '~';
       
   387         else { next(ls); return TK_NE; }
       
   388       }
       
   389       case '"':
       
   390       case '\'': {
       
   391         read_string(ls, ls->current, seminfo);
       
   392         return TK_STRING;
       
   393       }
       
   394       case '.': {
       
   395         save_and_next(ls);
       
   396         if (check_next(ls, ".")) {
       
   397           if (check_next(ls, "."))
       
   398             return TK_DOTS;   /* ... */
       
   399           else return TK_CONCAT;   /* .. */
       
   400         }
       
   401         else if (!isdigit(ls->current)) return '.';
       
   402         else {
       
   403           read_numeral(ls, seminfo);
       
   404           return TK_NUMBER;
       
   405         }
       
   406       }
       
   407       case EOZ: {
       
   408         return TK_EOS;
       
   409       }
       
   410       default: {
       
   411         if (isspace(ls->current)) {
       
   412           lua_assert(!currIsNewline(ls));
       
   413           next(ls);
       
   414           continue;
       
   415         }
       
   416         else if (isdigit(ls->current)) {
       
   417           read_numeral(ls, seminfo);
       
   418           return TK_NUMBER;
       
   419         }
       
   420         else if (isalpha(ls->current) || ls->current == '_') {
       
   421           /* identifier or reserved word */
       
   422           TString *ts;
       
   423           do {
       
   424             save_and_next(ls);
       
   425           } while (isalnum(ls->current) || ls->current == '_');
       
   426           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
       
   427                                   luaZ_bufflen(ls->buff));
       
   428           if (ts->tsv.reserved > 0)  /* reserved word? */
       
   429             return ts->tsv.reserved - 1 + FIRST_RESERVED;
       
   430           else {
       
   431             seminfo->ts = ts;
       
   432             return TK_NAME;
       
   433           }
       
   434         }
       
   435         else {
       
   436           int c = ls->current;
       
   437           next(ls);
       
   438           return c;  /* single-char tokens (+ - / ...) */
       
   439         }
       
   440       }
       
   441     }
       
   442   }
       
   443 }
       
   444 
       
   445 
       
   446 void luaX_next (LexState *ls) {
       
   447   ls->lastline = ls->linenumber;
       
   448   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
       
   449     ls->t = ls->lookahead;  /* use this one */
       
   450     ls->lookahead.token = TK_EOS;  /* and discharge it */
       
   451   }
       
   452   else
       
   453     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
       
   454 }
       
   455 
       
   456 
       
   457 void luaX_lookahead (LexState *ls) {
       
   458   lua_assert(ls->lookahead.token == TK_EOS);
       
   459   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
       
   460 }
       
   461