|
1 /* |
|
2 ** $Id: lstrlib.c,v 1.132.1.4 2008/07/11 17:27:21 roberto Exp $ |
|
3 ** Standard library for string operations and pattern-matching |
|
4 ** See Copyright Notice in lua.h |
|
5 */ |
|
6 |
|
7 |
|
8 #include <ctype.h> |
|
9 #include <stddef.h> |
|
10 #include <stdio.h> |
|
11 #include <stdlib.h> |
|
12 #include <string.h> |
|
13 |
|
14 #define lstrlib_c |
|
15 #define LUA_LIB |
|
16 |
|
17 #include "lua.h" |
|
18 |
|
19 #include "lauxlib.h" |
|
20 #include "lualib.h" |
|
21 |
|
22 |
|
/* converts a (possibly signed) char value to its unsigned char value */
#define uchar(c)        ((unsigned char)(c))
|
25 |
|
26 |
|
27 |
|
28 static int str_len (lua_State *L) { |
|
29 size_t l; |
|
30 luaL_checklstring(L, 1, &l); |
|
31 lua_pushinteger(L, l); |
|
32 return 1; |
|
33 } |
|
34 |
|
35 |
|
/*
** Translates a relative string position into an absolute one.
** A negative `pos' counts back from the end of a string of length `len'
** (-1 maps to `len'). Results that are still negative after the
** translation are clamped to 0; non-negative inputs are returned as is.
*/
static ptrdiff_t posrelat (ptrdiff_t pos, size_t len) {
  if (pos < 0)
    pos += (ptrdiff_t)len + 1;  /* "from the end" -> "from the start" */
  if (pos < 0)
    return 0;
  return pos;
}
|
41 |
|
42 |
|
43 static int str_sub (lua_State *L) { |
|
44 size_t l; |
|
45 const char *s = luaL_checklstring(L, 1, &l); |
|
46 ptrdiff_t start = posrelat(luaL_checkinteger(L, 2), l); |
|
47 ptrdiff_t end = posrelat(luaL_optinteger(L, 3, -1), l); |
|
48 if (start < 1) start = 1; |
|
49 if (end > (ptrdiff_t)l) end = (ptrdiff_t)l; |
|
50 if (start <= end) |
|
51 lua_pushlstring(L, s+start-1, end-start+1); |
|
52 else lua_pushliteral(L, ""); |
|
53 return 1; |
|
54 } |
|
55 |
|
56 |
|
57 static int str_reverse (lua_State *L) { |
|
58 size_t l; |
|
59 luaL_Buffer b; |
|
60 const char *s = luaL_checklstring(L, 1, &l); |
|
61 luaL_buffinit(L, &b); |
|
62 while (l--) luaL_addchar(&b, s[l]); |
|
63 luaL_pushresult(&b); |
|
64 return 1; |
|
65 } |
|
66 |
|
67 |
|
68 static int str_lower (lua_State *L) { |
|
69 size_t l; |
|
70 size_t i; |
|
71 luaL_Buffer b; |
|
72 const char *s = luaL_checklstring(L, 1, &l); |
|
73 luaL_buffinit(L, &b); |
|
74 for (i=0; i<l; i++) |
|
75 luaL_addchar(&b, tolower(uchar(s[i]))); |
|
76 luaL_pushresult(&b); |
|
77 return 1; |
|
78 } |
|
79 |
|
80 |
|
81 static int str_upper (lua_State *L) { |
|
82 size_t l; |
|
83 size_t i; |
|
84 luaL_Buffer b; |
|
85 const char *s = luaL_checklstring(L, 1, &l); |
|
86 luaL_buffinit(L, &b); |
|
87 for (i=0; i<l; i++) |
|
88 luaL_addchar(&b, toupper(uchar(s[i]))); |
|
89 luaL_pushresult(&b); |
|
90 return 1; |
|
91 } |
|
92 |
|
93 static int str_rep (lua_State *L) { |
|
94 size_t l; |
|
95 luaL_Buffer b; |
|
96 const char *s = luaL_checklstring(L, 1, &l); |
|
97 int n = luaL_checkint(L, 2); |
|
98 luaL_buffinit(L, &b); |
|
99 while (n-- > 0) |
|
100 luaL_addlstring(&b, s, l); |
|
101 luaL_pushresult(&b); |
|
102 return 1; |
|
103 } |
|
104 |
|
105 |
|
106 static int str_byte (lua_State *L) { |
|
107 size_t l; |
|
108 const char *s = luaL_checklstring(L, 1, &l); |
|
109 ptrdiff_t posi = posrelat(luaL_optinteger(L, 2, 1), l); |
|
110 ptrdiff_t pose = posrelat(luaL_optinteger(L, 3, posi), l); |
|
111 int n, i; |
|
112 if (posi <= 0) posi = 1; |
|
113 if ((size_t)pose > l) pose = l; |
|
114 if (posi > pose) return 0; /* empty interval; return no values */ |
|
115 n = (int)(pose - posi + 1); |
|
116 if (posi + n <= pose) /* overflow? */ |
|
117 luaL_error(L, "string slice too long"); |
|
118 luaL_checkstack(L, n, "string slice too long"); |
|
119 for (i=0; i<n; i++) |
|
120 lua_pushinteger(L, uchar(s[posi+i-1])); |
|
121 return n; |
|
122 } |
|
123 |
|
124 |
|
125 static int str_char (lua_State *L) { |
|
126 int n = lua_gettop(L); /* number of arguments */ |
|
127 int i; |
|
128 luaL_Buffer b; |
|
129 luaL_buffinit(L, &b); |
|
130 for (i=1; i<=n; i++) { |
|
131 int c = luaL_checkint(L, i); |
|
132 luaL_argcheck(L, uchar(c) == c, i, "invalid value"); |
|
133 luaL_addchar(&b, uchar(c)); |
|
134 } |
|
135 luaL_pushresult(&b); |
|
136 return 1; |
|
137 } |
|
138 |
|
139 |
|
140 static int writer (lua_State *L, const void* b, size_t size, void* B) { |
|
141 (void)L; |
|
142 luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); |
|
143 return 0; |
|
144 } |
|
145 |
|
146 |
|
147 static int str_dump (lua_State *L) { |
|
148 luaL_Buffer b; |
|
149 luaL_checktype(L, 1, LUA_TFUNCTION); |
|
150 lua_settop(L, 1); |
|
151 luaL_buffinit(L,&b); |
|
152 if (lua_dump(L, writer, &b) != 0) |
|
153 luaL_error(L, "unable to dump given function"); |
|
154 luaL_pushresult(&b); |
|
155 return 1; |
|
156 } |
|
157 |
|
158 |
|
159 |
|
160 /* |
|
161 ** {====================================================== |
|
162 ** PATTERN MATCHING |
|
163 ** ======================================================= |
|
164 */ |
|
165 |
|
166 |
|
167 #define CAP_UNFINISHED (-1) |
|
168 #define CAP_POSITION (-2) |
|
169 |
|
170 typedef struct MatchState { |
|
171 const char *src_init; /* init of source string */ |
|
172 const char *src_end; /* end (`\0') of source string */ |
|
173 lua_State *L; |
|
174 int level; /* total number of captures (finished or unfinished) */ |
|
175 struct { |
|
176 const char *init; |
|
177 ptrdiff_t len; |
|
178 } capture[LUA_MAXCAPTURES]; |
|
179 } MatchState; |
|
180 |
|
181 |
|
/* escape character used in patterns, replacement strings and formats */
#define L_ESC           '%'
/* characters whose presence makes string.find do a pattern search */
#define SPECIALS        "^$*+?.([%-"
|
184 |
|
185 |
|
186 static int check_capture (MatchState *ms, int l) { |
|
187 l -= '1'; |
|
188 if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) |
|
189 return luaL_error(ms->L, "invalid capture index"); |
|
190 return l; |
|
191 } |
|
192 |
|
193 |
|
194 static int capture_to_close (MatchState *ms) { |
|
195 int level = ms->level; |
|
196 for (level--; level>=0; level--) |
|
197 if (ms->capture[level].len == CAP_UNFINISHED) return level; |
|
198 return luaL_error(ms->L, "invalid pattern capture"); |
|
199 } |
|
200 |
|
201 |
|
202 static const char *classend (MatchState *ms, const char *p) { |
|
203 switch (*p++) { |
|
204 case L_ESC: { |
|
205 if (*p == '\0') |
|
206 luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); |
|
207 return p+1; |
|
208 } |
|
209 case '[': { |
|
210 if (*p == '^') p++; |
|
211 do { /* look for a `]' */ |
|
212 if (*p == '\0') |
|
213 luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); |
|
214 if (*(p++) == L_ESC && *p != '\0') |
|
215 p++; /* skip escapes (e.g. `%]') */ |
|
216 } while (*p != ']'); |
|
217 return p+1; |
|
218 } |
|
219 default: { |
|
220 return p; |
|
221 } |
|
222 } |
|
223 } |
|
224 |
|
225 |
|
/*
** Tests character `c' against the class letter `cl' (the character
** that follows `%' in a pattern). A lower-case letter selects the
** <ctype.h> class itself and the result is the raw value of the
** corresponding is*() call; an upper-case letter selects the
** complement (result 0 or 1). Any `cl' that is not a class letter
** matches only itself.
*/
static int match_class (int c, int cl) {
  int hit;
  switch (tolower(cl)) {
    case 'a': hit = isalpha(c); break;
    case 'c': hit = iscntrl(c); break;
    case 'd': hit = isdigit(c); break;
    case 'l': hit = islower(c); break;
    case 'p': hit = ispunct(c); break;
    case 's': hit = isspace(c); break;
    case 'u': hit = isupper(c); break;
    case 'w': hit = isalnum(c); break;
    case 'x': hit = isxdigit(c); break;
    case 'z': hit = (c == 0); break;
    default:
      return (cl == c);  /* not a class: literal comparison */
  }
  if (islower(cl))
    return hit;
  return !hit;  /* upper-case class letter: complement */
}
|
243 |
|
244 |
|
245 static int matchbracketclass (int c, const char *p, const char *ec) { |
|
246 int sig = 1; |
|
247 if (*(p+1) == '^') { |
|
248 sig = 0; |
|
249 p++; /* skip the `^' */ |
|
250 } |
|
251 while (++p < ec) { |
|
252 if (*p == L_ESC) { |
|
253 p++; |
|
254 if (match_class(c, uchar(*p))) |
|
255 return sig; |
|
256 } |
|
257 else if ((*(p+1) == '-') && (p+2 < ec)) { |
|
258 p+=2; |
|
259 if (uchar(*(p-2)) <= c && c <= uchar(*p)) |
|
260 return sig; |
|
261 } |
|
262 else if (uchar(*p) == c) return sig; |
|
263 } |
|
264 return !sig; |
|
265 } |
|
266 |
|
267 |
|
268 static int singlematch (int c, const char *p, const char *ep) { |
|
269 switch (*p) { |
|
270 case '.': return 1; /* matches any char */ |
|
271 case L_ESC: return match_class(c, uchar(*(p+1))); |
|
272 case '[': return matchbracketclass(c, p, ep-1); |
|
273 default: return (uchar(*p) == c); |
|
274 } |
|
275 } |
|
276 |
|
277 |
|
278 static const char *match (MatchState *ms, const char *s, const char *p); |
|
279 |
|
280 |
|
281 static const char *matchbalance (MatchState *ms, const char *s, |
|
282 const char *p) { |
|
283 if (*p == 0 || *(p+1) == 0) |
|
284 luaL_error(ms->L, "unbalanced pattern"); |
|
285 if (*s != *p) return NULL; |
|
286 else { |
|
287 int b = *p; |
|
288 int e = *(p+1); |
|
289 int cont = 1; |
|
290 while (++s < ms->src_end) { |
|
291 if (*s == e) { |
|
292 if (--cont == 0) return s+1; |
|
293 } |
|
294 else if (*s == b) cont++; |
|
295 } |
|
296 } |
|
297 return NULL; /* string ends out of balance */ |
|
298 } |
|
299 |
|
300 |
|
301 static const char *max_expand (MatchState *ms, const char *s, |
|
302 const char *p, const char *ep) { |
|
303 ptrdiff_t i = 0; /* counts maximum expand for item */ |
|
304 while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep)) |
|
305 i++; |
|
306 /* keeps trying to match with the maximum repetitions */ |
|
307 while (i>=0) { |
|
308 const char *res = match(ms, (s+i), ep+1); |
|
309 if (res) return res; |
|
310 i--; /* else didn't match; reduce 1 repetition to try again */ |
|
311 } |
|
312 return NULL; |
|
313 } |
|
314 |
|
315 |
|
316 static const char *min_expand (MatchState *ms, const char *s, |
|
317 const char *p, const char *ep) { |
|
318 for (;;) { |
|
319 const char *res = match(ms, s, ep+1); |
|
320 if (res != NULL) |
|
321 return res; |
|
322 else if (s<ms->src_end && singlematch(uchar(*s), p, ep)) |
|
323 s++; /* try with one more repetition */ |
|
324 else return NULL; |
|
325 } |
|
326 } |
|
327 |
|
328 |
|
329 static const char *start_capture (MatchState *ms, const char *s, |
|
330 const char *p, int what) { |
|
331 const char *res; |
|
332 int level = ms->level; |
|
333 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); |
|
334 ms->capture[level].init = s; |
|
335 ms->capture[level].len = what; |
|
336 ms->level = level+1; |
|
337 if ((res=match(ms, s, p)) == NULL) /* match failed? */ |
|
338 ms->level--; /* undo capture */ |
|
339 return res; |
|
340 } |
|
341 |
|
342 |
|
343 static const char *end_capture (MatchState *ms, const char *s, |
|
344 const char *p) { |
|
345 int l = capture_to_close(ms); |
|
346 const char *res; |
|
347 ms->capture[l].len = s - ms->capture[l].init; /* close capture */ |
|
348 if ((res = match(ms, s, p)) == NULL) /* match failed? */ |
|
349 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ |
|
350 return res; |
|
351 } |
|
352 |
|
353 |
|
354 static const char *match_capture (MatchState *ms, const char *s, int l) { |
|
355 size_t len; |
|
356 l = check_capture(ms, l); |
|
357 len = ms->capture[l].len; |
|
358 if ((size_t)(ms->src_end-s) >= len && |
|
359 memcmp(ms->capture[l].init, s, len) == 0) |
|
360 return s+len; |
|
361 else return NULL; |
|
362 } |
|
363 |
|
364 |
|
365 static const char *match (MatchState *ms, const char *s, const char *p) { |
|
366 init: /* using goto's to optimize tail recursion */ |
|
367 switch (*p) { |
|
368 case '(': { /* start capture */ |
|
369 if (*(p+1) == ')') /* position capture? */ |
|
370 return start_capture(ms, s, p+2, CAP_POSITION); |
|
371 else |
|
372 return start_capture(ms, s, p+1, CAP_UNFINISHED); |
|
373 } |
|
374 case ')': { /* end capture */ |
|
375 return end_capture(ms, s, p+1); |
|
376 } |
|
377 case L_ESC: { |
|
378 switch (*(p+1)) { |
|
379 case 'b': { /* balanced string? */ |
|
380 s = matchbalance(ms, s, p+2); |
|
381 if (s == NULL) return NULL; |
|
382 p+=4; goto init; /* else return match(ms, s, p+4); */ |
|
383 } |
|
384 case 'f': { /* frontier? */ |
|
385 const char *ep; char previous; |
|
386 p += 2; |
|
387 if (*p != '[') |
|
388 luaL_error(ms->L, "missing " LUA_QL("[") " after " |
|
389 LUA_QL("%%f") " in pattern"); |
|
390 ep = classend(ms, p); /* points to what is next */ |
|
391 previous = (s == ms->src_init) ? '\0' : *(s-1); |
|
392 if (matchbracketclass(uchar(previous), p, ep-1) || |
|
393 !matchbracketclass(uchar(*s), p, ep-1)) return NULL; |
|
394 p=ep; goto init; /* else return match(ms, s, ep); */ |
|
395 } |
|
396 default: { |
|
397 if (isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */ |
|
398 s = match_capture(ms, s, uchar(*(p+1))); |
|
399 if (s == NULL) return NULL; |
|
400 p+=2; goto init; /* else return match(ms, s, p+2) */ |
|
401 } |
|
402 goto dflt; /* case default */ |
|
403 } |
|
404 } |
|
405 } |
|
406 case '\0': { /* end of pattern */ |
|
407 return s; /* match succeeded */ |
|
408 } |
|
409 case '$': { |
|
410 if (*(p+1) == '\0') /* is the `$' the last char in pattern? */ |
|
411 return (s == ms->src_end) ? s : NULL; /* check end of string */ |
|
412 else goto dflt; |
|
413 } |
|
414 default: dflt: { /* it is a pattern item */ |
|
415 const char *ep = classend(ms, p); /* points to what is next */ |
|
416 int m = s<ms->src_end && singlematch(uchar(*s), p, ep); |
|
417 switch (*ep) { |
|
418 case '?': { /* optional */ |
|
419 const char *res; |
|
420 if (m && ((res=match(ms, s+1, ep+1)) != NULL)) |
|
421 return res; |
|
422 p=ep+1; goto init; /* else return match(ms, s, ep+1); */ |
|
423 } |
|
424 case '*': { /* 0 or more repetitions */ |
|
425 return max_expand(ms, s, p, ep); |
|
426 } |
|
427 case '+': { /* 1 or more repetitions */ |
|
428 return (m ? max_expand(ms, s+1, p, ep) : NULL); |
|
429 } |
|
430 case '-': { /* 0 or more repetitions (minimum) */ |
|
431 return min_expand(ms, s, p, ep); |
|
432 } |
|
433 default: { |
|
434 if (!m) return NULL; |
|
435 s++; p=ep; goto init; /* else return match(ms, s+1, ep); */ |
|
436 } |
|
437 } |
|
438 } |
|
439 } |
|
440 } |
|
441 |
|
442 |
|
443 |
|
/*
** Finds the first occurrence of the `l2' bytes at `s2' inside the
** `l1' bytes at `s1' (both may contain embedded zeros). Returns a
** pointer to the occurrence, `s1' itself when `l2' is 0, or NULL when
** there is none.
*/
static const char *lmemfind (const char *s1, size_t l1,
                             const char *s2, size_t l2) {
  const char *hit;  /* where the first byte of `s2' was found */
  size_t tail;  /* bytes of `s2' after its first one */
  size_t window;  /* candidate start positions still left in `s1' */
  if (l2 == 0)
    return s1;  /* empty strings are everywhere */
  if (l2 > l1)
    return NULL;  /* avoids a negative window */
  tail = l2 - 1;  /* 1st char will be checked by `memchr' */
  window = l1 - tail;  /* `s2' cannot start after that */
  while (window > 0) {
    hit = (const char *)memchr(s1, *s2, window);
    if (hit == NULL)
      break;
    if (memcmp(hit + 1, s2 + 1, tail) == 0)
      return hit;
    /* resume the search right after the failed candidate */
    window -= hit + 1 - s1;
    s1 = hit + 1;
  }
  return NULL;  /* not found */
}
|
464 |
|
465 |
|
466 static void push_onecapture (MatchState *ms, int i, const char *s, |
|
467 const char *e) { |
|
468 if (i >= ms->level) { |
|
469 if (i == 0) /* ms->level == 0, too */ |
|
470 lua_pushlstring(ms->L, s, e - s); /* add whole match */ |
|
471 else |
|
472 luaL_error(ms->L, "invalid capture index"); |
|
473 } |
|
474 else { |
|
475 ptrdiff_t l = ms->capture[i].len; |
|
476 if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); |
|
477 if (l == CAP_POSITION) |
|
478 lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); |
|
479 else |
|
480 lua_pushlstring(ms->L, ms->capture[i].init, l); |
|
481 } |
|
482 } |
|
483 |
|
484 |
|
485 static int push_captures (MatchState *ms, const char *s, const char *e) { |
|
486 int i; |
|
487 int nlevels = (ms->level == 0 && s) ? 1 : ms->level; |
|
488 luaL_checkstack(ms->L, nlevels, "too many captures"); |
|
489 for (i = 0; i < nlevels; i++) |
|
490 push_onecapture(ms, i, s, e); |
|
491 return nlevels; /* number of strings pushed */ |
|
492 } |
|
493 |
|
494 |
|
495 static int str_find_aux (lua_State *L, int find) { |
|
496 size_t l1, l2; |
|
497 const char *s = luaL_checklstring(L, 1, &l1); |
|
498 const char *p = luaL_checklstring(L, 2, &l2); |
|
499 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; |
|
500 if (init < 0) init = 0; |
|
501 else if ((size_t)(init) > l1) init = (ptrdiff_t)l1; |
|
502 if (find && (lua_toboolean(L, 4) || /* explicit request? */ |
|
503 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ |
|
504 /* do a plain search */ |
|
505 const char *s2 = lmemfind(s+init, l1-init, p, l2); |
|
506 if (s2) { |
|
507 lua_pushinteger(L, s2-s+1); |
|
508 lua_pushinteger(L, s2-s+l2); |
|
509 return 2; |
|
510 } |
|
511 } |
|
512 else { |
|
513 MatchState ms; |
|
514 int anchor = (*p == '^') ? (p++, 1) : 0; |
|
515 const char *s1=s+init; |
|
516 ms.L = L; |
|
517 ms.src_init = s; |
|
518 ms.src_end = s+l1; |
|
519 do { |
|
520 const char *res; |
|
521 ms.level = 0; |
|
522 if ((res=match(&ms, s1, p)) != NULL) { |
|
523 if (find) { |
|
524 lua_pushinteger(L, s1-s+1); /* start */ |
|
525 lua_pushinteger(L, res-s); /* end */ |
|
526 return push_captures(&ms, NULL, 0) + 2; |
|
527 } |
|
528 else |
|
529 return push_captures(&ms, s1, res); |
|
530 } |
|
531 } while (s1++ < ms.src_end && !anchor); |
|
532 } |
|
533 lua_pushnil(L); /* not found */ |
|
534 return 1; |
|
535 } |
|
536 |
|
537 |
|
538 static int str_find (lua_State *L) { |
|
539 return str_find_aux(L, 1); |
|
540 } |
|
541 |
|
542 |
|
543 static int str_match (lua_State *L) { |
|
544 return str_find_aux(L, 0); |
|
545 } |
|
546 |
|
547 |
|
548 static int gmatch_aux (lua_State *L) { |
|
549 MatchState ms; |
|
550 size_t ls; |
|
551 const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); |
|
552 const char *p = lua_tostring(L, lua_upvalueindex(2)); |
|
553 const char *src; |
|
554 ms.L = L; |
|
555 ms.src_init = s; |
|
556 ms.src_end = s+ls; |
|
557 for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); |
|
558 src <= ms.src_end; |
|
559 src++) { |
|
560 const char *e; |
|
561 ms.level = 0; |
|
562 if ((e = match(&ms, src, p)) != NULL) { |
|
563 lua_Integer newstart = e-s; |
|
564 if (e == src) newstart++; /* empty match? go at least one position */ |
|
565 lua_pushinteger(L, newstart); |
|
566 lua_replace(L, lua_upvalueindex(3)); |
|
567 return push_captures(&ms, src, e); |
|
568 } |
|
569 } |
|
570 return 0; /* not found */ |
|
571 } |
|
572 |
|
573 |
|
574 static int gmatch (lua_State *L) { |
|
575 luaL_checkstring(L, 1); |
|
576 luaL_checkstring(L, 2); |
|
577 lua_settop(L, 2); |
|
578 lua_pushinteger(L, 0); |
|
579 lua_pushcclosure(L, gmatch_aux, 3); |
|
580 return 1; |
|
581 } |
|
582 |
|
583 |
|
584 static int gfind_nodef (lua_State *L) { |
|
585 return luaL_error(L, LUA_QL("string.gfind") " was renamed to " |
|
586 LUA_QL("string.gmatch")); |
|
587 } |
|
588 |
|
589 |
|
590 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, |
|
591 const char *e) { |
|
592 size_t l, i; |
|
593 const char *news = lua_tolstring(ms->L, 3, &l); |
|
594 for (i = 0; i < l; i++) { |
|
595 if (news[i] != L_ESC) |
|
596 luaL_addchar(b, news[i]); |
|
597 else { |
|
598 i++; /* skip ESC */ |
|
599 if (!isdigit(uchar(news[i]))) |
|
600 luaL_addchar(b, news[i]); |
|
601 else if (news[i] == '0') |
|
602 luaL_addlstring(b, s, e - s); |
|
603 else { |
|
604 push_onecapture(ms, news[i] - '1', s, e); |
|
605 luaL_addvalue(b); /* add capture to accumulated result */ |
|
606 } |
|
607 } |
|
608 } |
|
609 } |
|
610 |
|
611 |
|
612 static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, |
|
613 const char *e) { |
|
614 lua_State *L = ms->L; |
|
615 switch (lua_type(L, 3)) { |
|
616 case LUA_TNUMBER: |
|
617 case LUA_TSTRING: { |
|
618 add_s(ms, b, s, e); |
|
619 return; |
|
620 } |
|
621 case LUA_TFUNCTION: { |
|
622 int n; |
|
623 lua_pushvalue(L, 3); |
|
624 n = push_captures(ms, s, e); |
|
625 lua_call(L, n, 1); |
|
626 break; |
|
627 } |
|
628 case LUA_TTABLE: { |
|
629 push_onecapture(ms, 0, s, e); |
|
630 lua_gettable(L, 3); |
|
631 break; |
|
632 } |
|
633 } |
|
634 if (!lua_toboolean(L, -1)) { /* nil or false? */ |
|
635 lua_pop(L, 1); |
|
636 lua_pushlstring(L, s, e - s); /* keep original text */ |
|
637 } |
|
638 else if (!lua_isstring(L, -1)) |
|
639 luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); |
|
640 luaL_addvalue(b); /* add result to accumulator */ |
|
641 } |
|
642 |
|
643 |
|
644 static int str_gsub (lua_State *L) { |
|
645 size_t srcl; |
|
646 const char *src = luaL_checklstring(L, 1, &srcl); |
|
647 const char *p = luaL_checkstring(L, 2); |
|
648 int tr = lua_type(L, 3); |
|
649 int max_s = luaL_optint(L, 4, srcl+1); |
|
650 int anchor = (*p == '^') ? (p++, 1) : 0; |
|
651 int n = 0; |
|
652 MatchState ms; |
|
653 luaL_Buffer b; |
|
654 luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || |
|
655 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, |
|
656 "string/function/table expected"); |
|
657 luaL_buffinit(L, &b); |
|
658 ms.L = L; |
|
659 ms.src_init = src; |
|
660 ms.src_end = src+srcl; |
|
661 while (n < max_s) { |
|
662 const char *e; |
|
663 ms.level = 0; |
|
664 e = match(&ms, src, p); |
|
665 if (e) { |
|
666 n++; |
|
667 add_value(&ms, &b, src, e); |
|
668 } |
|
669 if (e && e>src) /* non empty match? */ |
|
670 src = e; /* skip it */ |
|
671 else if (src < ms.src_end) |
|
672 luaL_addchar(&b, *src++); |
|
673 else break; |
|
674 if (anchor) break; |
|
675 } |
|
676 luaL_addlstring(&b, src, ms.src_end-src); |
|
677 luaL_pushresult(&b); |
|
678 lua_pushinteger(L, n); /* number of substitutions */ |
|
679 return 2; |
|
680 } |
|
681 |
|
682 /* }====================================================== */ |
|
683 |
|
684 |
|
/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
#define MAX_ITEM        512

/* valid flags in a format specification */
#define FLAGS           "-+ #0"

/*
** maximum size of each format specification (such as '%-099.99d')
** (+10 accounts for %99.99x plus margin of error)
*/
#define MAX_FORMAT      (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
|
694 |
|
695 |
|
696 static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { |
|
697 size_t l; |
|
698 const char *s = luaL_checklstring(L, arg, &l); |
|
699 luaL_addchar(b, '"'); |
|
700 while (l--) { |
|
701 switch (*s) { |
|
702 case '"': case '\\': case '\n': { |
|
703 luaL_addchar(b, '\\'); |
|
704 luaL_addchar(b, *s); |
|
705 break; |
|
706 } |
|
707 case '\r': { |
|
708 luaL_addlstring(b, "\\r", 2); |
|
709 break; |
|
710 } |
|
711 case '\0': { |
|
712 luaL_addlstring(b, "\\000", 4); |
|
713 break; |
|
714 } |
|
715 default: { |
|
716 luaL_addchar(b, *s); |
|
717 break; |
|
718 } |
|
719 } |
|
720 s++; |
|
721 } |
|
722 luaL_addchar(b, '"'); |
|
723 } |
|
724 |
|
725 static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { |
|
726 const char *p = strfrmt; |
|
727 while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ |
|
728 if ((size_t)(p - strfrmt) >= sizeof(FLAGS)) |
|
729 luaL_error(L, "invalid format (repeated flags)"); |
|
730 if (isdigit(uchar(*p))) p++; /* skip width */ |
|
731 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ |
|
732 if (*p == '.') { |
|
733 p++; |
|
734 if (isdigit(uchar(*p))) p++; /* skip precision */ |
|
735 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ |
|
736 } |
|
737 if (isdigit(uchar(*p))) |
|
738 luaL_error(L, "invalid format (width or precision too long)"); |
|
739 *(form++) = '%'; |
|
740 strncpy(form, strfrmt, p - strfrmt + 1); |
|
741 form += p - strfrmt + 1; |
|
742 *form = '\0'; |
|
743 return p; |
|
744 } |
|
745 |
|
746 |
|
747 static void addintlen (char *form) { |
|
748 size_t l = strlen(form); |
|
749 char spec = form[l - 1]; |
|
750 strcpy(form + l - 1, LUA_INTFRMLEN); |
|
751 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; |
|
752 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; |
|
753 } |
|
754 |
|
755 |
|
756 static int str_format (lua_State *L) { |
|
757 int arg = 1; |
|
758 size_t sfl; |
|
759 const char *strfrmt = luaL_checklstring(L, arg, &sfl); |
|
760 const char *strfrmt_end = strfrmt+sfl; |
|
761 luaL_Buffer b; |
|
762 luaL_buffinit(L, &b); |
|
763 while (strfrmt < strfrmt_end) { |
|
764 if (*strfrmt != L_ESC) |
|
765 luaL_addchar(&b, *strfrmt++); |
|
766 else if (*++strfrmt == L_ESC) |
|
767 luaL_addchar(&b, *strfrmt++); /* %% */ |
|
768 else { /* format item */ |
|
769 char form[MAX_FORMAT]; /* to store the format (`%...') */ |
|
770 char buff[MAX_ITEM]; /* to store the formatted item */ |
|
771 arg++; |
|
772 strfrmt = scanformat(L, strfrmt, form); |
|
773 switch (*strfrmt++) { |
|
774 case 'c': { |
|
775 sprintf(buff, form, (int)luaL_checknumber(L, arg)); |
|
776 break; |
|
777 } |
|
778 case 'd': case 'i': { |
|
779 addintlen(form); |
|
780 sprintf(buff, form, (LUA_INTFRM_T)luaL_checknumber(L, arg)); |
|
781 break; |
|
782 } |
|
783 case 'o': case 'u': case 'x': case 'X': { |
|
784 addintlen(form); |
|
785 sprintf(buff, form, (unsigned LUA_INTFRM_T)luaL_checknumber(L, arg)); |
|
786 break; |
|
787 } |
|
788 case 'e': case 'E': case 'f': |
|
789 case 'g': case 'G': { |
|
790 sprintf(buff, form, (double)luaL_checknumber(L, arg)); |
|
791 break; |
|
792 } |
|
793 case 'q': { |
|
794 addquoted(L, &b, arg); |
|
795 continue; /* skip the 'addsize' at the end */ |
|
796 } |
|
797 case 's': { |
|
798 size_t l; |
|
799 const char *s = luaL_checklstring(L, arg, &l); |
|
800 if (!strchr(form, '.') && l >= 100) { |
|
801 /* no precision and string is too long to be formatted; |
|
802 keep original string */ |
|
803 lua_pushvalue(L, arg); |
|
804 luaL_addvalue(&b); |
|
805 continue; /* skip the `addsize' at the end */ |
|
806 } |
|
807 else { |
|
808 sprintf(buff, form, s); |
|
809 break; |
|
810 } |
|
811 } |
|
812 default: { /* also treat cases `pnLlh' */ |
|
813 return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " |
|
814 LUA_QL("format"), *(strfrmt - 1)); |
|
815 } |
|
816 } |
|
817 luaL_addlstring(&b, buff, strlen(buff)); |
|
818 } |
|
819 } |
|
820 luaL_pushresult(&b); |
|
821 return 1; |
|
822 } |
|
823 |
|
824 |
|
825 static const luaL_Reg strlib[] = { |
|
826 {"byte", str_byte}, |
|
827 {"char", str_char}, |
|
828 {"dump", str_dump}, |
|
829 {"find", str_find}, |
|
830 {"format", str_format}, |
|
831 {"gfind", gfind_nodef}, |
|
832 {"gmatch", gmatch}, |
|
833 {"gsub", str_gsub}, |
|
834 {"len", str_len}, |
|
835 {"lower", str_lower}, |
|
836 {"match", str_match}, |
|
837 {"rep", str_rep}, |
|
838 {"reverse", str_reverse}, |
|
839 {"sub", str_sub}, |
|
840 {"upper", str_upper}, |
|
841 {NULL, NULL} |
|
842 }; |
|
843 |
|
844 |
|
845 static void createmetatable (lua_State *L) { |
|
846 lua_createtable(L, 0, 1); /* create metatable for strings */ |
|
847 lua_pushliteral(L, ""); /* dummy string */ |
|
848 lua_pushvalue(L, -2); |
|
849 lua_setmetatable(L, -2); /* set string metatable */ |
|
850 lua_pop(L, 1); /* pop dummy string */ |
|
851 lua_pushvalue(L, -2); /* string library... */ |
|
852 lua_setfield(L, -2, "__index"); /* ...is the __index metamethod */ |
|
853 lua_pop(L, 1); /* pop metatable */ |
|
854 } |
|
855 |
|
856 |
|
857 /* |
|
858 ** Open string library |
|
859 */ |
|
860 LUALIB_API int luaopen_string (lua_State *L) { |
|
861 luaL_register(L, LUA_STRLIBNAME, strlib); |
|
862 #if defined(LUA_COMPAT_GFIND) |
|
863 lua_getfield(L, -1, "gmatch"); |
|
864 lua_setfield(L, -2, "gfind"); |
|
865 #endif |
|
866 createmetatable(L); |
|
867 return 1; |
|
868 } |
|
869 |