/*
** $Id: lstrlib.c,v 1.132.1.4 2008/07/11 17:27:21 roberto Exp $
** Standard library for string operations and pattern-matching
** See Copyright Notice in lua.h
*/
/*
** Macro to "unsign" a character: converts a (possibly signed) char
** value to unsigned char, so that it can be compared against int
** character codes and handed to the <ctype.h> functions, which this
** file always calls with uchar(...) arguments.
*/
#define uchar(c)        ((unsigned char)(c))
29 static int str_len (lua_State
*L
) {
31 luaL_checklstring(L
, 1, &l
);
32 lua_pushinteger(L
, l
);
/*
** Converts a relative string position into an absolute one.
**   pos  position as given by the caller; a negative value counts
**        back from the end of the string (-1 is the last character)
**   len  length of the string
** Returns the resulting position, or 0 when a negative `pos' reaches
** back past the start of the string. Non-negative positions are
** returned unchanged (they are NOT clipped to `len'; callers do that).
*/
static ptrdiff_t posrelat (ptrdiff_t pos, size_t len) {
  /* relative string position: negative means back from end */
  if (pos < 0) pos += (ptrdiff_t)len + 1;
  return (pos >= 0) ? pos : 0;
}
44 static int str_sub (lua_State
*L
) {
46 const char *s
= luaL_checklstring(L
, 1, &l
);
47 ptrdiff_t start
= posrelat(luaL_checkinteger(L
, 2), l
);
48 ptrdiff_t end
= posrelat(luaL_optinteger(L
, 3, -1), l
);
49 if (start
< 1) start
= 1;
50 if (end
> (ptrdiff_t)l
) end
= (ptrdiff_t)l
;
52 lua_pushlstring(L
, s
+start
-1, end
-start
+1);
53 else lua_pushliteral(L
, "");
58 static int str_reverse (lua_State
*L
) {
61 const char *s
= luaL_checklstring(L
, 1, &l
);
63 while (l
--) luaL_addchar(&b
, s
[l
]);
69 static int str_lower (lua_State
*L
) {
73 const char *s
= luaL_checklstring(L
, 1, &l
);
76 luaL_addchar(&b
, tolower(uchar(s
[i
])));
82 static int str_upper (lua_State
*L
) {
86 const char *s
= luaL_checklstring(L
, 1, &l
);
89 luaL_addchar(&b
, toupper(uchar(s
[i
])));
94 static int str_rep (lua_State
*L
) {
97 const char *s
= luaL_checklstring(L
, 1, &l
);
98 int n
= luaL_checkint(L
, 2);
101 luaL_addlstring(&b
, s
, l
);
107 static int str_byte (lua_State
*L
) {
109 const char *s
= luaL_checklstring(L
, 1, &l
);
110 ptrdiff_t posi
= posrelat(luaL_optinteger(L
, 2, 1), l
);
111 ptrdiff_t pose
= posrelat(luaL_optinteger(L
, 3, posi
), l
);
113 if (posi
<= 0) posi
= 1;
114 if ((size_t)pose
> l
) pose
= l
;
115 if (posi
> pose
) return 0; /* empty interval; return no values */
116 n
= (int)(pose
- posi
+ 1);
117 if (posi
+ n
<= pose
) /* overflow? */
118 luaL_error(L
, "string slice too long");
119 luaL_checkstack(L
, n
, "string slice too long");
121 lua_pushinteger(L
, uchar(s
[posi
+i
-1]));
126 static int str_char (lua_State
*L
) {
127 int n
= lua_gettop(L
); /* number of arguments */
130 luaL_buffinit(L
, &b
);
131 for (i
=1; i
<=n
; i
++) {
132 int c
= luaL_checkint(L
, i
);
133 luaL_argcheck(L
, uchar(c
) == c
, i
, "invalid value");
134 luaL_addchar(&b
, uchar(c
));
141 static int writer (lua_State
*L
, const void* b
, size_t size
, void* B
) {
143 luaL_addlstring((luaL_Buffer
*) B
, (const char *)b
, size
);
148 static int str_dump (lua_State
*L
) {
150 luaL_checktype(L
, 1, LUA_TFUNCTION
);
153 if (lua_dump(L
, writer
, &b
) != 0)
154 luaL_error(L
, "unable to dump given function");
/*
** {======================================================
** PATTERN MATCHING
** =======================================================
*/
/*
** Sentinel values stored in the `len' field of a capture instead of a
** real length:
**   CAP_UNFINISHED  the capture has been opened but not yet closed
**   CAP_POSITION    the capture is a position capture; its value is
**                   pushed as an integer position, not as a substring
*/
#define CAP_UNFINISHED	(-1)
#define CAP_POSITION	(-2)
171 typedef struct MatchState
{
172 const char *src_init
; /* init of source string */
173 const char *src_end
; /* end (`\0') of source string */
175 int level
; /* total number of captures (finished or unfinished) */
179 } capture
[LUA_MAXCAPTURES
];
/*
** Characters that have a special meaning in a pattern. str_find_aux
** scans the pattern for any of them (via strpbrk) and falls back to a
** plain substring search when none is present.
*/
#define SPECIALS	"^$*+?.([%-"
187 static int check_capture (MatchState
*ms
, int l
) {
189 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
190 return luaL_error(ms
->L
, "invalid capture index");
195 static int capture_to_close (MatchState
*ms
) {
196 int level
= ms
->level
;
197 for (level
--; level
>=0; level
--)
198 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
199 return luaL_error(ms
->L
, "invalid pattern capture");
203 static const char *classend (MatchState
*ms
, const char *p
) {
207 luaL_error(ms
->L
, "malformed pattern (ends with " LUA_QL("%%") ")");
212 do { /* look for a `]' */
214 luaL_error(ms
->L
, "malformed pattern (missing " LUA_QL("]") ")");
215 if (*(p
++) == L_ESC
&& *p
!= '\0')
216 p
++; /* skip escapes (e.g. `%]') */
/*
** Tests character `c' against the single-letter class `cl'.
**   c   character to test (callers in this file pass it through uchar)
**   cl  class letter; the lower-case letters a, c, d, l, p, s, u, w, x
**       select the matching <ctype.h> predicate, and z matches the
**       character code 0. An upper-case class letter selects the
**       complement of the corresponding lower-case class.
** Any other `cl' is not a class: the result is then simply whether
** `c' equals `cl' (this is how an escaped literal is matched).
** Returns non-zero on a match, 0 otherwise.
*/
static int match_class (int c, int cl) {
  int res;
  switch (tolower(cl)) {
    case 'a' : res = isalpha(c); break;
    case 'c' : res = iscntrl(c); break;
    case 'd' : res = isdigit(c); break;
    case 'l' : res = islower(c); break;
    case 'p' : res = ispunct(c); break;
    case 's' : res = isspace(c); break;
    case 'u' : res = isupper(c); break;
    case 'w' : res = isalnum(c); break;
    case 'x' : res = isxdigit(c); break;
    case 'z' : res = (c == 0); break;
    default: return (cl == c);  /* not a class letter: literal compare */
  }
  /* upper-case class letter means "not in the class" */
  return (islower(cl) ? res : !res);
}
246 static int matchbracketclass (int c
, const char *p
, const char *ec
) {
250 p
++; /* skip the `^' */
255 if (match_class(c
, uchar(*p
)))
258 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
260 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
263 else if (uchar(*p
) == c
) return sig
;
269 static int singlematch (int c
, const char *p
, const char *ep
) {
271 case '.': return 1; /* matches any char */
272 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
273 case '[': return matchbracketclass(c
, p
, ep
-1);
274 default: return (uchar(*p
) == c
);
279 static const char *match (MatchState
*ms
, const char *s
, const char *p
);
282 static const char *matchbalance (MatchState
*ms
, const char *s
,
284 if (*p
== 0 || *(p
+1) == 0)
285 luaL_error(ms
->L
, "unbalanced pattern");
286 if (*s
!= *p
) return NULL
;
291 while (++s
< ms
->src_end
) {
293 if (--cont
== 0) return s
+1;
295 else if (*s
== b
) cont
++;
298 return NULL
; /* string ends out of balance */
302 static const char *max_expand (MatchState
*ms
, const char *s
,
303 const char *p
, const char *ep
) {
304 ptrdiff_t i
= 0; /* counts maximum expand for item */
305 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
307 /* keeps trying to match with the maximum repetitions */
309 const char *res
= match(ms
, (s
+i
), ep
+1);
311 i
--; /* else didn't match; reduce 1 repetition to try again */
317 static const char *min_expand (MatchState
*ms
, const char *s
,
318 const char *p
, const char *ep
) {
320 const char *res
= match(ms
, s
, ep
+1);
323 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
324 s
++; /* try with one more repetition */
330 static const char *start_capture (MatchState
*ms
, const char *s
,
331 const char *p
, int what
) {
333 int level
= ms
->level
;
334 if (level
>= LUA_MAXCAPTURES
) luaL_error(ms
->L
, "too many captures");
335 ms
->capture
[level
].init
= s
;
336 ms
->capture
[level
].len
= what
;
338 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
339 ms
->level
--; /* undo capture */
344 static const char *end_capture (MatchState
*ms
, const char *s
,
346 int l
= capture_to_close(ms
);
348 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
349 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
350 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
355 static const char *match_capture (MatchState
*ms
, const char *s
, int l
) {
357 l
= check_capture(ms
, l
);
358 len
= ms
->capture
[l
].len
;
359 if ((size_t)(ms
->src_end
-s
) >= len
&&
360 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
366 static const char *match (MatchState
*ms
, const char *s
, const char *p
) {
367 init
: /* using goto's to optimize tail recursion */
369 case '(': { /* start capture */
370 if (*(p
+1) == ')') /* position capture? */
371 return start_capture(ms
, s
, p
+2, CAP_POSITION
);
373 return start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
375 case ')': { /* end capture */
376 return end_capture(ms
, s
, p
+1);
380 case 'b': { /* balanced string? */
381 s
= matchbalance(ms
, s
, p
+2);
382 if (s
== NULL
) return NULL
;
383 p
+=4; goto init
; /* else return match(ms, s, p+4); */
385 case 'f': { /* frontier? */
386 const char *ep
; char previous
;
389 luaL_error(ms
->L
, "missing " LUA_QL("[") " after "
390 LUA_QL("%%f") " in pattern");
391 ep
= classend(ms
, p
); /* points to what is next */
392 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
393 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
394 !matchbracketclass(uchar(*s
), p
, ep
-1)) return NULL
;
395 p
=ep
; goto init
; /* else return match(ms, s, ep); */
398 if (isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
399 s
= match_capture(ms
, s
, uchar(*(p
+1)));
400 if (s
== NULL
) return NULL
;
401 p
+=2; goto init
; /* else return match(ms, s, p+2) */
403 goto dflt
; /* case default */
407 case '\0': { /* end of pattern */
408 return s
; /* match succeeded */
411 if (*(p
+1) == '\0') /* is the `$' the last char in pattern? */
412 return (s
== ms
->src_end
) ? s
: NULL
; /* check end of string */
415 default: dflt
: { /* it is a pattern item */
416 const char *ep
= classend(ms
, p
); /* points to what is next */
417 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
419 case '?': { /* optional */
421 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
))
423 p
=ep
+1; goto init
; /* else return match(ms, s, ep+1); */
425 case '*': { /* 0 or more repetitions */
426 return max_expand(ms
, s
, p
, ep
);
428 case '+': { /* 1 or more repetitions */
429 return (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
431 case '-': { /* 0 or more repetitions (minimum) */
432 return min_expand(ms
, s
, p
, ep
);
436 s
++; p
=ep
; goto init
; /* else return match(ms, s+1, ep); */
445 static const char *lmemfind (const char *s1
, size_t l1
,
446 const char *s2
, size_t l2
) {
447 if (l2
== 0) return s1
; /* empty strings are everywhere */
448 else if (l2
> l1
) return NULL
; /* avoids a negative `l1' */
450 const char *init
; /* to search for a `*s2' inside `s1' */
451 l2
--; /* 1st char will be checked by `memchr' */
452 l1
= l1
-l2
; /* `s2' cannot be found after that */
453 while (l1
> 0 && (init
= (const char *)memchr(s1
, *s2
, l1
)) != NULL
) {
454 init
++; /* 1st char is already checked */
455 if (memcmp(init
, s2
+1, l2
) == 0)
457 else { /* correct `l1' and `s1' to try again */
462 return NULL
; /* not found */
467 static void push_onecapture (MatchState
*ms
, int i
, const char *s
,
469 if (i
>= ms
->level
) {
470 if (i
== 0) /* ms->level == 0, too */
471 lua_pushlstring(ms
->L
, s
, e
- s
); /* add whole match */
473 luaL_error(ms
->L
, "invalid capture index");
476 ptrdiff_t l
= ms
->capture
[i
].len
;
477 if (l
== CAP_UNFINISHED
) luaL_error(ms
->L
, "unfinished capture");
478 if (l
== CAP_POSITION
)
479 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
481 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, l
);
486 static int push_captures (MatchState
*ms
, const char *s
, const char *e
) {
488 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
489 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
490 for (i
= 0; i
< nlevels
; i
++)
491 push_onecapture(ms
, i
, s
, e
);
492 return nlevels
; /* number of strings pushed */
496 static int str_find_aux (lua_State
*L
, int find
) {
498 const char *s
= luaL_checklstring(L
, 1, &l1
);
499 const char *p
= luaL_checklstring(L
, 2, &l2
);
500 ptrdiff_t init
= posrelat(luaL_optinteger(L
, 3, 1), l1
) - 1;
501 if (init
< 0) init
= 0;
502 else if ((size_t)(init
) > l1
) init
= (ptrdiff_t)l1
;
503 if (find
&& (lua_toboolean(L
, 4) || /* explicit request? */
504 strpbrk(p
, SPECIALS
) == NULL
)) { /* or no special characters? */
505 /* do a plain search */
506 const char *s2
= lmemfind(s
+init
, l1
-init
, p
, l2
);
508 lua_pushinteger(L
, s2
-s
+1);
509 lua_pushinteger(L
, s2
-s
+l2
);
515 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
516 const char *s1
=s
+init
;
523 if ((res
=match(&ms
, s1
, p
)) != NULL
) {
525 lua_pushinteger(L
, s1
-s
+1); /* start */
526 lua_pushinteger(L
, res
-s
); /* end */
527 return push_captures(&ms
, NULL
, 0) + 2;
530 return push_captures(&ms
, s1
, res
);
532 } while (s1
++ < ms
.src_end
&& !anchor
);
534 lua_pushnil(L
); /* not found */
539 static int str_find (lua_State
*L
) {
540 return str_find_aux(L
, 1);
544 static int str_match (lua_State
*L
) {
545 return str_find_aux(L
, 0);
549 static int gmatch_aux (lua_State
*L
) {
552 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
553 const char *p
= lua_tostring(L
, lua_upvalueindex(2));
558 for (src
= s
+ (size_t)lua_tointeger(L
, lua_upvalueindex(3));
563 if ((e
= match(&ms
, src
, p
)) != NULL
) {
564 lua_Integer newstart
= e
-s
;
565 if (e
== src
) newstart
++; /* empty match? go at least one position */
566 lua_pushinteger(L
, newstart
);
567 lua_replace(L
, lua_upvalueindex(3));
568 return push_captures(&ms
, src
, e
);
571 return 0; /* not found */
575 static int gmatch (lua_State
*L
) {
576 luaL_checkstring(L
, 1);
577 luaL_checkstring(L
, 2);
579 lua_pushinteger(L
, 0);
580 lua_pushcclosure(L
, gmatch_aux
, 3);
585 static int gfind_nodef (lua_State
*L
) {
586 return luaL_error(L
, LUA_QL("string.gfind") " was renamed to "
587 LUA_QL("string.gmatch"));
591 static void add_s (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
594 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
595 for (i
= 0; i
< l
; i
++) {
596 if (news
[i
] != L_ESC
)
597 luaL_addchar(b
, news
[i
]);
600 if (!isdigit(uchar(news
[i
])))
601 luaL_addchar(b
, news
[i
]);
602 else if (news
[i
] == '0')
603 luaL_addlstring(b
, s
, e
- s
);
605 push_onecapture(ms
, news
[i
] - '1', s
, e
);
606 luaL_addvalue(b
); /* add capture to accumulated result */
613 static void add_value (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
615 lua_State
*L
= ms
->L
;
616 switch (lua_type(L
, 3)) {
622 case LUA_TFUNCTION
: {
625 n
= push_captures(ms
, s
, e
);
630 push_onecapture(ms
, 0, s
, e
);
635 if (!lua_toboolean(L
, -1)) { /* nil or false? */
637 lua_pushlstring(L
, s
, e
- s
); /* keep original text */
639 else if (!lua_isstring(L
, -1))
640 luaL_error(L
, "invalid replacement value (a %s)", luaL_typename(L
, -1));
641 luaL_addvalue(b
); /* add result to accumulator */
645 static int str_gsub (lua_State
*L
) {
647 const char *src
= luaL_checklstring(L
, 1, &srcl
);
648 const char *p
= luaL_checkstring(L
, 2);
649 int tr
= lua_type(L
, 3);
650 int max_s
= luaL_optint(L
, 4, srcl
+1);
651 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
655 luaL_argcheck(L
, tr
== LUA_TNUMBER
|| tr
== LUA_TSTRING
||
656 tr
== LUA_TFUNCTION
|| tr
== LUA_TTABLE
, 3,
657 "string/function/table expected");
658 luaL_buffinit(L
, &b
);
661 ms
.src_end
= src
+srcl
;
665 e
= match(&ms
, src
, p
);
668 add_value(&ms
, &b
, src
, e
);
670 if (e
&& e
>src
) /* non empty match? */
671 src
= e
; /* skip it */
672 else if (src
< ms
.src_end
)
673 luaL_addchar(&b
, *src
++);
677 luaL_addlstring(&b
, src
, ms
.src_end
-src
);
679 lua_pushinteger(L
, n
); /* number of substitutions */
/* }====================================================== */
/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
/* valid flags in a format specification */
#define FLAGS	"-+ #0"
/*
** maximum size of each format specification (such as '%-099.99d')
** (+10 accounts for %99.99x plus margin of error)
*/
#define MAX_FORMAT	(sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
697 static void addquoted (lua_State
*L
, luaL_Buffer
*b
, int arg
) {
699 const char *s
= luaL_checklstring(L
, arg
, &l
);
700 luaL_addchar(b
, '"');
703 case '"': case '\\': case '\n': {
704 luaL_addchar(b
, '\\');
709 luaL_addlstring(b
, "\\r", 2);
713 luaL_addlstring(b
, "\\000", 4);
723 luaL_addchar(b
, '"');
726 static const char *scanformat (lua_State
*L
, const char *strfrmt
, char *form
) {
727 const char *p
= strfrmt
;
728 while (*p
!= '\0' && strchr(FLAGS
, *p
) != NULL
) p
++; /* skip flags */
729 if ((size_t)(p
- strfrmt
) >= sizeof(FLAGS
))
730 luaL_error(L
, "invalid format (repeated flags)");
731 if (isdigit(uchar(*p
))) p
++; /* skip width */
732 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
735 if (isdigit(uchar(*p
))) p
++; /* skip precision */
736 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
738 if (isdigit(uchar(*p
)))
739 luaL_error(L
, "invalid format (width or precision too long)");
741 strncpy(form
, strfrmt
, p
- strfrmt
+ 1);
742 form
+= p
- strfrmt
+ 1;
748 static void addintlen (char *form
) {
749 size_t l
= strlen(form
);
750 char spec
= form
[l
- 1];
751 strcpy(form
+ l
- 1, LUA_INTFRMLEN
);
752 form
[l
+ sizeof(LUA_INTFRMLEN
) - 2] = spec
;
753 form
[l
+ sizeof(LUA_INTFRMLEN
) - 1] = '\0';
757 static int str_format (lua_State
*L
) {
760 const char *strfrmt
= luaL_checklstring(L
, arg
, &sfl
);
761 const char *strfrmt_end
= strfrmt
+sfl
;
763 luaL_buffinit(L
, &b
);
764 while (strfrmt
< strfrmt_end
) {
765 if (*strfrmt
!= L_ESC
)
766 luaL_addchar(&b
, *strfrmt
++);
767 else if (*++strfrmt
== L_ESC
)
768 luaL_addchar(&b
, *strfrmt
++); /* %% */
769 else { /* format item */
770 char form
[MAX_FORMAT
]; /* to store the format (`%...') */
771 char buff
[MAX_ITEM
]; /* to store the formatted item */
773 strfrmt
= scanformat(L
, strfrmt
, form
);
774 switch (*strfrmt
++) {
776 snprintf(buff
, sizeof (buff
), form
, (int)luaL_checknumber(L
, arg
));
779 case 'd': case 'i': {
781 snprintf(buff
, sizeof (buff
), form
,
782 (LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
785 case 'o': case 'u': case 'x': case 'X': {
787 snprintf(buff
, sizeof (buff
), form
,
788 (unsigned LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
791 case 'e': case 'E': case 'f':
792 case 'g': case 'G': {
793 snprintf(buff
, sizeof (buff
), form
, (double)luaL_checknumber(L
, arg
));
797 addquoted(L
, &b
, arg
);
798 continue; /* skip the 'addsize' at the end */
802 const char *s
= luaL_checklstring(L
, arg
, &l
);
803 if (!strchr(form
, '.') && l
>= 100) {
804 /* no precision and string is too long to be formatted;
805 keep original string */
806 lua_pushvalue(L
, arg
);
808 continue; /* skip the `addsize' at the end */
811 snprintf(buff
, sizeof (buff
), form
, s
);
815 default: { /* also treat cases `pnLlh' */
816 return luaL_error(L
, "invalid option " LUA_QL("%%%c") " to "
817 LUA_QL("format"), *(strfrmt
- 1));
820 luaL_addlstring(&b
, buff
, strlen(buff
));
828 static const luaL_Reg strlib
[] = {
833 {"format", str_format
},
834 {"gfind", gfind_nodef
},
838 {"lower", str_lower
},
839 {"match", str_match
},
841 {"reverse", str_reverse
},
843 {"upper", str_upper
},
848 static void createmetatable (lua_State
*L
) {
849 lua_createtable(L
, 0, 1); /* create metatable for strings */
850 lua_pushliteral(L
, ""); /* dummy string */
851 lua_pushvalue(L
, -2);
852 lua_setmetatable(L
, -2); /* set string metatable */
853 lua_pop(L
, 1); /* pop dummy string */
854 lua_pushvalue(L
, -2); /* string library... */
855 lua_setfield(L
, -2, "__index"); /* ...is the __index metamethod */
856 lua_pop(L
, 1); /* pop metatable */
861 ** Open string library
863 LUALIB_API
int luaopen_string (lua_State
*L
) {
864 luaL_register(L
, LUA_STRLIBNAME
, strlib
);
865 #if defined(LUA_COMPAT_GFIND)
866 lua_getfield(L
, -1, "gmatch");
867 lua_setfield(L
, -2, "gfind");