2 ** $Id: lstrlib.c,v 1.132.1.4 2008/07/11 17:27:21 roberto Exp $
3 ** Standard library for string operations and pattern-matching
4 ** See Copyright Notice in lua.h
23 /* macro to `unsign' a character */
24 #define uchar(c) ((unsigned char)(c))
28 static int str_len (lua_State
*L
) {
30 luaL_checklstring(L
, 1, &l
);
31 lua_pushinteger(L
, l
);
36 static ptrdiff_t posrelat (ptrdiff_t pos
, size_t len
) {
37 /* relative string position: negative means back from end */
38 if (pos
< 0) pos
+= (ptrdiff_t)len
+ 1;
39 return (pos
>= 0) ? pos
: 0;
43 static int str_sub (lua_State
*L
) {
45 const char *s
= luaL_checklstring(L
, 1, &l
);
46 ptrdiff_t start
= posrelat(luaL_checkinteger(L
, 2), l
);
47 ptrdiff_t end
= posrelat(luaL_optinteger(L
, 3, -1), l
);
48 if (start
< 1) start
= 1;
49 if (end
> (ptrdiff_t)l
) end
= (ptrdiff_t)l
;
51 lua_pushlstring(L
, s
+start
-1, end
-start
+1);
52 else lua_pushliteral(L
, "");
57 static int str_reverse (lua_State
*L
) {
60 const char *s
= luaL_checklstring(L
, 1, &l
);
62 while (l
--) luaL_addchar(&b
, s
[l
]);
68 static int str_lower (lua_State
*L
) {
72 const char *s
= luaL_checklstring(L
, 1, &l
);
75 luaL_addchar(&b
, tolower(uchar(s
[i
])));
81 static int str_upper (lua_State
*L
) {
85 const char *s
= luaL_checklstring(L
, 1, &l
);
88 luaL_addchar(&b
, toupper(uchar(s
[i
])));
93 static int str_rep (lua_State
*L
) {
96 const char *s
= luaL_checklstring(L
, 1, &l
);
97 int n
= luaL_checkint(L
, 2);
100 luaL_addlstring(&b
, s
, l
);
106 static int str_byte (lua_State
*L
) {
108 const char *s
= luaL_checklstring(L
, 1, &l
);
109 ptrdiff_t posi
= posrelat(luaL_optinteger(L
, 2, 1), l
);
110 ptrdiff_t pose
= posrelat(luaL_optinteger(L
, 3, posi
), l
);
112 if (posi
<= 0) posi
= 1;
113 if ((size_t)pose
> l
) pose
= l
;
114 if (posi
> pose
) return 0; /* empty interval; return no values */
115 n
= (int)(pose
- posi
+ 1);
116 if (posi
+ n
<= pose
) /* overflow? */
117 luaL_error(L
, "string slice too long");
118 luaL_checkstack(L
, n
, "string slice too long");
120 lua_pushinteger(L
, uchar(s
[posi
+i
-1]));
125 static int str_char (lua_State
*L
) {
126 int n
= lua_gettop(L
); /* number of arguments */
129 luaL_buffinit(L
, &b
);
130 for (i
=1; i
<=n
; i
++) {
131 int c
= luaL_checkint(L
, i
);
132 luaL_argcheck(L
, uchar(c
) == c
, i
, "invalid value");
133 luaL_addchar(&b
, uchar(c
));
140 static int writer (lua_State
*L
, const void* b
, size_t size
, void* B
) {
142 luaL_addlstring((luaL_Buffer
*) B
, (const char *)b
, size
);
147 static int str_dump (lua_State
*L
) {
149 luaL_checktype(L
, 1, LUA_TFUNCTION
);
152 if (lua_dump(L
, writer
, &b
) != 0)
153 luaL_error(L
, "unable to dump given function");
161 ** {======================================================
163 ** =======================================================
167 #define CAP_UNFINISHED (-1)
168 #define CAP_POSITION (-2)
170 typedef struct MatchState
{
171 const char *src_init
; /* init of source string */
172 const char *src_end
; /* end (`\0') of source string */
174 int level
; /* total number of captures (finished or unfinished) */
178 } capture
[LUA_MAXCAPTURES
];
183 #define SPECIALS "^$*+?.([%-"
186 static int check_capture (MatchState
*ms
, int l
) {
188 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
189 return luaL_error(ms
->L
, "invalid capture index");
194 static int capture_to_close (MatchState
*ms
) {
195 int level
= ms
->level
;
196 for (level
--; level
>=0; level
--)
197 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
198 return luaL_error(ms
->L
, "invalid pattern capture");
202 static const char *classend (MatchState
*ms
, const char *p
) {
206 luaL_error(ms
->L
, "malformed pattern (ends with " LUA_QL("%%") ")");
211 do { /* look for a `]' */
213 luaL_error(ms
->L
, "malformed pattern (missing " LUA_QL("]") ")");
214 if (*(p
++) == L_ESC
&& *p
!= '\0')
215 p
++; /* skip escapes (e.g. `%]') */
226 static int match_class (int c
, int cl
) {
228 switch (tolower(cl
)) {
229 case 'a' : res
= isalpha(c
); break;
230 case 'c' : res
= iscntrl(c
); break;
231 case 'd' : res
= isdigit(c
); break;
232 case 'l' : res
= islower(c
); break;
233 case 'p' : res
= ispunct(c
); break;
234 case 's' : res
= isspace(c
); break;
235 case 'u' : res
= isupper(c
); break;
236 case 'w' : res
= isalnum(c
); break;
237 case 'x' : res
= isxdigit(c
); break;
238 case 'z' : res
= (c
== 0); break;
239 default: return (cl
== c
);
241 return (islower(cl
) ? res
: !res
);
245 static int matchbracketclass (int c
, const char *p
, const char *ec
) {
249 p
++; /* skip the `^' */
254 if (match_class(c
, uchar(*p
)))
257 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
259 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
262 else if (uchar(*p
) == c
) return sig
;
268 static int singlematch (int c
, const char *p
, const char *ep
) {
270 case '.': return 1; /* matches any char */
271 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
272 case '[': return matchbracketclass(c
, p
, ep
-1);
273 default: return (uchar(*p
) == c
);
278 static const char *match (MatchState
*ms
, const char *s
, const char *p
);
281 static const char *matchbalance (MatchState
*ms
, const char *s
,
283 if (*p
== 0 || *(p
+1) == 0)
284 luaL_error(ms
->L
, "unbalanced pattern");
285 if (*s
!= *p
) return NULL
;
290 while (++s
< ms
->src_end
) {
292 if (--cont
== 0) return s
+1;
294 else if (*s
== b
) cont
++;
297 return NULL
; /* string ends out of balance */
301 static const char *max_expand (MatchState
*ms
, const char *s
,
302 const char *p
, const char *ep
) {
303 ptrdiff_t i
= 0; /* counts maximum expand for item */
304 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
306 /* keeps trying to match with the maximum repetitions */
308 const char *res
= match(ms
, (s
+i
), ep
+1);
310 i
--; /* else didn't match; reduce 1 repetition to try again */
316 static const char *min_expand (MatchState
*ms
, const char *s
,
317 const char *p
, const char *ep
) {
319 const char *res
= match(ms
, s
, ep
+1);
322 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
323 s
++; /* try with one more repetition */
329 static const char *start_capture (MatchState
*ms
, const char *s
,
330 const char *p
, int what
) {
332 int level
= ms
->level
;
333 if (level
>= LUA_MAXCAPTURES
) luaL_error(ms
->L
, "too many captures");
334 ms
->capture
[level
].init
= s
;
335 ms
->capture
[level
].len
= what
;
337 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
338 ms
->level
--; /* undo capture */
343 static const char *end_capture (MatchState
*ms
, const char *s
,
345 int l
= capture_to_close(ms
);
347 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
348 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
349 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
354 static const char *match_capture (MatchState
*ms
, const char *s
, int l
) {
356 l
= check_capture(ms
, l
);
357 len
= ms
->capture
[l
].len
;
358 if ((size_t)(ms
->src_end
-s
) >= len
&&
359 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
365 static const char *match (MatchState
*ms
, const char *s
, const char *p
) {
366 init
: /* using goto's to optimize tail recursion */
368 case '(': { /* start capture */
369 if (*(p
+1) == ')') /* position capture? */
370 return start_capture(ms
, s
, p
+2, CAP_POSITION
);
372 return start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
374 case ')': { /* end capture */
375 return end_capture(ms
, s
, p
+1);
379 case 'b': { /* balanced string? */
380 s
= matchbalance(ms
, s
, p
+2);
381 if (s
== NULL
) return NULL
;
382 p
+=4; goto init
; /* else return match(ms, s, p+4); */
384 case 'f': { /* frontier? */
385 const char *ep
; char previous
;
388 luaL_error(ms
->L
, "missing " LUA_QL("[") " after "
389 LUA_QL("%%f") " in pattern");
390 ep
= classend(ms
, p
); /* points to what is next */
391 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
392 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
393 !matchbracketclass(uchar(*s
), p
, ep
-1)) return NULL
;
394 p
=ep
; goto init
; /* else return match(ms, s, ep); */
397 if (isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
398 s
= match_capture(ms
, s
, uchar(*(p
+1)));
399 if (s
== NULL
) return NULL
;
400 p
+=2; goto init
; /* else return match(ms, s, p+2) */
402 goto dflt
; /* case default */
406 case '\0': { /* end of pattern */
407 return s
; /* match succeeded */
410 if (*(p
+1) == '\0') /* is the `$' the last char in pattern? */
411 return (s
== ms
->src_end
) ? s
: NULL
; /* check end of string */
414 default: dflt
: { /* it is a pattern item */
415 const char *ep
= classend(ms
, p
); /* points to what is next */
416 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
418 case '?': { /* optional */
420 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
))
422 p
=ep
+1; goto init
; /* else return match(ms, s, ep+1); */
424 case '*': { /* 0 or more repetitions */
425 return max_expand(ms
, s
, p
, ep
);
427 case '+': { /* 1 or more repetitions */
428 return (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
430 case '-': { /* 0 or more repetitions (minimum) */
431 return min_expand(ms
, s
, p
, ep
);
435 s
++; p
=ep
; goto init
; /* else return match(ms, s+1, ep); */
444 static const char *lmemfind (const char *s1
, size_t l1
,
445 const char *s2
, size_t l2
) {
446 if (l2
== 0) return s1
; /* empty strings are everywhere */
447 else if (l2
> l1
) return NULL
; /* avoids a negative `l1' */
449 const char *init
; /* to search for a `*s2' inside `s1' */
450 l2
--; /* 1st char will be checked by `memchr' */
451 l1
= l1
-l2
; /* `s2' cannot be found after that */
452 while (l1
> 0 && (init
= (const char *)memchr(s1
, *s2
, l1
)) != NULL
) {
453 init
++; /* 1st char is already checked */
454 if (memcmp(init
, s2
+1, l2
) == 0)
456 else { /* correct `l1' and `s1' to try again */
461 return NULL
; /* not found */
466 static void push_onecapture (MatchState
*ms
, int i
, const char *s
,
468 if (i
>= ms
->level
) {
469 if (i
== 0) /* ms->level == 0, too */
470 lua_pushlstring(ms
->L
, s
, e
- s
); /* add whole match */
472 luaL_error(ms
->L
, "invalid capture index");
475 ptrdiff_t l
= ms
->capture
[i
].len
;
476 if (l
== CAP_UNFINISHED
) luaL_error(ms
->L
, "unfinished capture");
477 if (l
== CAP_POSITION
)
478 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
480 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, l
);
485 static int push_captures (MatchState
*ms
, const char *s
, const char *e
) {
487 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
488 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
489 for (i
= 0; i
< nlevels
; i
++)
490 push_onecapture(ms
, i
, s
, e
);
491 return nlevels
; /* number of strings pushed */
495 static int str_find_aux (lua_State
*L
, int find
) {
497 const char *s
= luaL_checklstring(L
, 1, &l1
);
498 const char *p
= luaL_checklstring(L
, 2, &l2
);
499 ptrdiff_t init
= posrelat(luaL_optinteger(L
, 3, 1), l1
) - 1;
500 if (init
< 0) init
= 0;
501 else if ((size_t)(init
) > l1
) init
= (ptrdiff_t)l1
;
502 if (find
&& (lua_toboolean(L
, 4) || /* explicit request? */
503 strpbrk(p
, SPECIALS
) == NULL
)) { /* or no special characters? */
504 /* do a plain search */
505 const char *s2
= lmemfind(s
+init
, l1
-init
, p
, l2
);
507 lua_pushinteger(L
, s2
-s
+1);
508 lua_pushinteger(L
, s2
-s
+l2
);
514 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
515 const char *s1
=s
+init
;
522 if ((res
=match(&ms
, s1
, p
)) != NULL
) {
524 lua_pushinteger(L
, s1
-s
+1); /* start */
525 lua_pushinteger(L
, res
-s
); /* end */
526 return push_captures(&ms
, NULL
, 0) + 2;
529 return push_captures(&ms
, s1
, res
);
531 } while (s1
++ < ms
.src_end
&& !anchor
);
533 lua_pushnil(L
); /* not found */
538 static int str_find (lua_State
*L
) {
539 return str_find_aux(L
, 1);
543 static int str_match (lua_State
*L
) {
544 return str_find_aux(L
, 0);
548 static int gmatch_aux (lua_State
*L
) {
551 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
552 const char *p
= lua_tostring(L
, lua_upvalueindex(2));
557 for (src
= s
+ (size_t)lua_tointeger(L
, lua_upvalueindex(3));
562 if ((e
= match(&ms
, src
, p
)) != NULL
) {
563 lua_Integer newstart
= e
-s
;
564 if (e
== src
) newstart
++; /* empty match? go at least one position */
565 lua_pushinteger(L
, newstart
);
566 lua_replace(L
, lua_upvalueindex(3));
567 return push_captures(&ms
, src
, e
);
570 return 0; /* not found */
574 static int gmatch (lua_State
*L
) {
575 luaL_checkstring(L
, 1);
576 luaL_checkstring(L
, 2);
578 lua_pushinteger(L
, 0);
579 lua_pushcclosure(L
, gmatch_aux
, 3);
584 static int gfind_nodef (lua_State
*L
) {
585 return luaL_error(L
, LUA_QL("string.gfind") " was renamed to "
586 LUA_QL("string.gmatch"));
590 static void add_s (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
593 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
594 for (i
= 0; i
< l
; i
++) {
595 if (news
[i
] != L_ESC
)
596 luaL_addchar(b
, news
[i
]);
599 if (!isdigit(uchar(news
[i
])))
600 luaL_addchar(b
, news
[i
]);
601 else if (news
[i
] == '0')
602 luaL_addlstring(b
, s
, e
- s
);
604 push_onecapture(ms
, news
[i
] - '1', s
, e
);
605 luaL_addvalue(b
); /* add capture to accumulated result */
612 static void add_value (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
614 lua_State
*L
= ms
->L
;
615 switch (lua_type(L
, 3)) {
621 case LUA_TFUNCTION
: {
624 n
= push_captures(ms
, s
, e
);
629 push_onecapture(ms
, 0, s
, e
);
634 if (!lua_toboolean(L
, -1)) { /* nil or false? */
636 lua_pushlstring(L
, s
, e
- s
); /* keep original text */
638 else if (!lua_isstring(L
, -1))
639 luaL_error(L
, "invalid replacement value (a %s)", luaL_typename(L
, -1));
640 luaL_addvalue(b
); /* add result to accumulator */
644 static int str_gsub (lua_State
*L
) {
646 const char *src
= luaL_checklstring(L
, 1, &srcl
);
647 const char *p
= luaL_checkstring(L
, 2);
648 int tr
= lua_type(L
, 3);
649 int max_s
= luaL_optint(L
, 4, srcl
+1);
650 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
654 luaL_argcheck(L
, tr
== LUA_TNUMBER
|| tr
== LUA_TSTRING
||
655 tr
== LUA_TFUNCTION
|| tr
== LUA_TTABLE
, 3,
656 "string/function/table expected");
657 luaL_buffinit(L
, &b
);
660 ms
.src_end
= src
+srcl
;
664 e
= match(&ms
, src
, p
);
667 add_value(&ms
, &b
, src
, e
);
669 if (e
&& e
>src
) /* non empty match? */
670 src
= e
; /* skip it */
671 else if (src
< ms
.src_end
)
672 luaL_addchar(&b
, *src
++);
676 luaL_addlstring(&b
, src
, ms
.src_end
-src
);
678 lua_pushinteger(L
, n
); /* number of substitutions */
682 /* }====================================================== */
685 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
687 /* valid flags in a format specification */
688 #define FLAGS "-+ #0"
690 ** maximum size of each format specification (such as '%-099.99d')
691 ** (+10 accounts for %99.99x plus margin of error)
693 #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
696 static void addquoted (lua_State
*L
, luaL_Buffer
*b
, int arg
) {
698 const char *s
= luaL_checklstring(L
, arg
, &l
);
699 luaL_addchar(b
, '"');
702 case '"': case '\\': case '\n': {
703 luaL_addchar(b
, '\\');
708 luaL_addlstring(b
, "\\r", 2);
712 luaL_addlstring(b
, "\\000", 4);
722 luaL_addchar(b
, '"');
725 static const char *scanformat (lua_State
*L
, const char *strfrmt
, char *form
) {
726 const char *p
= strfrmt
;
727 while (*p
!= '\0' && strchr(FLAGS
, *p
) != NULL
) p
++; /* skip flags */
728 if ((size_t)(p
- strfrmt
) >= sizeof(FLAGS
))
729 luaL_error(L
, "invalid format (repeated flags)");
730 if (isdigit(uchar(*p
))) p
++; /* skip width */
731 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
734 if (isdigit(uchar(*p
))) p
++; /* skip precision */
735 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
737 if (isdigit(uchar(*p
)))
738 luaL_error(L
, "invalid format (width or precision too long)");
740 strncpy(form
, strfrmt
, p
- strfrmt
+ 1);
741 form
+= p
- strfrmt
+ 1;
747 static void addintlen (char *form
) {
748 size_t l
= strlen(form
);
749 char spec
= form
[l
- 1];
750 strcpy(form
+ l
- 1, LUA_INTFRMLEN
);
751 form
[l
+ sizeof(LUA_INTFRMLEN
) - 2] = spec
;
752 form
[l
+ sizeof(LUA_INTFRMLEN
) - 1] = '\0';
756 static int str_format (lua_State
*L
) {
759 const char *strfrmt
= luaL_checklstring(L
, arg
, &sfl
);
760 const char *strfrmt_end
= strfrmt
+sfl
;
762 luaL_buffinit(L
, &b
);
763 while (strfrmt
< strfrmt_end
) {
764 if (*strfrmt
!= L_ESC
)
765 luaL_addchar(&b
, *strfrmt
++);
766 else if (*++strfrmt
== L_ESC
)
767 luaL_addchar(&b
, *strfrmt
++); /* %% */
768 else { /* format item */
769 char form
[MAX_FORMAT
]; /* to store the format (`%...') */
770 char buff
[MAX_ITEM
]; /* to store the formatted item */
772 strfrmt
= scanformat(L
, strfrmt
, form
);
773 switch (*strfrmt
++) {
775 snprintf(buff
, MAX_ITEM
, form
, (int)luaL_checknumber(L
, arg
));
778 case 'd': case 'i': {
780 snprintf(buff
, MAX_ITEM
, form
, (LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
783 case 'o': case 'u': case 'x': case 'X': {
785 snprintf(buff
, MAX_ITEM
, form
, (unsigned LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
788 case 'e': case 'E': case 'f':
789 case 'g': case 'G': {
790 snprintf(buff
, MAX_ITEM
, form
, (double)luaL_checknumber(L
, arg
));
794 addquoted(L
, &b
, arg
);
795 continue; /* skip the 'addsize' at the end */
799 const char *s
= luaL_checklstring(L
, arg
, &l
);
800 if (!strchr(form
, '.') && l
>= 100) {
801 /* no precision and string is too long to be formatted;
802 keep original string */
803 lua_pushvalue(L
, arg
);
805 continue; /* skip the `addsize' at the end */
808 snprintf(buff
, MAX_ITEM
, form
, s
);
812 default: { /* also treat cases `pnLlh' */
813 return luaL_error(L
, "invalid option " LUA_QL("%%%c") " to "
814 LUA_QL("format"), *(strfrmt
- 1));
817 luaL_addlstring(&b
, buff
, strlen(buff
));
825 static const luaL_Reg strlib
[] = {
830 {"format", str_format
},
831 {"gfind", gfind_nodef
},
835 {"lower", str_lower
},
836 {"match", str_match
},
838 {"reverse", str_reverse
},
840 {"upper", str_upper
},
845 static void createmetatable (lua_State
*L
) {
846 lua_createtable(L
, 0, 1); /* create metatable for strings */
847 lua_pushliteral(L
, ""); /* dummy string */
848 lua_pushvalue(L
, -2);
849 lua_setmetatable(L
, -2); /* set string metatable */
850 lua_pop(L
, 1); /* pop dummy string */
851 lua_pushvalue(L
, -2); /* string library... */
852 lua_setfield(L
, -2, "__index"); /* ...is the __index metamethod */
853 lua_pop(L
, 1); /* pop metatable */
858 ** Open string library
860 LUALIB_API
int luaopen_string (lua_State
*L
) {
861 luaL_register(L
, LUA_STRLIBNAME
, strlib
);
862 #if defined(LUA_COMPAT_GFIND)
863 lua_getfield(L
, -1, "gmatch");
864 lua_setfield(L
, -2, "gfind");