1 /* $NetBSD: lstrlib.c,v 1.1.1.2 2012/03/15 00:08:12 alnsn Exp $ */
4 ** $Id: lstrlib.c,v 1.1.1.2 2012/03/15 00:08:12 alnsn Exp $
5 ** Standard library for string operations and pattern-matching
6 ** See Copyright Notice in lua.h
25 /* macro to `unsign' a character */
26 #define uchar(c) ((unsigned char)(c))
30 static int str_len (lua_State
*L
) {
32 luaL_checklstring(L
, 1, &l
);
33 lua_pushinteger(L
, l
);
38 static ptrdiff_t posrelat (ptrdiff_t pos
, size_t len
) {
39 /* relative string position: negative means back from end */
40 if (pos
< 0) pos
+= (ptrdiff_t)len
+ 1;
41 return (pos
>= 0) ? pos
: 0;
45 static int str_sub (lua_State
*L
) {
47 const char *s
= luaL_checklstring(L
, 1, &l
);
48 ptrdiff_t start
= posrelat(luaL_checkinteger(L
, 2), l
);
49 ptrdiff_t end
= posrelat(luaL_optinteger(L
, 3, -1), l
);
50 if (start
< 1) start
= 1;
51 if (end
> (ptrdiff_t)l
) end
= (ptrdiff_t)l
;
53 lua_pushlstring(L
, s
+start
-1, end
-start
+1);
54 else lua_pushliteral(L
, "");
59 static int str_reverse (lua_State
*L
) {
62 const char *s
= luaL_checklstring(L
, 1, &l
);
64 while (l
--) luaL_addchar(&b
, s
[l
]);
70 static int str_lower (lua_State
*L
) {
74 const char *s
= luaL_checklstring(L
, 1, &l
);
77 luaL_addchar(&b
, tolower(uchar(s
[i
])));
83 static int str_upper (lua_State
*L
) {
87 const char *s
= luaL_checklstring(L
, 1, &l
);
90 luaL_addchar(&b
, toupper(uchar(s
[i
])));
95 static int str_rep (lua_State
*L
) {
98 const char *s
= luaL_checklstring(L
, 1, &l
);
99 int n
= luaL_checkint(L
, 2);
100 luaL_buffinit(L
, &b
);
102 luaL_addlstring(&b
, s
, l
);
108 static int str_byte (lua_State
*L
) {
110 const char *s
= luaL_checklstring(L
, 1, &l
);
111 ptrdiff_t posi
= posrelat(luaL_optinteger(L
, 2, 1), l
);
112 ptrdiff_t pose
= posrelat(luaL_optinteger(L
, 3, posi
), l
);
114 if (posi
<= 0) posi
= 1;
115 if ((size_t)pose
> l
) pose
= l
;
116 if (posi
> pose
) return 0; /* empty interval; return no values */
117 n
= (int)(pose
- posi
+ 1);
118 if (posi
+ n
<= pose
) /* overflow? */
119 luaL_error(L
, "string slice too long");
120 luaL_checkstack(L
, n
, "string slice too long");
122 lua_pushinteger(L
, uchar(s
[posi
+i
-1]));
127 static int str_char (lua_State
*L
) {
128 int n
= lua_gettop(L
); /* number of arguments */
131 luaL_buffinit(L
, &b
);
132 for (i
=1; i
<=n
; i
++) {
133 int c
= luaL_checkint(L
, i
);
134 luaL_argcheck(L
, uchar(c
) == c
, i
, "invalid value");
135 luaL_addchar(&b
, uchar(c
));
142 static int writer (lua_State
*L
, const void* b
, size_t size
, void* B
) {
144 luaL_addlstring((luaL_Buffer
*) B
, (const char *)b
, size
);
149 static int str_dump (lua_State
*L
) {
151 luaL_checktype(L
, 1, LUA_TFUNCTION
);
154 if (lua_dump(L
, writer
, &b
) != 0)
155 luaL_error(L
, "unable to dump given function");
163 ** {======================================================
165 ** =======================================================
169 #define CAP_UNFINISHED (-1)
170 #define CAP_POSITION (-2)
172 typedef struct MatchState
{
173 const char *src_init
; /* init of source string */
174 const char *src_end
; /* end (`\0') of source string */
176 int level
; /* total number of captures (finished or unfinished) */
180 } capture
[LUA_MAXCAPTURES
];
185 #define SPECIALS "^$*+?.([%-"
188 static int check_capture (MatchState
*ms
, int l
) {
190 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
191 return luaL_error(ms
->L
, "invalid capture index");
196 static int capture_to_close (MatchState
*ms
) {
197 int level
= ms
->level
;
198 for (level
--; level
>=0; level
--)
199 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
200 return luaL_error(ms
->L
, "invalid pattern capture");
204 static const char *classend (MatchState
*ms
, const char *p
) {
208 luaL_error(ms
->L
, "malformed pattern (ends with " LUA_QL("%%") ")");
213 do { /* look for a `]' */
215 luaL_error(ms
->L
, "malformed pattern (missing " LUA_QL("]") ")");
216 if (*(p
++) == L_ESC
&& *p
!= '\0')
217 p
++; /* skip escapes (e.g. `%]') */
228 static int match_class (int c
, int cl
) {
230 switch (tolower(cl
)) {
231 case 'a' : res
= isalpha(c
); break;
232 case 'c' : res
= iscntrl(c
); break;
233 case 'd' : res
= isdigit(c
); break;
234 case 'l' : res
= islower(c
); break;
235 case 'p' : res
= ispunct(c
); break;
236 case 's' : res
= isspace(c
); break;
237 case 'u' : res
= isupper(c
); break;
238 case 'w' : res
= isalnum(c
); break;
239 case 'x' : res
= isxdigit(c
); break;
240 case 'z' : res
= (c
== 0); break;
241 default: return (cl
== c
);
243 return (islower(cl
) ? res
: !res
);
247 static int matchbracketclass (int c
, const char *p
, const char *ec
) {
251 p
++; /* skip the `^' */
256 if (match_class(c
, uchar(*p
)))
259 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
261 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
264 else if (uchar(*p
) == c
) return sig
;
270 static int singlematch (int c
, const char *p
, const char *ep
) {
272 case '.': return 1; /* matches any char */
273 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
274 case '[': return matchbracketclass(c
, p
, ep
-1);
275 default: return (uchar(*p
) == c
);
280 static const char *match (MatchState
*ms
, const char *s
, const char *p
);
283 static const char *matchbalance (MatchState
*ms
, const char *s
,
285 if (*p
== 0 || *(p
+1) == 0)
286 luaL_error(ms
->L
, "unbalanced pattern");
287 if (*s
!= *p
) return NULL
;
292 while (++s
< ms
->src_end
) {
294 if (--cont
== 0) return s
+1;
296 else if (*s
== b
) cont
++;
299 return NULL
; /* string ends out of balance */
303 static const char *max_expand (MatchState
*ms
, const char *s
,
304 const char *p
, const char *ep
) {
305 ptrdiff_t i
= 0; /* counts maximum expand for item */
306 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
308 /* keeps trying to match with the maximum repetitions */
310 const char *res
= match(ms
, (s
+i
), ep
+1);
312 i
--; /* else didn't match; reduce 1 repetition to try again */
318 static const char *min_expand (MatchState
*ms
, const char *s
,
319 const char *p
, const char *ep
) {
321 const char *res
= match(ms
, s
, ep
+1);
324 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
325 s
++; /* try with one more repetition */
331 static const char *start_capture (MatchState
*ms
, const char *s
,
332 const char *p
, int what
) {
334 int level
= ms
->level
;
335 if (level
>= LUA_MAXCAPTURES
) luaL_error(ms
->L
, "too many captures");
336 ms
->capture
[level
].init
= s
;
337 ms
->capture
[level
].len
= what
;
339 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
340 ms
->level
--; /* undo capture */
345 static const char *end_capture (MatchState
*ms
, const char *s
,
347 int l
= capture_to_close(ms
);
349 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
350 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
351 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
356 static const char *match_capture (MatchState
*ms
, const char *s
, int l
) {
358 l
= check_capture(ms
, l
);
359 len
= ms
->capture
[l
].len
;
360 if ((size_t)(ms
->src_end
-s
) >= len
&&
361 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
367 static const char *match (MatchState
*ms
, const char *s
, const char *p
) {
368 init
: /* using goto's to optimize tail recursion */
370 case '(': { /* start capture */
371 if (*(p
+1) == ')') /* position capture? */
372 return start_capture(ms
, s
, p
+2, CAP_POSITION
);
374 return start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
376 case ')': { /* end capture */
377 return end_capture(ms
, s
, p
+1);
381 case 'b': { /* balanced string? */
382 s
= matchbalance(ms
, s
, p
+2);
383 if (s
== NULL
) return NULL
;
384 p
+=4; goto init
; /* else return match(ms, s, p+4); */
386 case 'f': { /* frontier? */
387 const char *ep
; char previous
;
390 luaL_error(ms
->L
, "missing " LUA_QL("[") " after "
391 LUA_QL("%%f") " in pattern");
392 ep
= classend(ms
, p
); /* points to what is next */
393 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
394 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
395 !matchbracketclass(uchar(*s
), p
, ep
-1)) return NULL
;
396 p
=ep
; goto init
; /* else return match(ms, s, ep); */
399 if (isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
400 s
= match_capture(ms
, s
, uchar(*(p
+1)));
401 if (s
== NULL
) return NULL
;
402 p
+=2; goto init
; /* else return match(ms, s, p+2) */
404 goto dflt
; /* case default */
408 case '\0': { /* end of pattern */
409 return s
; /* match succeeded */
412 if (*(p
+1) == '\0') /* is the `$' the last char in pattern? */
413 return (s
== ms
->src_end
) ? s
: NULL
; /* check end of string */
416 default: dflt
: { /* it is a pattern item */
417 const char *ep
= classend(ms
, p
); /* points to what is next */
418 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
420 case '?': { /* optional */
422 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
))
424 p
=ep
+1; goto init
; /* else return match(ms, s, ep+1); */
426 case '*': { /* 0 or more repetitions */
427 return max_expand(ms
, s
, p
, ep
);
429 case '+': { /* 1 or more repetitions */
430 return (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
432 case '-': { /* 0 or more repetitions (minimum) */
433 return min_expand(ms
, s
, p
, ep
);
437 s
++; p
=ep
; goto init
; /* else return match(ms, s+1, ep); */
446 static const char *lmemfind (const char *s1
, size_t l1
,
447 const char *s2
, size_t l2
) {
448 if (l2
== 0) return s1
; /* empty strings are everywhere */
449 else if (l2
> l1
) return NULL
; /* avoids a negative `l1' */
451 const char *init
; /* to search for a `*s2' inside `s1' */
452 l2
--; /* 1st char will be checked by `memchr' */
453 l1
= l1
-l2
; /* `s2' cannot be found after that */
454 while (l1
> 0 && (init
= (const char *)memchr(s1
, *s2
, l1
)) != NULL
) {
455 init
++; /* 1st char is already checked */
456 if (memcmp(init
, s2
+1, l2
) == 0)
458 else { /* correct `l1' and `s1' to try again */
463 return NULL
; /* not found */
468 static void push_onecapture (MatchState
*ms
, int i
, const char *s
,
470 if (i
>= ms
->level
) {
471 if (i
== 0) /* ms->level == 0, too */
472 lua_pushlstring(ms
->L
, s
, e
- s
); /* add whole match */
474 luaL_error(ms
->L
, "invalid capture index");
477 ptrdiff_t l
= ms
->capture
[i
].len
;
478 if (l
== CAP_UNFINISHED
) luaL_error(ms
->L
, "unfinished capture");
479 if (l
== CAP_POSITION
)
480 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
482 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, l
);
487 static int push_captures (MatchState
*ms
, const char *s
, const char *e
) {
489 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
490 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
491 for (i
= 0; i
< nlevels
; i
++)
492 push_onecapture(ms
, i
, s
, e
);
493 return nlevels
; /* number of strings pushed */
497 static int str_find_aux (lua_State
*L
, int find
) {
499 const char *s
= luaL_checklstring(L
, 1, &l1
);
500 const char *p
= luaL_checklstring(L
, 2, &l2
);
501 ptrdiff_t init
= posrelat(luaL_optinteger(L
, 3, 1), l1
) - 1;
502 if (init
< 0) init
= 0;
503 else if ((size_t)(init
) > l1
) init
= (ptrdiff_t)l1
;
504 if (find
&& (lua_toboolean(L
, 4) || /* explicit request? */
505 strpbrk(p
, SPECIALS
) == NULL
)) { /* or no special characters? */
506 /* do a plain search */
507 const char *s2
= lmemfind(s
+init
, l1
-init
, p
, l2
);
509 lua_pushinteger(L
, s2
-s
+1);
510 lua_pushinteger(L
, s2
-s
+l2
);
516 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
517 const char *s1
=s
+init
;
524 if ((res
=match(&ms
, s1
, p
)) != NULL
) {
526 lua_pushinteger(L
, s1
-s
+1); /* start */
527 lua_pushinteger(L
, res
-s
); /* end */
528 return push_captures(&ms
, NULL
, 0) + 2;
531 return push_captures(&ms
, s1
, res
);
533 } while (s1
++ < ms
.src_end
&& !anchor
);
535 lua_pushnil(L
); /* not found */
540 static int str_find (lua_State
*L
) {
541 return str_find_aux(L
, 1);
545 static int str_match (lua_State
*L
) {
546 return str_find_aux(L
, 0);
550 static int gmatch_aux (lua_State
*L
) {
553 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
554 const char *p
= lua_tostring(L
, lua_upvalueindex(2));
559 for (src
= s
+ (size_t)lua_tointeger(L
, lua_upvalueindex(3));
564 if ((e
= match(&ms
, src
, p
)) != NULL
) {
565 lua_Integer newstart
= e
-s
;
566 if (e
== src
) newstart
++; /* empty match? go at least one position */
567 lua_pushinteger(L
, newstart
);
568 lua_replace(L
, lua_upvalueindex(3));
569 return push_captures(&ms
, src
, e
);
572 return 0; /* not found */
576 static int gmatch (lua_State
*L
) {
577 luaL_checkstring(L
, 1);
578 luaL_checkstring(L
, 2);
580 lua_pushinteger(L
, 0);
581 lua_pushcclosure(L
, gmatch_aux
, 3);
586 static int gfind_nodef (lua_State
*L
) {
587 return luaL_error(L
, LUA_QL("string.gfind") " was renamed to "
588 LUA_QL("string.gmatch"));
592 static void add_s (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
595 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
596 for (i
= 0; i
< l
; i
++) {
597 if (news
[i
] != L_ESC
)
598 luaL_addchar(b
, news
[i
]);
601 if (!isdigit(uchar(news
[i
])))
602 luaL_addchar(b
, news
[i
]);
603 else if (news
[i
] == '0')
604 luaL_addlstring(b
, s
, e
- s
);
606 push_onecapture(ms
, news
[i
] - '1', s
, e
);
607 luaL_addvalue(b
); /* add capture to accumulated result */
614 static void add_value (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
616 lua_State
*L
= ms
->L
;
617 switch (lua_type(L
, 3)) {
623 case LUA_TFUNCTION
: {
626 n
= push_captures(ms
, s
, e
);
631 push_onecapture(ms
, 0, s
, e
);
636 if (!lua_toboolean(L
, -1)) { /* nil or false? */
638 lua_pushlstring(L
, s
, e
- s
); /* keep original text */
640 else if (!lua_isstring(L
, -1))
641 luaL_error(L
, "invalid replacement value (a %s)", luaL_typename(L
, -1));
642 luaL_addvalue(b
); /* add result to accumulator */
646 static int str_gsub (lua_State
*L
) {
648 const char *src
= luaL_checklstring(L
, 1, &srcl
);
649 const char *p
= luaL_checkstring(L
, 2);
650 int tr
= lua_type(L
, 3);
651 int max_s
= luaL_optint(L
, 4, srcl
+1);
652 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
656 luaL_argcheck(L
, tr
== LUA_TNUMBER
|| tr
== LUA_TSTRING
||
657 tr
== LUA_TFUNCTION
|| tr
== LUA_TTABLE
, 3,
658 "string/function/table expected");
659 luaL_buffinit(L
, &b
);
662 ms
.src_end
= src
+srcl
;
666 e
= match(&ms
, src
, p
);
669 add_value(&ms
, &b
, src
, e
);
671 if (e
&& e
>src
) /* non empty match? */
672 src
= e
; /* skip it */
673 else if (src
< ms
.src_end
)
674 luaL_addchar(&b
, *src
++);
678 luaL_addlstring(&b
, src
, ms
.src_end
-src
);
680 lua_pushinteger(L
, n
); /* number of substitutions */
684 /* }====================================================== */
687 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
689 /* valid flags in a format specification */
690 #define FLAGS "-+ #0"
692 ** maximum size of each format specification (such as '%-099.99d')
693 ** (+10 accounts for %99.99x plus margin of error)
695 #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
698 static void addquoted (lua_State
*L
, luaL_Buffer
*b
, int arg
) {
700 const char *s
= luaL_checklstring(L
, arg
, &l
);
701 luaL_addchar(b
, '"');
704 case '"': case '\\': case '\n': {
705 luaL_addchar(b
, '\\');
710 luaL_addlstring(b
, "\\r", 2);
714 luaL_addlstring(b
, "\\000", 4);
724 luaL_addchar(b
, '"');
727 static const char *scanformat (lua_State
*L
, const char *strfrmt
, char *form
) {
728 const char *p
= strfrmt
;
729 while (*p
!= '\0' && strchr(FLAGS
, *p
) != NULL
) p
++; /* skip flags */
730 if ((size_t)(p
- strfrmt
) >= sizeof(FLAGS
))
731 luaL_error(L
, "invalid format (repeated flags)");
732 if (isdigit(uchar(*p
))) p
++; /* skip width */
733 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
736 if (isdigit(uchar(*p
))) p
++; /* skip precision */
737 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
739 if (isdigit(uchar(*p
)))
740 luaL_error(L
, "invalid format (width or precision too long)");
742 strncpy(form
, strfrmt
, p
- strfrmt
+ 1);
743 form
+= p
- strfrmt
+ 1;
749 static void addintlen (char *form
) {
750 size_t l
= strlen(form
);
751 char spec
= form
[l
- 1];
752 strcpy(form
+ l
- 1, LUA_INTFRMLEN
);
753 form
[l
+ sizeof(LUA_INTFRMLEN
) - 2] = spec
;
754 form
[l
+ sizeof(LUA_INTFRMLEN
) - 1] = '\0';
758 static int str_format (lua_State
*L
) {
759 int top
= lua_gettop(L
);
762 const char *strfrmt
= luaL_checklstring(L
, arg
, &sfl
);
763 const char *strfrmt_end
= strfrmt
+sfl
;
765 luaL_buffinit(L
, &b
);
766 while (strfrmt
< strfrmt_end
) {
767 if (*strfrmt
!= L_ESC
)
768 luaL_addchar(&b
, *strfrmt
++);
769 else if (*++strfrmt
== L_ESC
)
770 luaL_addchar(&b
, *strfrmt
++); /* %% */
771 else { /* format item */
772 char form
[MAX_FORMAT
]; /* to store the format (`%...') */
773 char buff
[MAX_ITEM
]; /* to store the formatted item */
775 luaL_argerror(L
, arg
, "no value");
776 strfrmt
= scanformat(L
, strfrmt
, form
);
777 switch (*strfrmt
++) {
779 sprintf(buff
, form
, (int)luaL_checknumber(L
, arg
));
782 case 'd': case 'i': {
784 sprintf(buff
, form
, (LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
787 case 'o': case 'u': case 'x': case 'X': {
789 sprintf(buff
, form
, (unsigned LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
792 case 'e': case 'E': case 'f':
793 case 'g': case 'G': {
794 sprintf(buff
, form
, (double)luaL_checknumber(L
, arg
));
798 addquoted(L
, &b
, arg
);
799 continue; /* skip the 'addsize' at the end */
803 const char *s
= luaL_checklstring(L
, arg
, &l
);
804 if (!strchr(form
, '.') && l
>= 100) {
805 /* no precision and string is too long to be formatted;
806 keep original string */
807 lua_pushvalue(L
, arg
);
809 continue; /* skip the `addsize' at the end */
812 sprintf(buff
, form
, s
);
816 default: { /* also treat cases `pnLlh' */
817 return luaL_error(L
, "invalid option " LUA_QL("%%%c") " to "
818 LUA_QL("format"), *(strfrmt
- 1));
821 luaL_addlstring(&b
, buff
, strlen(buff
));
829 static const luaL_Reg strlib
[] = {
834 {"format", str_format
},
835 {"gfind", gfind_nodef
},
839 {"lower", str_lower
},
840 {"match", str_match
},
842 {"reverse", str_reverse
},
844 {"upper", str_upper
},
849 static void createmetatable (lua_State
*L
) {
850 lua_createtable(L
, 0, 1); /* create metatable for strings */
851 lua_pushliteral(L
, ""); /* dummy string */
852 lua_pushvalue(L
, -2);
853 lua_setmetatable(L
, -2); /* set string metatable */
854 lua_pop(L
, 1); /* pop dummy string */
855 lua_pushvalue(L
, -2); /* string library... */
856 lua_setfield(L
, -2, "__index"); /* ...is the __index metamethod */
857 lua_pop(L
, 1); /* pop metatable */
862 ** Open string library
864 LUALIB_API
int luaopen_string (lua_State
*L
) {
865 luaL_register(L
, LUA_STRLIBNAME
, strlib
);
866 #if defined(LUA_COMPAT_GFIND)
867 lua_getfield(L
, -1, "gmatch");
868 lua_setfield(L
, -2, "gfind");