2 XXX support range parameter on search
3 XXX support mstop parameter on search
6 /***********************************************************
7 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
12 Permission to use, copy, modify, and distribute this software and its
13 documentation for any purpose and without fee is hereby granted,
14 provided that the above copyright notice appear in all copies and that
15 both that copyright notice and this permission notice appear in
16 supporting documentation, and that the names of Stichting Mathematisch
17 Centrum or CWI not be used in advertising or publicity pertaining to
18 distribution of the software without specific, written prior permission.
20 STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28 ******************************************************************/
30 /* Regular expression objects */
31 /* This uses Tatu Ylonen's copyleft-free reimplementation of
32 GNU regular expressions */
34 #include "allobjects.h"
35 #include "modsupport.h"
/* Module exception object.  The init code further down creates it as the
   string object "regex.error" and stores it in the module dictionary under
   the key "error"; the err_setstr() calls visible in this file all raise
   it. */
40 static object
*RegexError
; /* Exception */
/* Per-object state of a compiled regex.  These are the members the rest of
   the file reaches through a regexobject pointer (re->...); the enclosing
   struct header is not visible in this excerpt. */
44 struct re_pattern_buffer re_patbuf
; /* The compiled expression; filled in by re_compile_pattern() */
45 struct re_registers re_regs
; /* The registers from the last match */
46 char re_fastmap
[256]; /* Storage for fastmap; re_patbuf.fastmap is pointed at it */
47 object
*re_translate
; /* String object for translate table (may be NULL) */
48 object
*re_lastok
; /* String object last matched/searched; reset to NULL before each attempt */
49 object
*re_groupindex
; /* Group name to index dictionary (NULL when created via compile) */
50 object
*re_givenpat
; /* Pattern with symbolic groups */
51 object
*re_realpat
; /* Pattern without symbolic groups; this is what gets compiled */
54 /* Regex object methods */
/* Fragment of the deallocator (presumably reg_dealloc, which the type
   object installs in its tp_dealloc slot; the header is not visible in this
   excerpt).  Each of the five object-pointer members is handed to XDECREF,
   a macro defined elsewhere. */
60 XDECREF(re
->re_translate
);
61 XDECREF(re
->re_lastok
);
62 XDECREF(re
->re_groupindex
);
63 XDECREF(re
->re_givenpat
);
64 XDECREF(re
->re_realpat
);
/* Fragment of the helper that turns a set of match registers into a tuple
   (presumably makeresult(), which the "regs" attribute lookup calls; the
   header is not visible in this excerpt).  The result has RE_NREGS items. */
70 struct re_registers
*regs
;
72 object
*v
= newtupleobject(RE_NREGS
);
75 for (i
= 0; i
< RE_NREGS
; i
++) {
/* Item i is built with the "(ii)" format from start[i] and end[i]. */
77 w
= mkvalue("(ii)", regs
->start
[i
], regs
->end
[i
]);
83 settupleitem(v
, i
, w
);
/* Fragment of the object-level match method (presumably reg_match, which
   the method table registers as "match"; the header is not visible here).
   Two argument shapes are parsed: a bare string ("S") and a
   (string, offset) tuple ("(Si)"). */
99 if (getargs(args
, "S", &argstring
)) {
104 if (!getargs(args
, "(Si)", &argstring
, &offset
))
107 buffer
= getstringvalue(argstring
);
108 size
= getstringsize(argstring
);
/* offset may equal size, so matching at the very end of the string is
   accepted; anything outside 0..size raises RegexError. */
109 if (offset
< 0 || offset
> size
) {
110 err_setstr(RegexError
, "match offset out of range");
/* Forget the previously remembered subject string before running the
   matcher. */
113 XDECREF(re
->re_lastok
);
114 re
->re_lastok
= NULL
;
/* Run the matcher at the given offset; the registers are stored on the
   object so that later group extraction can read them. */
115 result
= re_match(&re
->re_patbuf
, buffer
, size
, offset
, &re
->re_regs
);
117 /* Failure like stack overflow */
118 err_setstr(RegexError
, "match failure");
/* Remember the subject string (the condition guarding this assignment is
   not visible in this excerpt). */
123 re
->re_lastok
= argstring
;
125 return newintobject((long)result
); /* Length of the match or -1 */
/* Fragment of the object-level search method (presumably reg_search, which
   the method table registers as "search"; the header is not visible here).
   Argument parsing mirrors the match method: "S" or "(Si)". */
140 if (getargs(args
, "S", &argstring
)) {
145 if (!getargs(args
, "(Si)", &argstring
, &offset
))
148 buffer
= getstringvalue(argstring
);
149 size
= getstringsize(argstring
);
/* offset may equal size; anything outside 0..size raises RegexError. */
150 if (offset
< 0 || offset
> size
) {
151 err_setstr(RegexError
, "search offset out of range");
154 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
155 the implementation don't match: the documentation states that
156 |range| positions are tried, while the code tries |range|+1
157 positions. It seems more productive to believe the code! */
/* Consequently range is the distance from offset to the end of the
   string, with no extra +1. */
158 range
= size
- offset
;
/* Forget the previously remembered subject string before searching. */
159 XDECREF(re
->re_lastok
);
160 re
->re_lastok
= NULL
;
/* The remaining arguments of this call are not visible in this excerpt. */
161 result
= re_search(&re
->re_patbuf
, buffer
, size
, offset
, range
,
164 /* Failure like stack overflow */
165 err_setstr(RegexError
, "match failure");
/* Remember the subject string (the condition guarding this assignment is
   not visible in this excerpt). */
170 re
->re_lastok
= argstring
;
172 return newintobject((long)result
); /* Position of the match or -1 */
/* Fragment of the group-extraction routine (presumably reg_group, which the
   method table registers as "group"; the header is not visible here). */
/* Tuple argument: build a result tuple of the same size by calling
   reg_group() on each item in turn. */
181 if (args
!= NULL
&& is_tupleobject(args
)) {
182 int n
= gettuplesize(args
);
183 object
*res
= newtupleobject(n
);
186 for (i
= 0; i
< n
; i
++) {
187 object
*v
= reg_group(re
, gettupleitem(args
, i
));
192 settupleitem(res
, i
, v
);
/* Single argument: parsed as an int index ("i") and, failing that, as a
   string group name ("S"). */
196 if (!getargs(args
, "i", &i
)) {
199 if (!getargs(args
, "S", &n
))
203 if (re
->re_groupindex
== NULL
)
/* Translate the group name into an index through the group-index
   mapping. */
206 index
= mappinglookup(re
->re_groupindex
, n
);
208 err_setstr(RegexError
, "group() group name doesn't exist");
211 i
= getintvalue(index
);
/* Valid register numbers are 0 .. RE_NREGS-1. */
214 if (i
< 0 || i
>= RE_NREGS
) {
215 err_setstr(RegexError
, "group() index out of range");
/* Without a remembered subject string there is nothing to slice. */
218 if (re
->re_lastok
== NULL
) {
219 err_setstr(RegexError
,
220 "group() only valid after successful match/search");
223 a
= re
->re_regs
.start
[i
];
224 b
= re
->re_regs
.end
[i
];
/* A negative bound is special-cased; what happens in that case is not
   visible in this excerpt. */
225 if (a
< 0 || b
< 0) {
/* New string object holding the b-a bytes of the subject string that start
   at position a. */
229 return newsizedstringobject(getstringvalue(re
->re_lastok
)+a
, b
-a
);
/* Method table for regex objects.  reg_getattr() passes it to findmethod()
   for any attribute name it does not handle itself.  The closing of the
   initializer is not visible in this excerpt. */
232 static struct methodlist reg_methods
[] = {
233 {"match", (method
)reg_match
},
234 {"search", (method
)reg_search
},
235 {"group", (method
)reg_group
},
236 {NULL
, NULL
} /* sentinel */
/* Attribute lookup for regex objects.
   Recognized data attributes, compared by name with strcmp():
   "regs", "last", "translate", "groupindex", "realpat", "givenpat" and
   "__members__".  Any other name is looked up in reg_methods.
   Each data attribute first tests the corresponding member for NULL; the
   body of that branch is not visible in this excerpt. */
240 reg_getattr(re
, name
)
/* "regs": tuple built by makeresult() from the saved registers. */
244 if (strcmp(name
, "regs") == 0) {
245 if (re
->re_lastok
== NULL
) {
249 return makeresult(&re
->re_regs
);
/* "last": the remembered subject string, returned after INCREF. */
251 if (strcmp(name
, "last") == 0) {
252 if (re
->re_lastok
== NULL
) {
256 INCREF(re
->re_lastok
);
257 return re
->re_lastok
;
/* "translate": the translate table string, returned after INCREF. */
259 if (strcmp(name
, "translate") == 0) {
260 if (re
->re_translate
== NULL
) {
264 INCREF(re
->re_translate
);
265 return re
->re_translate
;
/* "groupindex": the group name to index mapping, returned after INCREF. */
267 if (strcmp(name
, "groupindex") == 0) {
268 if (re
->re_groupindex
== NULL
) {
272 INCREF(re
->re_groupindex
);
273 return re
->re_groupindex
;
/* "realpat": the pattern that was actually compiled, returned after
   INCREF. */
275 if (strcmp(name
, "realpat") == 0) {
276 if (re
->re_realpat
== NULL
) {
280 INCREF(re
->re_realpat
);
281 return re
->re_realpat
;
/* "givenpat": the pattern as supplied by the caller, returned after
   INCREF. */
283 if (strcmp(name
, "givenpat") == 0) {
284 if (re
->re_givenpat
== NULL
) {
288 INCREF(re
->re_givenpat
);
289 return re
->re_givenpat
;
/* "__members__": a fresh six-item list naming the data attributes above.
   The individual newstringobject() results are not checked one by one;
   err_occurred() is consulted once after the list has been filled. */
291 if (strcmp(name
, "__members__") == 0) {
292 object
*list
= newlistobject(6);
294 setlistitem(list
, 0, newstringobject("last"));
295 setlistitem(list
, 1, newstringobject("regs"));
296 setlistitem(list
, 2, newstringobject("translate"));
297 setlistitem(list
, 3, newstringobject("groupindex"));
298 setlistitem(list
, 4, newstringobject("realpat"));
299 setlistitem(list
, 5, newstringobject("givenpat"));
300 if (err_occurred()) {
/* Not a data attribute: fall back to the method table. */
307 return findmethod(reg_methods
, (object
*)re
, name
);
/* Type object for regex objects; newregexobject() passes its address to
   NEWOBJ.  Only some initializer entries are visible in this excerpt: the
   object header, the instance size, the deallocator and the attribute
   lookup function. */
310 static typeobject Regextype
= {
311 OB_HEAD_INIT(&Typetype
)
314 sizeof(regexobject
), /*tp_size*/
317 (destructor
)reg_dealloc
, /*tp_dealloc*/
319 (getattrfunc
)reg_getattr
, /*tp_getattr*/
/* Create a regex object.
   pattern    - string object that is compiled and stored as re_realpat
   translate  - translate table string (must be 256 bytes when non-NULL)
   givenpat   - stored as re_givenpat
   groupindex - stored as re_groupindex
   Parameter declarations, reference-count handling and the return
   statements are not visible in this excerpt. */
326 newregexobject(pattern
, translate
, givenpat
, groupindex
)
333 char *pat
= getstringvalue(pattern
);
334 int size
= getstringsize(pattern
);
/* Reject a translate table of the wrong length before allocating. */
336 if (translate
!= NULL
&& getstringsize(translate
) != 256) {
337 err_setstr(RegexError
,
338 "translation table must be 256 bytes");
341 re
= NEWOBJ(regexobject
, &Regextype
);
/* Start with an empty pattern buffer whose fastmap is the array embedded
   in the object itself. */
344 re
->re_patbuf
.buffer
= NULL
;
345 re
->re_patbuf
.allocated
= 0;
346 re
->re_patbuf
.fastmap
= re
->re_fastmap
;
/* The pattern buffer's translate pointer is set either to the table's
   bytes or to NULL; the test choosing between the two assignments is not
   visible in this excerpt. */
348 re
->re_patbuf
.translate
= getstringvalue(translate
);
350 re
->re_patbuf
.translate
= NULL
;
352 re
->re_translate
= translate
;
353 re
->re_lastok
= NULL
;
354 re
->re_groupindex
= groupindex
;
356 re
->re_realpat
= pattern
;
358 re
->re_givenpat
= givenpat
;
/* The value returned by re_compile_pattern() is used below as the message
   text of RegexError (the test guarding that call is not visible here). */
359 error
= re_compile_pattern(pat
, size
, &re
->re_patbuf
);
361 err_setstr(RegexError
, error
);
/* Module-level compile function.  Two argument shapes are parsed: a pattern
   string ("S") and a (pattern, translate) pair ("(SS)").  The pattern is
   passed to newregexobject() as both the real and the given pattern, with
   no group index. */
370 regex_compile(self
, args
)
376 if (!getargs(args
, "S", &pat
)) {
378 if (!getargs(args
, "(SS)", &pat
, &tran
))
381 return newregexobject(pat
, tran
, pat
, NULL
);
/* Rewrite a pattern that may contain symbolic groups.
   The visible code scans pattern for an opening paren followed by '<', a
   name made of alphanumerics and '_', and a closing '>'.  For each such
   name it inserts name -> group count into gdict.  The rewritten pattern is
   written into a new string object, allocated at the size of the input and
   shrunk at the end to the number of bytes actually written.
   Much of the loop body is not visible in this excerpt. */
385 symcomp(pattern
, gdict
)
389 char *opat
= getstringvalue(pattern
);
390 char *oend
= opat
+ getstringsize(pattern
);
/* Whether a group-opening paren must be backslash-escaped depends on the
   current syntax: with RE_NO_BK_PARENS set, no escape is required. */
398 int require_escape
= re_syntax
& RE_NO_BK_PARENS
? 0 : 1;
/* Output buffer, allocated at the same size as the input. */
400 npattern
= newsizedstringobject((char*)NULL
, getstringsize(pattern
));
401 if (npattern
== NULL
)
403 n
= getstringvalue(npattern
);
/* A paren opens a group only when its escaped state matches what the
   syntax requires. */
406 if (*o
== '(' && escaped
== require_escape
) {
/* The character after the paren must be '<' to start a symbolic name. */
411 if (++o
>= oend
|| *o
!= '<')
/* Special case for "<>", i.e. an empty name; its handling is not visible
   in this excerpt. */
414 if (o
+1 < oend
&& *(o
+1) == '>')
418 for (++o
; o
< oend
;) {
420 object
*group_name
= NULL
;
421 object
*group_index
= NULL
;
/* NOTE(review): the size of name_buf and any bound on the copy into it are
   not visible here; confirm that long group names cannot overrun it. */
423 group_name
= newstringobject(name_buf
);
424 group_index
= newintobject(group_count
);
/* Any failure to create the key or value, or to insert them into gdict,
   takes the error branch. */
425 if (group_name
== NULL
|| group_index
== NULL
426 || mappinginsert(gdict
, group_name
, group_index
) != 0) {
428 XDECREF(group_index
);
432 ++o
; /* eat the '>' */
/* Names may only contain alphanumerics and '_'.
   NOTE(review): o appears to be a plain char pointer; passing a negative
   char value to isalnum() is undefined behavior.  Confirm, and consider
   casting to unsigned char. */
435 if (!isalnum(*o
) && *o
!= '_') {
/* An unescaped '[' starts a bracket expression, which is skipped up to the
   closing ']'. */
442 if (*o
== '[' && !escaped
) {
444 ++o
; /* eat the char following '[' */
446 while (o
< oend
&& *o
!= ']') {
453 else if (*o
== '\\') {
/* Shrink the output string to the number of bytes written. */
465 if (resizestring(&npattern
, n
- getstringvalue(npattern
)) == 0)
/* Module-level symcomp function.  Two argument shapes are parsed: a pattern
   string ("S") and a (pattern, translate) pair ("(SS)").  A new mapping
   receives the group names collected by symcomp(); the rewritten pattern is
   compiled, while the original is kept as the given pattern. */
475 regex_symcomp(self
, args
)
481 object
*gdict
= NULL
;
483 if (!getargs(args
, "S", &pattern
)) {
485 if (!getargs(args
, "(SS)", &pattern
, &tran
))
488 gdict
= newmappingobject();
/* Second half of a combined failure test; the first operand is not visible
   in this excerpt. */
490 || (npattern
= symcomp(pattern
, gdict
)) == NULL
) {
495 return newregexobject(npattern
, tran
, pattern
, gdict
);
/* One-entry cache used by the module-level match and search functions:
   cache_pat is the pattern object last seen and cache_prog the regex object
   produced for it by regex_compile(). */
499 static object
*cache_pat
;
500 static object
*cache_prog
;
/* Fragment of the cache refresh helper (presumably update_cache(), which
   regex_match and regex_search call; the header is not visible here).
   The cache is keyed on object identity: the pattern pointer is compared,
   not the string contents.  On a miss the pattern is recompiled through
   regex_compile(). */
506 if (pat
!= cache_pat
) {
510 cache_prog
= regex_compile((object
*)NULL
, pat
);
511 if (cache_prog
== NULL
)
/* Module-level match function.  Takes a (pattern, string) pair, refreshes
   the one-entry compile cache for the pattern, and delegates to reg_match()
   on the cached regex object with the string as its argument. */
520 regex_match(self
, args
)
524 object
*pat
, *string
;
525 if (!getargs(args
, "(SS)", &pat
, &string
))
/* update_cache() reports failure with a negative return value. */
527 if (update_cache(pat
) < 0)
529 return reg_match((regexobject
*)cache_prog
, string
);
/* Module-level search function.  Takes a (pattern, string) pair, refreshes
   the one-entry compile cache for the pattern, and delegates to
   reg_search() on the cached regex object with the string as its
   argument. */
533 regex_search(self
, args
)
537 object
*pat
, *string
;
538 if (!getargs(args
, "(SS)", &pat
, &string
))
/* update_cache() reports failure with a negative return value. */
540 if (update_cache(pat
) < 0)
542 return reg_search((regexobject
*)cache_prog
, string
);
/* Module-level set_syntax function.  Parses a single int argument, passes
   it to re_set_syntax(), and returns that call's result as an int
   object. */
546 regex_set_syntax(self
, args
)
550 if (!getintarg(args
, &syntax
))
552 syntax
= re_set_syntax(syntax
);
553 return newintobject((long)syntax
);
/* Table of module-level functions; the init code passes it to initmodule()
   when creating the "regex" module.  The closing of the initializer is not
   visible in this excerpt. */
556 static struct methodlist regex_global_methods
[] = {
557 {"compile", regex_compile
},
558 {"symcomp", regex_symcomp
},
559 {"match", regex_match
},
560 {"search", regex_search
},
561 {"set_syntax", regex_set_syntax
},
562 {NULL
, NULL
} /* sentinel */
/* Fragment of the module initialization code (the function header is not
   visible in this excerpt).  Creates the "regex" module and fills its
   dictionary with the "error" exception and the "casefold" table. */
569 m
= initmodule("regex", regex_global_methods
);
570 d
= getmoduledict(m
);
572 /* Initialize regex.error exception */
573 RegexError
= newstringobject("regex.error");
/* Failing to create or register the exception is treated as fatal. */
574 if (RegexError
== NULL
|| dictinsert(d
, "error", RegexError
) != 0)
575 fatal("can't define regex.error");
577 /* Initialize regex.casefold constant */
/* A 256-byte string is allocated and filled by the loop below; the values
   stored are not visible in this excerpt. */
578 v
= newsizedstringobject((char *)NULL
, 256);
581 char *s
= getstringvalue(v
);
582 for (i
= 0; i
< 256; i
++) {
/* NOTE(review): unlike the "error" insertion above, the result of this
   dictinsert() is not checked. */
588 dictinsert(d
, "casefold", v
);