2 XXX support range parameter on search
3 XXX support mstop parameter on search
6 /***********************************************************
7 Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
8 Amsterdam, The Netherlands.
12 Permission to use, copy, modify, and distribute this software and its
13 documentation for any purpose and without fee is hereby granted,
14 provided that the above copyright notice appear in all copies and that
15 both that copyright notice and this permission notice appear in
16 supporting documentation, and that the names of Stichting Mathematisch
17 Centrum or CWI not be used in advertising or publicity pertaining to
18 distribution of the software without specific, written prior permission.
20 STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28 ******************************************************************/
30 /* Regular expression objects */
31 /* This uses Tatu Ylonen's copyleft-free reimplementation of
32 GNU regular expressions */
34 #include "allobjects.h"
35 #include "modsupport.h"
39 static object
*RegexError
; /* Exception */
43 struct re_pattern_buffer re_patbuf
; /* The compiled expression */
44 struct re_registers re_regs
; /* The registers from the last match */
45 char re_fastmap
[256]; /* Storage for fastmap */
46 object
*re_translate
; /* String object for translate table */
47 object
*re_lastok
; /* String object last matched/searched */
50 /* Regex object methods */
56 XDECREF(re
->re_translate
);
57 XDECREF(re
->re_lastok
);
58 XDEL(re
->re_patbuf
.buffer
);
59 XDEL(re
->re_patbuf
.translate
);
65 struct re_registers
*regs
;
67 object
*v
= newtupleobject(RE_NREGS
);
70 for (i
= 0; i
< RE_NREGS
; i
++) {
72 w
= mkvalue("(ii)", regs
->start
[i
], regs
->end
[i
]);
78 settupleitem(v
, i
, w
);
94 if (getargs(args
, "S", &argstring
)) {
99 if (!getargs(args
, "(Si)", &argstring
, &offset
))
102 buffer
= getstringvalue(argstring
);
103 size
= getstringsize(argstring
);
104 if (offset
< 0 || offset
> size
) {
105 err_setstr(RegexError
, "match offset out of range");
108 XDECREF(re
->re_lastok
);
109 re
->re_lastok
= NULL
;
110 result
= re_match(&re
->re_patbuf
, buffer
, size
, offset
, &re
->re_regs
);
112 /* Failure like stack overflow */
113 err_setstr(RegexError
, "match failure");
118 re
->re_lastok
= argstring
;
120 return newintobject((long)result
); /* Length of the match or -1 */
135 if (getargs(args
, "S", &argstring
)) {
140 if (!getargs(args
, "(Si)", &argstring
, &offset
))
143 buffer
= getstringvalue(argstring
);
144 size
= getstringsize(argstring
);
145 if (offset
< 0 || offset
> size
) {
146 err_setstr(RegexError
, "search offset out of range");
149 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
150 the implementation don't match: the documentation states that
151 |range| positions are tried, while the code tries |range|+1
152 positions. It seems more productive to believe the code! */
153 range
= size
- offset
;
154 XDECREF(re
->re_lastok
);
155 re
->re_lastok
= NULL
;
156 result
= re_search(&re
->re_patbuf
, buffer
, size
, offset
, range
,
159 /* Failure like stack overflow */
160 err_setstr(RegexError
, "match failure");
165 re
->re_lastok
= argstring
;
167 return newintobject((long)result
); /* Position of the match or -1 */
176 if (args
!= NULL
&& is_tupleobject(args
)) {
177 int n
= gettuplesize(args
);
178 object
*res
= newtupleobject(n
);
181 for (i
= 0; i
< n
; i
++) {
182 object
*v
= reg_group(re
, gettupleitem(args
, i
));
187 settupleitem(res
, i
, v
);
191 if (!getargs(args
, "i", &i
))
193 if (i
< 0 || i
>= RE_NREGS
) {
194 err_setstr(RegexError
, "group() index out of range");
197 if (re
->re_lastok
== NULL
) {
198 err_setstr(RegexError
,
199 "group() only valid after successful match/search");
202 a
= re
->re_regs
.start
[i
];
203 b
= re
->re_regs
.end
[i
];
204 if (a
< 0 || b
< 0) {
208 return newsizedstringobject(getstringvalue(re
->re_lastok
)+a
, b
-a
);
211 static struct methodlist reg_methods
[] = {
212 {"match", reg_match
},
213 {"search", reg_search
},
214 {"group", reg_group
},
215 {NULL
, NULL
} /* sentinel */
219 reg_getattr(re
, name
)
223 if (strcmp(name
, "regs") == 0) {
224 if (re
->re_lastok
== NULL
) {
228 return makeresult(&re
->re_regs
);
230 if (strcmp(name
, "last") == 0) {
231 if (re
->re_lastok
== NULL
) {
235 INCREF(re
->re_lastok
);
236 return re
->re_lastok
;
238 if (strcmp(name
, "translate") == 0) {
239 if (re
->re_translate
== NULL
) {
243 INCREF(re
->re_translate
);
244 return re
->re_translate
;
246 if (strcmp(name
, "__members__") == 0) {
247 object
*list
= newlistobject(3);
249 setlistitem(list
, 0, newstringobject("last"));
250 setlistitem(list
, 1, newstringobject("regs"));
251 setlistitem(list
, 2, newstringobject("translate"));
252 if (err_occurred()) {
259 return findmethod(reg_methods
, (object
*)re
, name
);
262 static typeobject Regextype
= {
263 OB_HEAD_INIT(&Typetype
)
266 sizeof(regexobject
), /*tp_size*/
269 reg_dealloc
, /*tp_dealloc*/
271 reg_getattr
, /*tp_getattr*/
278 newregexobject(pat
, size
, translate
)
284 if (translate
!= NULL
&& getstringsize(translate
) != 256) {
285 err_setstr(RegexError
,
286 "translation table must be 256 bytes");
289 re
= NEWOBJ(regexobject
, &Regextype
);
292 re
->re_patbuf
.buffer
= NULL
;
293 re
->re_patbuf
.allocated
= 0;
294 re
->re_patbuf
.fastmap
= re
->re_fastmap
;
296 re
->re_patbuf
.translate
= getstringvalue(translate
);
298 re
->re_patbuf
.translate
= NULL
;
300 re
->re_translate
= translate
;
301 re
->re_lastok
= NULL
;
302 error
= re_compile_pattern(pat
, size
, &re
->re_patbuf
);
304 err_setstr(RegexError
, error
);
313 regex_compile(self
, args
)
320 if (!getargs(args
, "s#", &pat
, &size
)) {
322 if (!getargs(args
, "(s#S)", &pat
, &size
, &tran
))
325 return newregexobject(pat
, size
, tran
);
328 static object
*cache_pat
;
329 static object
*cache_prog
;
335 if (pat
!= cache_pat
) {
339 cache_prog
= regex_compile((object
*)NULL
, pat
);
340 if (cache_prog
== NULL
)
349 regex_match(self
, args
)
353 object
*pat
, *string
;
354 if (!getargs(args
, "(SS)", &pat
, &string
))
356 if (update_cache(pat
) < 0)
358 return reg_match((regexobject
*)cache_prog
, string
);
362 regex_search(self
, args
)
366 object
*pat
, *string
;
367 if (!getargs(args
, "(SS)", &pat
, &string
))
369 if (update_cache(pat
) < 0)
371 return reg_search((regexobject
*)cache_prog
, string
);
375 regex_set_syntax(self
, args
)
379 if (!getintarg(args
, &syntax
))
381 syntax
= re_set_syntax(syntax
);
382 return newintobject((long)syntax
);
385 static struct methodlist regex_global_methods
[] = {
386 {"compile", regex_compile
},
387 {"match", regex_match
},
388 {"search", regex_search
},
389 {"set_syntax", regex_set_syntax
},
390 {NULL
, NULL
} /* sentinel */
397 m
= initmodule("regex", regex_global_methods
);
398 d
= getmoduledict(m
);
400 /* Initialize regex.error exception */
401 RegexError
= newstringobject("regex.error");
402 if (RegexError
== NULL
|| dictinsert(d
, "error", RegexError
) != 0)
403 fatal("can't define regex.error");
405 /* Initialize regex.casefold constant */
406 v
= newsizedstringobject((char *)NULL
, 256);
409 char *s
= getstringvalue(v
);
410 for (i
= 0; i
< 256; i
++) {
416 dictinsert(d
, "casefold", v
);