At the release of 1.0.1.
[python/dscho.git] / Modules / regexmodule.c
blob13c6cb216fb7238c13eaba111694c17053424ffc
1 /*
2 XXX support range parameter on search
3 XXX support mstop parameter on search
4 */
6 /***********************************************************
7 Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
8 Amsterdam, The Netherlands.
10 All Rights Reserved
12 Permission to use, copy, modify, and distribute this software and its
13 documentation for any purpose and without fee is hereby granted,
14 provided that the above copyright notice appear in all copies and that
15 both that copyright notice and this permission notice appear in
16 supporting documentation, and that the names of Stichting Mathematisch
17 Centrum or CWI not be used in advertising or publicity pertaining to
18 distribution of the software without specific, written prior permission.
20 STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28 ******************************************************************/
30 /* Regular expression objects */
31 /* This uses Tatu Ylonen's copyleft-free reimplementation of
32 GNU regular expressions */
34 #include "allobjects.h"
35 #include "modsupport.h"
37 #include "regexpr.h"
39 static object *RegexError; /* Exception */
41 typedef struct {
42 OB_HEAD
43 struct re_pattern_buffer re_patbuf; /* The compiled expression */
44 struct re_registers re_regs; /* The registers from the last match */
45 char re_fastmap[256]; /* Storage for fastmap */
46 object *re_translate; /* String object for translate table */
47 object *re_lastok; /* String object last matched/searched */
48 } regexobject;
50 /* Regex object methods */
52 static void
53 reg_dealloc(re)
54 regexobject *re;
56 XDECREF(re->re_translate);
57 XDECREF(re->re_lastok);
58 XDEL(re->re_patbuf.buffer);
59 XDEL(re->re_patbuf.translate);
60 DEL(re);
63 static object *
64 makeresult(regs)
65 struct re_registers *regs;
67 object *v = newtupleobject(RE_NREGS);
68 if (v != NULL) {
69 int i;
70 for (i = 0; i < RE_NREGS; i++) {
71 object *w;
72 w = mkvalue("(ii)", regs->start[i], regs->end[i]);
73 if (w == NULL) {
74 XDECREF(v);
75 v = NULL;
76 break;
78 settupleitem(v, i, w);
81 return v;
84 static object *
85 reg_match(re, args)
86 regexobject *re;
87 object *args;
89 object *argstring;
90 char *buffer;
91 int size;
92 int offset;
93 int result;
94 if (getargs(args, "S", &argstring)) {
95 offset = 0;
97 else {
98 err_clear();
99 if (!getargs(args, "(Si)", &argstring, &offset))
100 return NULL;
102 buffer = getstringvalue(argstring);
103 size = getstringsize(argstring);
104 if (offset < 0 || offset > size) {
105 err_setstr(RegexError, "match offset out of range");
106 return NULL;
108 XDECREF(re->re_lastok);
109 re->re_lastok = NULL;
110 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
111 if (result < -1) {
112 /* Failure like stack overflow */
113 err_setstr(RegexError, "match failure");
114 return NULL;
116 if (result >= 0) {
117 INCREF(argstring);
118 re->re_lastok = argstring;
120 return newintobject((long)result); /* Length of the match or -1 */
123 static object *
124 reg_search(re, args)
125 regexobject *re;
126 object *args;
128 object *argstring;
129 char *buffer;
130 int size;
131 int offset;
132 int range;
133 int result;
135 if (getargs(args, "S", &argstring)) {
136 offset = 0;
138 else {
139 err_clear();
140 if (!getargs(args, "(Si)", &argstring, &offset))
141 return NULL;
143 buffer = getstringvalue(argstring);
144 size = getstringsize(argstring);
145 if (offset < 0 || offset > size) {
146 err_setstr(RegexError, "search offset out of range");
147 return NULL;
149 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
150 the implementation don't match: the documentation states that
151 |range| positions are tried, while the code tries |range|+1
152 positions. It seems more productive to believe the code! */
153 range = size - offset;
154 XDECREF(re->re_lastok);
155 re->re_lastok = NULL;
156 result = re_search(&re->re_patbuf, buffer, size, offset, range,
157 &re->re_regs);
158 if (result < -1) {
159 /* Failure like stack overflow */
160 err_setstr(RegexError, "match failure");
161 return NULL;
163 if (result >= 0) {
164 INCREF(argstring);
165 re->re_lastok = argstring;
167 return newintobject((long)result); /* Position of the match or -1 */
170 static object *
171 reg_group(re, args)
172 regexobject *re;
173 object *args;
175 int i, a, b;
176 if (args != NULL && is_tupleobject(args)) {
177 int n = gettuplesize(args);
178 object *res = newtupleobject(n);
179 if (res == NULL)
180 return NULL;
181 for (i = 0; i < n; i++) {
182 object *v = reg_group(re, gettupleitem(args, i));
183 if (v == NULL) {
184 DECREF(res);
185 return NULL;
187 settupleitem(res, i, v);
189 return res;
191 if (!getargs(args, "i", &i))
192 return NULL;
193 if (i < 0 || i >= RE_NREGS) {
194 err_setstr(RegexError, "group() index out of range");
195 return NULL;
197 if (re->re_lastok == NULL) {
198 err_setstr(RegexError,
199 "group() only valid after successful match/search");
200 return NULL;
202 a = re->re_regs.start[i];
203 b = re->re_regs.end[i];
204 if (a < 0 || b < 0) {
205 INCREF(None);
206 return None;
208 return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
211 static struct methodlist reg_methods[] = {
212 {"match", reg_match},
213 {"search", reg_search},
214 {"group", reg_group},
215 {NULL, NULL} /* sentinel */
218 static object *
219 reg_getattr(re, name)
220 regexobject *re;
221 char *name;
223 if (strcmp(name, "regs") == 0) {
224 if (re->re_lastok == NULL) {
225 INCREF(None);
226 return None;
228 return makeresult(&re->re_regs);
230 if (strcmp(name, "last") == 0) {
231 if (re->re_lastok == NULL) {
232 INCREF(None);
233 return None;
235 INCREF(re->re_lastok);
236 return re->re_lastok;
238 if (strcmp(name, "translate") == 0) {
239 if (re->re_translate == NULL) {
240 INCREF(None);
241 return None;
243 INCREF(re->re_translate);
244 return re->re_translate;
246 if (strcmp(name, "__members__") == 0) {
247 object *list = newlistobject(3);
248 if (list) {
249 setlistitem(list, 0, newstringobject("last"));
250 setlistitem(list, 1, newstringobject("regs"));
251 setlistitem(list, 2, newstringobject("translate"));
252 if (err_occurred()) {
253 DECREF(list);
254 list = NULL;
257 return list;
259 return findmethod(reg_methods, (object *)re, name);
262 static typeobject Regextype = {
263 OB_HEAD_INIT(&Typetype)
264 0, /*ob_size*/
265 "regex", /*tp_name*/
266 sizeof(regexobject), /*tp_size*/
267 0, /*tp_itemsize*/
268 /* methods */
269 reg_dealloc, /*tp_dealloc*/
270 0, /*tp_print*/
271 reg_getattr, /*tp_getattr*/
272 0, /*tp_setattr*/
273 0, /*tp_compare*/
274 0, /*tp_repr*/
277 static object *
278 newregexobject(pat, size, translate)
279 char *pat;
280 int size;
281 object *translate;
283 regexobject *re;
284 if (translate != NULL && getstringsize(translate) != 256) {
285 err_setstr(RegexError,
286 "translation table must be 256 bytes");
287 return NULL;
289 re = NEWOBJ(regexobject, &Regextype);
290 if (re != NULL) {
291 char *error;
292 re->re_patbuf.buffer = NULL;
293 re->re_patbuf.allocated = 0;
294 re->re_patbuf.fastmap = re->re_fastmap;
295 if (translate)
296 re->re_patbuf.translate = getstringvalue(translate);
297 else
298 re->re_patbuf.translate = NULL;
299 XINCREF(translate);
300 re->re_translate = translate;
301 re->re_lastok = NULL;
302 error = re_compile_pattern(pat, size, &re->re_patbuf);
303 if (error != NULL) {
304 err_setstr(RegexError, error);
305 DECREF(re);
306 re = NULL;
309 return (object *)re;
312 static object *
313 regex_compile(self, args)
314 object *self;
315 object *args;
317 char *pat;
318 int size;
319 object *tran = NULL;
320 if (!getargs(args, "s#", &pat, &size)) {
321 err_clear();
322 if (!getargs(args, "(s#S)", &pat, &size, &tran))
323 return NULL;
325 return newregexobject(pat, size, tran);
328 static object *cache_pat;
329 static object *cache_prog;
331 static int
332 update_cache(pat)
333 object *pat;
335 if (pat != cache_pat) {
336 XDECREF(cache_pat);
337 cache_pat = NULL;
338 XDECREF(cache_prog);
339 cache_prog = regex_compile((object *)NULL, pat);
340 if (cache_prog == NULL)
341 return -1;
342 cache_pat = pat;
343 INCREF(cache_pat);
345 return 0;
348 static object *
349 regex_match(self, args)
350 object *self;
351 object *args;
353 object *pat, *string;
354 if (!getargs(args, "(SS)", &pat, &string))
355 return NULL;
356 if (update_cache(pat) < 0)
357 return NULL;
358 return reg_match((regexobject *)cache_prog, string);
361 static object *
362 regex_search(self, args)
363 object *self;
364 object *args;
366 object *pat, *string;
367 if (!getargs(args, "(SS)", &pat, &string))
368 return NULL;
369 if (update_cache(pat) < 0)
370 return NULL;
371 return reg_search((regexobject *)cache_prog, string);
374 static object *
375 regex_set_syntax(self, args)
376 object *self, *args;
378 int syntax;
379 if (!getintarg(args, &syntax))
380 return NULL;
381 syntax = re_set_syntax(syntax);
382 return newintobject((long)syntax);
385 static struct methodlist regex_global_methods[] = {
386 {"compile", regex_compile},
387 {"match", regex_match},
388 {"search", regex_search},
389 {"set_syntax", regex_set_syntax},
390 {NULL, NULL} /* sentinel */
393 initregex()
395 object *m, *d, *v;
397 m = initmodule("regex", regex_global_methods);
398 d = getmoduledict(m);
400 /* Initialize regex.error exception */
401 RegexError = newstringobject("regex.error");
402 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
403 fatal("can't define regex.error");
405 /* Initialize regex.casefold constant */
406 v = newsizedstringobject((char *)NULL, 256);
407 if (v != NULL) {
408 int i;
409 char *s = getstringvalue(v);
410 for (i = 0; i < 256; i++) {
411 if (isupper(i))
412 s[i] = tolower(i);
413 else
414 s[i] = i;
416 dictinsert(d, "casefold", v);
417 DECREF(v);