2 XXX support range parameter on search
3 XXX support mstop parameter on search
6 /***********************************************************
7 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
12 Permission to use, copy, modify, and distribute this software and its
13 documentation for any purpose and without fee is hereby granted,
14 provided that the above copyright notice appear in all copies and that
15 both that copyright notice and this permission notice appear in
16 supporting documentation, and that the names of Stichting Mathematisch
17 Centrum or CWI or Corporation for National Research Initiatives or
18 CNRI not be used in advertising or publicity pertaining to
19 distribution of the software without specific, written prior
22 While CWI is the initial source for this software, a modified version
23 is made available by the Corporation for National Research Initiatives
24 (CNRI) at the Internet address ftp://ftp.python.org.
26 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33 PERFORMANCE OF THIS SOFTWARE.
35 ******************************************************************/
37 /* Regular expression objects */
38 /* This uses Tatu Ylonen's copyleft-free reimplementation of
39 GNU regular expressions */
/* Module-level exception object. It is created as "regex.error" during
   module initialisation and stored in the module dictionary under the
   key "error"; the functions in this file pass it to PyErr_SetString()
   to report regex-specific failures. */
47 static PyObject
*RegexError
; /* Exception */
51 struct re_pattern_buffer re_patbuf
; /* The compiled expression */
52 struct re_registers re_regs
; /* The registers from the last match */
53 char re_fastmap
[256]; /* Storage for fastmap */
54 PyObject
*re_translate
; /* String object for translate table */
55 PyObject
*re_lastok
; /* String object last matched/searched */
56 PyObject
*re_groupindex
; /* Group name to index dictionary */
57 PyObject
*re_givenpat
; /* Pattern with symbolic groups */
58 PyObject
*re_realpat
; /* Pattern without symbolic groups */
61 /* Regex object methods */
67 PyMem_XDEL(re
->re_patbuf
.buffer
);
68 Py_XDECREF(re
->re_translate
);
69 Py_XDECREF(re
->re_lastok
);
70 Py_XDECREF(re
->re_groupindex
);
71 Py_XDECREF(re
->re_givenpat
);
72 Py_XDECREF(re
->re_realpat
);
78 struct re_registers
*regs
;
82 static PyObject
*filler
= NULL
;
85 filler
= Py_BuildValue("(ii)", -1, -1);
89 v
= PyTuple_New(RE_NREGS
);
93 for (i
= 0; i
< RE_NREGS
; i
++) {
94 int lo
= regs
->start
[i
];
95 int hi
= regs
->end
[i
];
97 if (lo
== -1 && hi
== -1) {
102 w
= Py_BuildValue("(ii)", lo
, hi
);
103 if (w
== NULL
|| PyTuple_SetItem(v
, i
, w
) < 0) {
112 regobj_match(re
, args
)
122 if (!PyArg_ParseTuple(args
, "O|i", &argstring
, &offset
))
124 if (!PyArg_Parse(argstring
, "t#", &buffer
, &size
))
127 if (offset
< 0 || offset
> size
) {
128 PyErr_SetString(RegexError
, "match offset out of range");
131 Py_XDECREF(re
->re_lastok
);
132 re
->re_lastok
= NULL
;
133 result
= _Py_re_match(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
,
136 /* Serious failure of some sort; if re_match didn't
137 set an exception, raise a generic error */
138 if (!PyErr_Occurred())
139 PyErr_SetString(RegexError
, "match failure");
143 Py_INCREF(argstring
);
144 re
->re_lastok
= argstring
;
146 return PyInt_FromLong((long)result
); /* Length of the match or -1 */
150 regobj_search(re
, args
)
161 if (!PyArg_ParseTuple(args
, "O|i", &argstring
, &offset
))
163 if (!PyArg_Parse(argstring
, "t#", &buffer
, &size
))
166 if (offset
< 0 || offset
> size
) {
167 PyErr_SetString(RegexError
, "search offset out of range");
170 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
171 the implementation don't match: the documentation states that
172 |range| positions are tried, while the code tries |range|+1
173 positions. It seems more productive to believe the code! */
174 range
= size
- offset
;
175 Py_XDECREF(re
->re_lastok
);
176 re
->re_lastok
= NULL
;
177 result
= _Py_re_search(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
, range
,
180 /* Serious failure of some sort; if re_search didn't
181 set an exception, raise a generic error */
182 if (!PyErr_Occurred())
183 PyErr_SetString(RegexError
, "match failure");
187 Py_INCREF(argstring
);
188 re
->re_lastok
= argstring
;
190 return PyInt_FromLong((long)result
); /* Position of the match or -1 */
193 /* get the group from the regex where index can be a string (group name) or
194 an integer index [0 .. 99]
197 group_from_index(re
, index
)
204 if (PyString_Check(index
))
205 if (re
->re_groupindex
== NULL
||
206 !(index
= PyDict_GetItem(re
->re_groupindex
, index
)))
208 PyErr_SetString(RegexError
,
209 "group() group name doesn't exist");
213 i
= PyInt_AsLong(index
);
214 if (i
== -1 && PyErr_Occurred())
217 if (i
< 0 || i
>= RE_NREGS
) {
218 PyErr_SetString(RegexError
, "group() index out of range");
221 if (re
->re_lastok
== NULL
) {
222 PyErr_SetString(RegexError
,
223 "group() only valid after successful match/search");
226 a
= re
->re_regs
.start
[i
];
227 b
= re
->re_regs
.end
[i
];
228 if (a
< 0 || b
< 0) {
233 if (!(v
= PyString_AsString(re
->re_lastok
)))
236 return PyString_FromStringAndSize(v
+a
, b
-a
);
241 regobj_group(re
, args
)
245 int n
= PyTuple_Size(args
);
247 PyObject
*res
= NULL
;
252 PyErr_SetString(PyExc_TypeError
, "not enough arguments");
256 /* return value is a single string */
257 PyObject
*index
= PyTuple_GetItem(args
, 0);
261 return group_from_index(re
, index
);
264 /* return value is a tuple */
265 if (!(res
= PyTuple_New(n
)))
268 for (i
= 0; i
< n
; i
++) {
269 PyObject
*index
= PyTuple_GetItem(args
, i
);
270 PyObject
*group
= NULL
;
274 if (!(group
= group_from_index(re
, index
)))
276 if (PyTuple_SetItem(res
, i
, group
) < 0)
287 static struct PyMethodDef reg_methods
[] = {
288 {"match", (PyCFunction
)regobj_match
, 1},
289 {"search", (PyCFunction
)regobj_search
, 1},
290 {"group", (PyCFunction
)regobj_group
, 1},
291 {NULL
, NULL
} /* sentinel */
296 static char* members
[] = {
297 "last", "regs", "translate",
298 "groupindex", "realpat", "givenpat",
304 regobj_getattr(re
, name
)
308 if (strcmp(name
, "regs") == 0) {
309 if (re
->re_lastok
== NULL
) {
313 return makeresult(&re
->re_regs
);
315 if (strcmp(name
, "last") == 0) {
316 if (re
->re_lastok
== NULL
) {
320 Py_INCREF(re
->re_lastok
);
321 return re
->re_lastok
;
323 if (strcmp(name
, "translate") == 0) {
324 if (re
->re_translate
== NULL
) {
328 Py_INCREF(re
->re_translate
);
329 return re
->re_translate
;
331 if (strcmp(name
, "groupindex") == 0) {
332 if (re
->re_groupindex
== NULL
) {
336 Py_INCREF(re
->re_groupindex
);
337 return re
->re_groupindex
;
339 if (strcmp(name
, "realpat") == 0) {
340 if (re
->re_realpat
== NULL
) {
344 Py_INCREF(re
->re_realpat
);
345 return re
->re_realpat
;
347 if (strcmp(name
, "givenpat") == 0) {
348 if (re
->re_givenpat
== NULL
) {
352 Py_INCREF(re
->re_givenpat
);
353 return re
->re_givenpat
;
355 if (strcmp(name
, "__members__") == 0) {
357 PyObject
*list
= NULL
;
359 /* okay, so it's unlikely this list will change that often.
360 still, it's easier to change it in just one place.
364 if (!(list
= PyList_New(i
)))
369 PyObject
* v
= PyString_FromString(members
[i
]);
370 if (!v
|| PyList_SetItem(list
, i
, v
) < 0) {
378 return Py_FindMethod(reg_methods
, (PyObject
*)re
, name
);
381 static PyTypeObject Regextype
= {
382 PyObject_HEAD_INIT(&PyType_Type
)
385 sizeof(regexobject
), /*tp_size*/
388 (destructor
)reg_dealloc
, /*tp_dealloc*/
390 (getattrfunc
)regobj_getattr
, /*tp_getattr*/
396 /* reference counting invariants:
400 groupindex: transferred
403 newregexobject(pattern
, translate
, givenpat
, groupindex
)
407 PyObject
*groupindex
;
413 if (!PyArg_Parse(pattern
, "t#", &pat
, &size
))
416 if (translate
!= NULL
&& PyString_Size(translate
) != 256) {
417 PyErr_SetString(RegexError
,
418 "translation table must be 256 bytes");
421 re
= PyObject_NEW(regexobject
, &Regextype
);
424 re
->re_patbuf
.buffer
= NULL
;
425 re
->re_patbuf
.allocated
= 0;
426 re
->re_patbuf
.fastmap
= (unsigned char *)re
->re_fastmap
;
428 re
->re_patbuf
.translate
= (unsigned char *)PyString_AsString(translate
);
429 if (!re
->re_patbuf
.translate
)
431 Py_INCREF(translate
);
434 re
->re_patbuf
.translate
= NULL
;
435 re
->re_translate
= translate
;
436 re
->re_lastok
= NULL
;
437 re
->re_groupindex
= groupindex
;
439 re
->re_realpat
= pattern
;
441 re
->re_givenpat
= givenpat
;
442 error
= _Py_re_compile_pattern((unsigned char *)pat
, size
, &re
->re_patbuf
);
444 PyErr_SetString(RegexError
, error
);
448 return (PyObject
*)re
;
455 regex_compile(self
, args
)
459 PyObject
*pat
= NULL
;
460 PyObject
*tran
= NULL
;
462 if (!PyArg_ParseTuple(args
, "S|S", &pat
, &tran
))
464 return newregexobject(pat
, tran
, pat
, NULL
);
468 symcomp(pattern
, gdict
)
472 char *opat
, *oend
, *o
, *n
, *g
, *v
;
478 int require_escape
= re_syntax
& RE_NO_BK_PARENS
? 0 : 1;
480 if (!(opat
= PyString_AsString(pattern
)))
483 if ((sz
= PyString_Size(pattern
)) < 0)
494 if (!(npattern
= PyString_FromStringAndSize((char*)NULL
, sz
)) ||
495 !(n
= PyString_AsString(npattern
)))
499 if (*o
== '(' && escaped
== require_escape
) {
504 if (++o
>= oend
|| *o
!= '<')
507 if (o
+1 < oend
&& *(o
+1) == '>')
511 for (++o
; o
< oend
;) {
513 PyObject
*group_name
= NULL
;
514 PyObject
*group_index
= NULL
;
516 group_name
= PyString_FromString(name_buf
);
517 group_index
= PyInt_FromLong(group_count
);
518 if (group_name
== NULL
||
519 group_index
== NULL
||
520 PyDict_SetItem(gdict
, group_name
,
523 Py_XDECREF(group_name
);
524 Py_XDECREF(group_index
);
525 Py_XDECREF(npattern
);
528 Py_DECREF(group_name
);
529 Py_DECREF(group_index
);
530 ++o
; /* eat the '>' */
533 if (!isalnum(Py_CHARMASK(*o
)) && *o
!= '_') {
540 else if (*o
== '[' && !escaped
) {
542 ++o
; /* eat the char following '[' */
544 while (o
< oend
&& *o
!= ']') {
551 else if (*o
== '\\') {
563 if (!(v
= PyString_AsString(npattern
))) {
567 /* _PyString_Resize() decrements npattern on failure */
568 if (_PyString_Resize(&npattern
, n
- v
) == 0)
577 regex_symcomp(self
, args
)
582 PyObject
*tran
= NULL
;
583 PyObject
*gdict
= NULL
;
585 PyObject
*retval
= NULL
;
587 if (!PyArg_ParseTuple(args
, "S|S", &pattern
, &tran
))
590 gdict
= PyDict_New();
591 if (gdict
== NULL
|| (npattern
= symcomp(pattern
, gdict
)) == NULL
) {
596 retval
= newregexobject(npattern
, tran
, pattern
, gdict
);
/* One-entry cache used by the module-level match/search functions.
   cache_pat is the pattern object the cache was last built for; an
   incoming pattern is compared against it by pointer identity, and a
   reference to it is held while cached.  Both entries are released
   when the syntax is changed (the "wipe the global pattern cache"
   step). */
602 static PyObject
*cache_pat
;
/* Compiled regex object obtained from regex_compile() for the cached
   pattern; the module-level match/search functions cast it to
   regexobject * and run the match or search against it. */
603 static PyObject
*cache_prog
;
609 PyObject
*tuple
= Py_BuildValue("(O)", pat
);
615 if (pat
!= cache_pat
) {
616 Py_XDECREF(cache_pat
);
618 Py_XDECREF(cache_prog
);
619 cache_prog
= regex_compile((PyObject
*)NULL
, tuple
);
620 if (cache_prog
== NULL
) {
625 Py_INCREF(cache_pat
);
633 regex_match(self
, args
)
637 PyObject
*pat
, *string
;
640 if (!PyArg_Parse(args
, "(SS)", &pat
, &string
))
642 if (update_cache(pat
) < 0)
645 if (!(tuple
= Py_BuildValue("(S)", string
)))
647 v
= regobj_match((regexobject
*)cache_prog
, tuple
);
653 regex_search(self
, args
)
657 PyObject
*pat
, *string
;
660 if (!PyArg_Parse(args
, "(SS)", &pat
, &string
))
662 if (update_cache(pat
) < 0)
665 if (!(tuple
= Py_BuildValue("(S)", string
)))
667 v
= regobj_search((regexobject
*)cache_prog
, tuple
);
673 regex_set_syntax(self
, args
)
678 if (!PyArg_Parse(args
, "i", &syntax
))
680 syntax
= re_set_syntax(syntax
);
681 /* wipe the global pattern cache */
682 Py_XDECREF(cache_pat
);
684 Py_XDECREF(cache_prog
);
686 return PyInt_FromLong((long)syntax
);
690 regex_get_syntax(self
, args
)
694 if (!PyArg_Parse(args
, ""))
696 return PyInt_FromLong((long)re_syntax
);
700 static struct PyMethodDef regex_global_methods
[] = {
701 {"compile", regex_compile
, 1},
702 {"symcomp", regex_symcomp
, 1},
703 {"match", regex_match
, 0},
704 {"search", regex_search
, 0},
705 {"set_syntax", regex_set_syntax
, 0},
706 {"get_syntax", regex_get_syntax
, 0},
707 {NULL
, NULL
} /* sentinel */
717 m
= Py_InitModule("regex", regex_global_methods
);
718 d
= PyModule_GetDict(m
);
720 /* Initialize regex.error exception */
721 v
= RegexError
= PyErr_NewException("regex.error", NULL
, NULL
);
722 if (v
== NULL
|| PyDict_SetItemString(d
, "error", v
) != 0)
725 /* Initialize regex.casefold constant */
726 if (!(v
= PyString_FromStringAndSize((char *)NULL
, 256)))
729 if (!(s
= PyString_AsString(v
)))
732 for (i
= 0; i
< 256; i
++) {
738 if (PyDict_SetItemString(d
, "casefold", v
) < 0)
742 if (!PyErr_Occurred())