2 XXX support range parameter on search
3 XXX support mstop parameter on search
6 /***********************************************************
7 Copyright (c) 2000, BeOpen.com.
8 Copyright (c) 1995-2000, Corporation for National Research Initiatives.
9 Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
12 See the file "Misc/COPYRIGHT" for information on usage and
13 redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
14 ******************************************************************/
16 /* Regular expression objects */
17 /* This uses Tatu Ylonen's copyleft-free reimplementation of
18 GNU regular expressions */
26 static PyObject
*RegexError
; /* Exception */
30 struct re_pattern_buffer re_patbuf
; /* The compiled expression */
31 struct re_registers re_regs
; /* The registers from the last match */
32 char re_fastmap
[256]; /* Storage for fastmap */
33 PyObject
*re_translate
; /* String object for translate table */
34 PyObject
*re_lastok
; /* String object last matched/searched */
35 PyObject
*re_groupindex
; /* Group name to index dictionary */
36 PyObject
*re_givenpat
; /* Pattern with symbolic groups */
37 PyObject
*re_realpat
; /* Pattern without symbolic groups */
40 /* Regex object methods */
43 reg_dealloc(regexobject
*re
)
45 if (re
->re_patbuf
.buffer
)
46 free(re
->re_patbuf
.buffer
);
47 Py_XDECREF(re
->re_translate
);
48 Py_XDECREF(re
->re_lastok
);
49 Py_XDECREF(re
->re_groupindex
);
50 Py_XDECREF(re
->re_givenpat
);
51 Py_XDECREF(re
->re_realpat
);
56 makeresult(struct re_registers
*regs
)
60 static PyObject
*filler
= NULL
;
63 filler
= Py_BuildValue("(ii)", -1, -1);
67 v
= PyTuple_New(RE_NREGS
);
71 for (i
= 0; i
< RE_NREGS
; i
++) {
72 int lo
= regs
->start
[i
];
73 int hi
= regs
->end
[i
];
75 if (lo
== -1 && hi
== -1) {
80 w
= Py_BuildValue("(ii)", lo
, hi
);
81 if (w
== NULL
|| PyTuple_SetItem(v
, i
, w
) < 0) {
90 regobj_match(regexobject
*re
, PyObject
*args
)
98 if (!PyArg_ParseTuple(args
, "O|i:match", &argstring
, &offset
))
100 if (!PyArg_Parse(argstring
, "t#", &buffer
, &size
))
103 if (offset
< 0 || offset
> size
) {
104 PyErr_SetString(RegexError
, "match offset out of range");
107 Py_XDECREF(re
->re_lastok
);
108 re
->re_lastok
= NULL
;
109 result
= _Py_re_match(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
,
112 /* Serious failure of some sort; if re_match didn't
113 set an exception, raise a generic error */
114 if (!PyErr_Occurred())
115 PyErr_SetString(RegexError
, "match failure");
119 Py_INCREF(argstring
);
120 re
->re_lastok
= argstring
;
122 return PyInt_FromLong((long)result
); /* Length of the match or -1 */
126 regobj_search(regexobject
*re
, PyObject
*args
)
135 if (!PyArg_ParseTuple(args
, "O|i:search", &argstring
, &offset
))
137 if (!PyArg_Parse(argstring
, "t#:search", &buffer
, &size
))
140 if (offset
< 0 || offset
> size
) {
141 PyErr_SetString(RegexError
, "search offset out of range");
144 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
145 the implementation don't match: the documentation states that
146 |range| positions are tried, while the code tries |range|+1
147 positions. It seems more productive to believe the code! */
148 range
= size
- offset
;
149 Py_XDECREF(re
->re_lastok
);
150 re
->re_lastok
= NULL
;
151 result
= _Py_re_search(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
, range
,
154 /* Serious failure of some sort; if re_match didn't
155 set an exception, raise a generic error */
156 if (!PyErr_Occurred())
157 PyErr_SetString(RegexError
, "match failure");
161 Py_INCREF(argstring
);
162 re
->re_lastok
= argstring
;
164 return PyInt_FromLong((long)result
); /* Position of the match or -1 */
167 /* get the group from the regex where index can be a string (group name) or
168 an integer index [0 .. 99]
171 group_from_index(regexobject
*re
, PyObject
*index
)
176 if (PyString_Check(index
))
177 if (re
->re_groupindex
== NULL
||
178 !(index
= PyDict_GetItem(re
->re_groupindex
, index
)))
180 PyErr_SetString(RegexError
,
181 "group() group name doesn't exist");
185 i
= PyInt_AsLong(index
);
186 if (i
== -1 && PyErr_Occurred())
189 if (i
< 0 || i
>= RE_NREGS
) {
190 PyErr_SetString(RegexError
, "group() index out of range");
193 if (re
->re_lastok
== NULL
) {
194 PyErr_SetString(RegexError
,
195 "group() only valid after successful match/search");
198 a
= re
->re_regs
.start
[i
];
199 b
= re
->re_regs
.end
[i
];
200 if (a
< 0 || b
< 0) {
205 if (!(v
= PyString_AsString(re
->re_lastok
)))
208 return PyString_FromStringAndSize(v
+a
, b
-a
);
213 regobj_group(regexobject
*re
, PyObject
*args
)
215 int n
= PyTuple_Size(args
);
217 PyObject
*res
= NULL
;
222 PyErr_SetString(PyExc_TypeError
, "not enough arguments");
226 /* return value is a single string */
227 PyObject
*index
= PyTuple_GetItem(args
, 0);
231 return group_from_index(re
, index
);
234 /* return value is a tuple */
235 if (!(res
= PyTuple_New(n
)))
238 for (i
= 0; i
< n
; i
++) {
239 PyObject
*index
= PyTuple_GetItem(args
, i
);
240 PyObject
*group
= NULL
;
244 if (!(group
= group_from_index(re
, index
)))
246 if (PyTuple_SetItem(res
, i
, group
) < 0)
257 static struct PyMethodDef reg_methods
[] = {
258 {"match", (PyCFunction
)regobj_match
, 1},
259 {"search", (PyCFunction
)regobj_search
, 1},
260 {"group", (PyCFunction
)regobj_group
, 1},
261 {NULL
, NULL
} /* sentinel */
266 static char* members
[] = {
267 "last", "regs", "translate",
268 "groupindex", "realpat", "givenpat",
274 regobj_getattr(regexobject
*re
, char *name
)
276 if (strcmp(name
, "regs") == 0) {
277 if (re
->re_lastok
== NULL
) {
281 return makeresult(&re
->re_regs
);
283 if (strcmp(name
, "last") == 0) {
284 if (re
->re_lastok
== NULL
) {
288 Py_INCREF(re
->re_lastok
);
289 return re
->re_lastok
;
291 if (strcmp(name
, "translate") == 0) {
292 if (re
->re_translate
== NULL
) {
296 Py_INCREF(re
->re_translate
);
297 return re
->re_translate
;
299 if (strcmp(name
, "groupindex") == 0) {
300 if (re
->re_groupindex
== NULL
) {
304 Py_INCREF(re
->re_groupindex
);
305 return re
->re_groupindex
;
307 if (strcmp(name
, "realpat") == 0) {
308 if (re
->re_realpat
== NULL
) {
312 Py_INCREF(re
->re_realpat
);
313 return re
->re_realpat
;
315 if (strcmp(name
, "givenpat") == 0) {
316 if (re
->re_givenpat
== NULL
) {
320 Py_INCREF(re
->re_givenpat
);
321 return re
->re_givenpat
;
323 if (strcmp(name
, "__members__") == 0) {
325 PyObject
*list
= NULL
;
327 /* okay, so it's unlikely this list will change that often.
328 still, it's easier to change it in just one place.
332 if (!(list
= PyList_New(i
)))
337 PyObject
* v
= PyString_FromString(members
[i
]);
338 if (!v
|| PyList_SetItem(list
, i
, v
) < 0) {
346 return Py_FindMethod(reg_methods
, (PyObject
*)re
, name
);
349 static PyTypeObject Regextype
= {
350 PyObject_HEAD_INIT(&PyType_Type
)
353 sizeof(regexobject
), /*tp_size*/
356 (destructor
)reg_dealloc
, /*tp_dealloc*/
358 (getattrfunc
)regobj_getattr
, /*tp_getattr*/
364 /* reference counting invariants:
368 groupindex: transferred
371 newregexobject(PyObject
*pattern
, PyObject
*translate
, PyObject
*givenpat
, PyObject
*groupindex
)
377 if (!PyArg_Parse(pattern
, "t#", &pat
, &size
))
380 if (translate
!= NULL
&& PyString_Size(translate
) != 256) {
381 PyErr_SetString(RegexError
,
382 "translation table must be 256 bytes");
385 re
= PyObject_New(regexobject
, &Regextype
);
388 re
->re_patbuf
.buffer
= NULL
;
389 re
->re_patbuf
.allocated
= 0;
390 re
->re_patbuf
.fastmap
= (unsigned char *)re
->re_fastmap
;
392 re
->re_patbuf
.translate
= (unsigned char *)PyString_AsString(translate
);
393 if (!re
->re_patbuf
.translate
)
395 Py_INCREF(translate
);
398 re
->re_patbuf
.translate
= NULL
;
399 re
->re_translate
= translate
;
400 re
->re_lastok
= NULL
;
401 re
->re_groupindex
= groupindex
;
403 re
->re_realpat
= pattern
;
405 re
->re_givenpat
= givenpat
;
406 error
= _Py_re_compile_pattern((unsigned char *)pat
, size
, &re
->re_patbuf
);
408 PyErr_SetString(RegexError
, error
);
412 return (PyObject
*)re
;
419 regex_compile(PyObject
*self
, PyObject
*args
)
421 PyObject
*pat
= NULL
;
422 PyObject
*tran
= NULL
;
424 if (!PyArg_ParseTuple(args
, "S|S:compile", &pat
, &tran
))
426 return newregexobject(pat
, tran
, pat
, NULL
);
430 symcomp(PyObject
*pattern
, PyObject
*gdict
)
432 char *opat
, *oend
, *o
, *n
, *g
, *v
;
438 int require_escape
= re_syntax
& RE_NO_BK_PARENS
? 0 : 1;
440 if (!(opat
= PyString_AsString(pattern
)))
443 if ((sz
= PyString_Size(pattern
)) < 0)
454 if (!(npattern
= PyString_FromStringAndSize((char*)NULL
, sz
)) ||
455 !(n
= PyString_AsString(npattern
)))
459 if (*o
== '(' && escaped
== require_escape
) {
464 if (++o
>= oend
|| *o
!= '<')
467 if (o
+1 < oend
&& *(o
+1) == '>')
471 for (++o
; o
< oend
;) {
473 PyObject
*group_name
= NULL
;
474 PyObject
*group_index
= NULL
;
476 group_name
= PyString_FromString(name_buf
);
477 group_index
= PyInt_FromLong(group_count
);
478 if (group_name
== NULL
||
479 group_index
== NULL
||
480 PyDict_SetItem(gdict
, group_name
,
483 Py_XDECREF(group_name
);
484 Py_XDECREF(group_index
);
485 Py_XDECREF(npattern
);
488 Py_DECREF(group_name
);
489 Py_DECREF(group_index
);
490 ++o
; /* eat the '>' */
493 if (!isalnum(Py_CHARMASK(*o
)) && *o
!= '_') {
500 else if (*o
== '[' && !escaped
) {
502 ++o
; /* eat the char following '[' */
504 while (o
< oend
&& *o
!= ']') {
511 else if (*o
== '\\') {
523 if (!(v
= PyString_AsString(npattern
))) {
527 /* _PyString_Resize() decrements npattern on failure */
528 if (_PyString_Resize(&npattern
, n
- v
) == 0)
537 regex_symcomp(PyObject
*self
, PyObject
*args
)
540 PyObject
*tran
= NULL
;
541 PyObject
*gdict
= NULL
;
543 PyObject
*retval
= NULL
;
545 if (!PyArg_ParseTuple(args
, "S|S:symcomp", &pattern
, &tran
))
548 gdict
= PyDict_New();
549 if (gdict
== NULL
|| (npattern
= symcomp(pattern
, gdict
)) == NULL
) {
554 retval
= newregexobject(npattern
, tran
, pattern
, gdict
);
560 static PyObject
*cache_pat
;
561 static PyObject
*cache_prog
;
564 update_cache(PyObject
*pat
)
566 PyObject
*tuple
= Py_BuildValue("(O)", pat
);
572 if (pat
!= cache_pat
) {
573 Py_XDECREF(cache_pat
);
575 Py_XDECREF(cache_prog
);
576 cache_prog
= regex_compile((PyObject
*)NULL
, tuple
);
577 if (cache_prog
== NULL
) {
582 Py_INCREF(cache_pat
);
590 regex_match(PyObject
*self
, PyObject
*args
)
592 PyObject
*pat
, *string
;
595 if (!PyArg_Parse(args
, "(SS)", &pat
, &string
))
597 if (update_cache(pat
) < 0)
600 if (!(tuple
= Py_BuildValue("(S)", string
)))
602 v
= regobj_match((regexobject
*)cache_prog
, tuple
);
608 regex_search(PyObject
*self
, PyObject
*args
)
610 PyObject
*pat
, *string
;
613 if (!PyArg_Parse(args
, "(SS)", &pat
, &string
))
615 if (update_cache(pat
) < 0)
618 if (!(tuple
= Py_BuildValue("(S)", string
)))
620 v
= regobj_search((regexobject
*)cache_prog
, tuple
);
626 regex_set_syntax(PyObject
*self
, PyObject
*args
)
629 if (!PyArg_Parse(args
, "i", &syntax
))
631 syntax
= re_set_syntax(syntax
);
632 /* wipe the global pattern cache */
633 Py_XDECREF(cache_pat
);
635 Py_XDECREF(cache_prog
);
637 return PyInt_FromLong((long)syntax
);
641 regex_get_syntax(PyObject
*self
, PyObject
*args
)
643 if (!PyArg_Parse(args
, ""))
645 return PyInt_FromLong((long)re_syntax
);
649 static struct PyMethodDef regex_global_methods
[] = {
650 {"compile", regex_compile
, 1},
651 {"symcomp", regex_symcomp
, 1},
652 {"match", regex_match
, 0},
653 {"search", regex_search
, 0},
654 {"set_syntax", regex_set_syntax
, 0},
655 {"get_syntax", regex_get_syntax
, 0},
656 {NULL
, NULL
} /* sentinel */
666 m
= Py_InitModule("regex", regex_global_methods
);
667 d
= PyModule_GetDict(m
);
669 /* Initialize regex.error exception */
670 v
= RegexError
= PyErr_NewException("regex.error", NULL
, NULL
);
671 if (v
== NULL
|| PyDict_SetItemString(d
, "error", v
) != 0)
674 /* Initialize regex.casefold constant */
675 if (!(v
= PyString_FromStringAndSize((char *)NULL
, 256)))
678 if (!(s
= PyString_AsString(v
)))
681 for (i
= 0; i
< 256; i
++) {
687 if (PyDict_SetItemString(d
, "casefold", v
) < 0)
691 if (!PyErr_Occurred())