2 XXX support range parameter on search
3 XXX support mstop parameter on search
7 /* Regular expression objects */
8 /* This uses Tatu Ylonen's copyleft-free reimplementation of
9 GNU regular expressions */
/* Exception object used for every regex-specific error raised in this
   file; it is created as regex.error during module initialization. */
17 static PyObject
*RegexError
; /* Exception */
/* Per-object state of a compiled regex.  These are the members reached
   through regexobject pointers by the functions in this file.  The five
   PyObject fields are released with Py_XDECREF in reg_dealloc, so each of
   them is allowed to be NULL. */
21 struct re_pattern_buffer re_patbuf
; /* The compiled expression */
22 struct re_registers re_regs
; /* The registers from the last match */
23 char re_fastmap
[256]; /* Storage for fastmap */
24 PyObject
*re_translate
; /* String object for translate table */
25 PyObject
*re_lastok
; /* String object last matched/searched */
26 PyObject
*re_groupindex
; /* Group name to index dictionary */
27 PyObject
*re_givenpat
; /* Pattern with symbolic groups */
28 PyObject
*re_realpat
; /* Pattern without symbolic groups */
31 /* Regex object methods */
/* Destructor for regex objects (installed as tp_dealloc in Regextype).

   re: the object being destroyed.

   Frees the compiled pattern buffer when one was allocated, then drops
   the references held in the translate, lastok, groupindex, givenpat and
   realpat fields.  Py_XDECREF is used because those fields may be NULL. */
34 reg_dealloc(regexobject
*re
)
/* The pattern buffer is released with free(), not through the Python
   object allocator. */
36 if (re
->re_patbuf
.buffer
)
37 free(re
->re_patbuf
.buffer
);
38 Py_XDECREF(re
->re_translate
);
39 Py_XDECREF(re
->re_lastok
);
40 Py_XDECREF(re
->re_groupindex
);
41 Py_XDECREF(re
->re_givenpat
);
42 Py_XDECREF(re
->re_realpat
);
/* Convert a register set into a Python tuple.

   regs: registers whose start[] and end[] arrays are read for the
         indices 0 .. RE_NREGS-1.

   The result tuple has RE_NREGS slots.  Each register is represented as
   an (int, int) pair built from its start and end values.  Registers
   whose start and end are both -1 are special-cased; a single static
   (-1, -1) pair named filler is kept across calls for them.
   NOTE(review): the statement that stores filler into the tuple is not
   among the visible lines - confirm it takes its own reference.

   Called from regobj_getattr to produce the regs attribute. */
47 makeresult(struct re_registers
*regs
)
51 static PyObject
*filler
= NULL
;
/* Shared placeholder pair for registers that did not take part in the
   match. */
54 filler
= Py_BuildValue("(ii)", -1, -1);
58 v
= PyTuple_New(RE_NREGS
);
62 for (i
= 0; i
< RE_NREGS
; i
++) {
63 int lo
= regs
->start
[i
];
64 int hi
= regs
->end
[i
];
66 if (lo
== -1 && hi
== -1) {
/* A fresh (lo, hi) pair is handed to PyTuple_SetItem; both the
   allocation and the store are checked for failure. */
71 w
= Py_BuildValue("(ii)", lo
, hi
);
72 if (w
== NULL
|| PyTuple_SetItem(v
, i
, w
) < 0) {
/* Method match(string[, offset]) of regex objects.

   re:   the compiled regex object.
   args: argument tuple holding the subject object and an optional
         integer offset.

   The subject is converted to a character buffer and its size.  An
   offset below 0 or above that size raises RegexError.  The match is run
   with _Py_re_match on the compiled pattern in re.

   Returns a Python int built from the matcher result (the length of the
   match, or -1).  On a serious matcher failure RegexError is raised with
   a generic message unless an exception is already pending. */
81 regobj_match(regexobject
*re
, PyObject
*args
)
89 if (!PyArg_ParseTuple(args
, "O|i:match", &argstring
, &offset
))
91 if (!PyArg_Parse(argstring
, "t#", &buffer
, &size
))
/* offset == size is accepted; only values outside 0 .. size are
   rejected. */
94 if (offset
< 0 || offset
> size
) {
95 PyErr_SetString(RegexError
, "match offset out of range");
/* Release the subject string remembered from the previous
   match/search. */
98 Py_XDECREF(re
->re_lastok
);
100 result
= _Py_re_match(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
,
103 /* Serious failure of some sort; if re_match didn't
104 set an exception, raise a generic error */
105 if (!PyErr_Occurred())
106 PyErr_SetString(RegexError
, "match failure");
/* Keep a new reference to the subject; group_from_index and the
   last/regs attributes read re_lastok. */
110 Py_INCREF(argstring
);
111 re
->re_lastok
= argstring
;
113 return PyInt_FromLong((long)result
); /* Length of the match or -1 */
/* Method search(string[, offset]) of regex objects.

   re:   the compiled regex object.
   args: argument tuple holding the subject object and an optional
         integer offset.

   The subject is converted to a character buffer and its size.  An
   offset below 0 or above that size raises RegexError.  The search is
   run with _Py_re_search, starting at offset and covering the rest of
   the buffer (range = size - offset).

   Returns a Python int built from the searcher result (the position of
   the match, or -1).  On a serious failure RegexError is raised with a
   generic message unless an exception is already pending. */
117 regobj_search(regexobject
*re
, PyObject
*args
)
126 if (!PyArg_ParseTuple(args
, "O|i:search", &argstring
, &offset
))
128 if (!PyArg_Parse(argstring
, "t#:search", &buffer
, &size
))
/* offset == size is accepted; only values outside 0 .. size are
   rejected. */
131 if (offset
< 0 || offset
> size
) {
132 PyErr_SetString(RegexError
, "search offset out of range");
135 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
136 the implementation don't match: the documentation states that
137 |range| positions are tried, while the code tries |range|+1
138 positions. It seems more productive to believe the code! */
139 range
= size
- offset
;
/* Forget the previous subject before searching, so re_lastok is NULL
   while no successful result has been recorded. */
140 Py_XDECREF(re
->re_lastok
);
141 re
->re_lastok
= NULL
;
142 result
= _Py_re_search(&re
->re_patbuf
, (unsigned char *)buffer
, size
, offset
, range
,
145 /* Serious failure of some sort; if re_match didn't
146 set an exception, raise a generic error */
147 if (!PyErr_Occurred())
148 PyErr_SetString(RegexError
, "match failure");
/* Keep a new reference to the subject; group_from_index and the
   last/regs attributes read re_lastok. */
152 Py_INCREF(argstring
);
153 re
->re_lastok
= argstring
;
155 return PyInt_FromLong((long)result
); /* Position of the match or -1 */
158 /* get the group from the regex where index can be a string (group name) or
159 an integer index [0 .. 99]
/* Return the text of one group of the last successful match/search.

   re:    the regex object; its re_lastok and re_regs fields are read.
   index: either a string (looked up as a group name in re_groupindex)
          or an object convertible with PyInt_AsLong.

   RegexError is raised when a group name is unknown (or the object has
   no group index dictionary), when the numeric index lies outside
   0 .. RE_NREGS-1, and when no subject string is remembered in
   re_lastok.  The normal result is a new string holding the bytes
   start .. end-1 of the remembered subject for that register.
   NOTE(review): the result for a register with a negative start or end
   is produced on a line that is not visible here. */
162 group_from_index(regexobject
*re
, PyObject
*index
)
/* A name is replaced by the value stored for it in the group index
   dictionary; the code below then treats that value as the index. */
167 if (PyString_Check(index
))
168 if (re
->re_groupindex
== NULL
||
169 !(index
= PyDict_GetItem(re
->re_groupindex
, index
)))
171 PyErr_SetString(RegexError
,
172 "group() group name doesn't exist");
/* -1 is a legitimate conversion result, so a pending exception is what
   distinguishes the error case. */
176 i
= PyInt_AsLong(index
);
177 if (i
== -1 && PyErr_Occurred())
180 if (i
< 0 || i
>= RE_NREGS
) {
181 PyErr_SetString(RegexError
, "group() index out of range");
184 if (re
->re_lastok
== NULL
) {
185 PyErr_SetString(RegexError
,
186 "group() only valid after successful match/search");
189 a
= re
->re_regs
.start
[i
];
190 b
= re
->re_regs
.end
[i
];
191 if (a
< 0 || b
< 0) {
196 if (!(v
= PyString_AsString(re
->re_lastok
)))
/* Slice the remembered subject: b - a bytes starting at offset a. */
199 return PyString_FromStringAndSize(v
+a
, b
-a
);
/* Method group(index, ...) of regex objects.

   re:   the regex object.
   args: argument tuple; every element is a group index or group name as
         accepted by group_from_index.

   The number of arguments decides the result shape: there is a
   TypeError path for too few arguments, a path that returns the single
   group directly, and a path that builds a tuple with one entry per
   argument.  Failures from group_from_index and PyTuple_SetItem are
   checked inside the loop. */
204 regobj_group(regexobject
*re
, PyObject
*args
)
206 int n
= PyTuple_Size(args
);
208 PyObject
*res
= NULL
;
213 PyErr_SetString(PyExc_TypeError
, "not enough arguments");
217 /* return value is a single string */
218 PyObject
*index
= PyTuple_GetItem(args
, 0);
222 return group_from_index(re
, index
);
225 /* return value is a tuple */
226 if (!(res
= PyTuple_New(n
)))
/* Slot i of the result receives the group selected by argument i. */
229 for (i
= 0; i
< n
; i
++) {
230 PyObject
*index
= PyTuple_GetItem(args
, i
);
231 PyObject
*group
= NULL
;
235 if (!(group
= group_from_index(re
, index
)))
237 if (PyTuple_SetItem(res
, i
, group
) < 0)
/* Method table of regex objects: match, search and group, each taking
   its arguments as a tuple (METH_VARARGS).  regobj_getattr passes this
   table to Py_FindMethod for any name it does not handle itself. */
248 static struct PyMethodDef reg_methods
[] = {
249 {"match", (PyCFunction
)regobj_match
, METH_VARARGS
},
250 {"search", (PyCFunction
)regobj_search
, METH_VARARGS
},
251 {"group", (PyCFunction
)regobj_group
, METH_VARARGS
},
252 {NULL
, NULL
} /* sentinel */
/* Names of the data attributes of regex objects.  regobj_getattr turns
   these entries into the list returned for __members__; the same six
   names are the ones it compares against with strcmp. */
257 static char* members
[] = {
258 "last", "regs", "translate",
259 "groupindex", "realpat", "givenpat",
/* Attribute lookup for regex objects (installed as tp_getattr in
   Regextype).

   re:   the regex object.
   name: attribute name as a C string.

   The name is compared with strcmp against a fixed set:
     regs        - tuple built by makeresult from re_regs; guarded by a
                   check that re_lastok is not NULL
     last        - re_lastok
     translate   - re_translate
     groupindex  - re_groupindex
     realpat     - re_realpat
     givenpat    - re_givenpat
     __members__ - list of strings built from the members array
   Each object-valued attribute is returned as a new reference
   (Py_INCREF before return) and has a separate branch for a NULL field.
   NOTE(review): what the NULL branches return is not visible here.
   Any other name is resolved through Py_FindMethod on reg_methods. */
265 regobj_getattr(regexobject
*re
, char *name
)
267 if (strcmp(name
, "regs") == 0) {
268 if (re
->re_lastok
== NULL
) {
272 return makeresult(&re
->re_regs
);
274 if (strcmp(name
, "last") == 0) {
275 if (re
->re_lastok
== NULL
) {
279 Py_INCREF(re
->re_lastok
);
280 return re
->re_lastok
;
282 if (strcmp(name
, "translate") == 0) {
283 if (re
->re_translate
== NULL
) {
287 Py_INCREF(re
->re_translate
);
288 return re
->re_translate
;
290 if (strcmp(name
, "groupindex") == 0) {
291 if (re
->re_groupindex
== NULL
) {
295 Py_INCREF(re
->re_groupindex
);
296 return re
->re_groupindex
;
298 if (strcmp(name
, "realpat") == 0) {
299 if (re
->re_realpat
== NULL
) {
303 Py_INCREF(re
->re_realpat
);
304 return re
->re_realpat
;
306 if (strcmp(name
, "givenpat") == 0) {
307 if (re
->re_givenpat
== NULL
) {
311 Py_INCREF(re
->re_givenpat
);
312 return re
->re_givenpat
;
314 if (strcmp(name
, "__members__") == 0) {
316 PyObject
*list
= NULL
;
318 /* okay, so it's unlikely this list will change that often.
319 still, it's easier to change it in just one place.
323 if (!(list
= PyList_New(i
)))
328 PyObject
* v
= PyString_FromString(members
[i
]);
329 if (!v
|| PyList_SetItem(list
, i
, v
) < 0) {
337 return Py_FindMethod(reg_methods
, (PyObject
*)re
, name
);
/* Type object for compiled regex objects.  The head is initialized with
   a NULL type pointer; ob_type is assigned to the address of PyType_Type
   in the module initialization code.  The visible slots are the type
   name, the instance size, the destructor reg_dealloc and the
   attribute getter regobj_getattr. */
340 static PyTypeObject Regextype
= {
341 PyObject_HEAD_INIT(NULL
)
343 "regex.regex", /*tp_name*/
344 sizeof(regexobject
), /*tp_size*/
347 (destructor
)reg_dealloc
, /*tp_dealloc*/
349 (getattrfunc
)regobj_getattr
, /*tp_getattr*/
355 /* reference counting invariants:
359 groupindex: transferred
/* Create a regex object from an already prepared pattern.

   pattern:    pattern to compile; read as a character buffer and stored
               in re_realpat.
   translate:  optional translation table (may be NULL); when given it
               must be a string of exactly 256 bytes, otherwise
               RegexError is raised.  A reference is taken with
               Py_INCREF and the object is stored in re_translate.
   givenpat:   pattern as written by the caller; stored in re_givenpat.
   groupindex: group name dictionary or NULL; stored in re_groupindex.

   The object is allocated with PyObject_New from Regextype, its pattern
   buffer is reset (no buffer, zero allocated, fastmap pointing at the
   re_fastmap array of the object) and the pattern is compiled with
   _Py_re_compile_pattern.  The error string reported by the compiler is
   raised as RegexError.  Returns the new object on success. */
362 newregexobject(PyObject
*pattern
, PyObject
*translate
, PyObject
*givenpat
, PyObject
*groupindex
)
368 if (!PyArg_Parse(pattern
, "t#", &pat
, &size
))
371 if (translate
!= NULL
&& PyString_Size(translate
) != 256) {
372 PyErr_SetString(RegexError
,
373 "translation table must be 256 bytes");
376 re
= PyObject_New(regexobject
, &Regextype
);
/* Start from an empty pattern buffer; the fastmap storage lives inside
   the object itself. */
379 re
->re_patbuf
.buffer
= NULL
;
380 re
->re_patbuf
.allocated
= 0;
381 re
->re_patbuf
.fastmap
= (unsigned char *)re
->re_fastmap
;
/* The pattern buffer points straight at the bytes of the translate
   string, so the object keeps its own reference to that string. */
383 re
->re_patbuf
.translate
= (unsigned char *)PyString_AsString(translate
);
384 if (!re
->re_patbuf
.translate
)
386 Py_INCREF(translate
);
389 re
->re_patbuf
.translate
= NULL
;
390 re
->re_translate
= translate
;
391 re
->re_lastok
= NULL
;
392 re
->re_groupindex
= groupindex
;
394 re
->re_realpat
= pattern
;
396 re
->re_givenpat
= givenpat
;
397 error
= _Py_re_compile_pattern((unsigned char *)pat
, size
, &re
->re_patbuf
);
399 PyErr_SetString(RegexError
, error
);
403 return (PyObject
*)re
;
/* Module function compile(pattern[, translate]).

   self: unused in the visible lines.
   args: argument tuple; a string pattern and an optional string
         translation table.

   Returns the object produced by newregexobject.  The same pattern
   object is passed as both the real and the given pattern, and no group
   index dictionary is supplied (NULL). */
410 regex_compile(PyObject
*self
, PyObject
*args
)
412 PyObject
*pat
= NULL
;
413 PyObject
*tran
= NULL
;
415 if (!PyArg_ParseTuple(args
, "S|S:compile", &pat
, &tran
))
417 return newregexobject(pat
, tran
, pat
, NULL
);
/* Translate a pattern that may contain symbolic group names into a new
   pattern string, recording the names in gdict.

   pattern: string object holding the pattern as given by the caller.
   gdict:   dictionary that receives one entry per symbolic group, with
            the group name as key and the integer group_count as value.

   What the visible lines establish:
   - an opening parenthesis is treated as a group opener when its escaped
     state equals require_escape, which is 0 when RE_NO_BK_PARENS is set
     in re_syntax and 1 otherwise;
   - a following less-than sign introduces a name, and the case of an
     immediately following greater-than sign is tested separately;
   - name characters must be alphanumeric or an underscore;
   - a bracket expression opened by an unescaped bracket is scanned up to
     its closing bracket, and a backslash has its own branch;
   - the output string is allocated with the size of the input and is
     finally resized to n - v, the distance between the write pointer and
     the start of the output.
   The caller treats a NULL result as failure.
   NOTE(review): several statements of this function are not visible
   here, so the exact copy rules between input and output are not
   described. */
421 symcomp(PyObject
*pattern
, PyObject
*gdict
)
423 char *opat
, *oend
, *o
, *n
, *g
, *v
;
/* Whether a group parenthesis needs a preceding backslash depends on the
   current global syntax. */
429 int require_escape
= re_syntax
& RE_NO_BK_PARENS
? 0 : 1;
431 if (!(opat
= PyString_AsString(pattern
)))
434 if ((sz
= PyString_Size(pattern
)) < 0)
/* Output buffer of the same size as the input; n is the write
   pointer. */
445 if (!(npattern
= PyString_FromStringAndSize((char*)NULL
, sz
)) ||
446 !(n
= PyString_AsString(npattern
)))
450 if (*o
== '(' && escaped
== require_escape
) {
455 if (++o
>= oend
|| *o
!= '<')
458 if (o
+1 < oend
&& *(o
+1) == '>')
462 for (++o
; o
< oend
;) {
464 PyObject
*group_name
= NULL
;
465 PyObject
*group_index
= NULL
;
/* Record name -> group number.  On any failure the two temporaries and
   the output string are released. */
467 group_name
= PyString_FromString(name_buf
);
468 group_index
= PyInt_FromLong(group_count
);
469 if (group_name
== NULL
||
470 group_index
== NULL
||
471 PyDict_SetItem(gdict
, group_name
,
474 Py_XDECREF(group_name
);
475 Py_XDECREF(group_index
);
476 Py_XDECREF(npattern
);
/* Success path: the local references to the key and the value are
   dropped after the dictionary insert. */
479 Py_DECREF(group_name
);
480 Py_DECREF(group_index
);
481 ++o
; /* eat the '>' */
484 if (!isalnum(Py_CHARMASK(*o
)) && *o
!= '_') {
491 else if (*o
== '[' && !escaped
) {
493 ++o
; /* eat the char following '[' */
495 while (o
< oend
&& *o
!= ']') {
502 else if (*o
== '\\') {
514 if (!(v
= PyString_AsString(npattern
))) {
518 /* _PyString_Resize() decrements npattern on failure */
519 _PyString_Resize(&npattern
, n
- v
);
/* Module function symcomp(pattern[, translate]).

   self: unused in the visible lines.
   args: argument tuple; a string pattern and an optional string
         translation table.

   A fresh dictionary is created and filled by symcomp, which also
   produces the translated pattern.  The regex object is then built by
   newregexobject from the translated pattern, with the original pattern
   as the given pattern and the dictionary as the group index.  Failure
   to create the dictionary or to translate the pattern is handled in a
   single error branch. */
525 regex_symcomp(PyObject
*self
, PyObject
*args
)
528 PyObject
*tran
= NULL
;
529 PyObject
*gdict
= NULL
;
531 PyObject
*retval
= NULL
;
533 if (!PyArg_ParseTuple(args
, "S|S:symcomp", &pattern
, &tran
))
536 gdict
= PyDict_New();
537 if (gdict
== NULL
|| (npattern
= symcomp(pattern
, gdict
)) == NULL
) {
542 retval
= newregexobject(npattern
, tran
, pattern
, gdict
);
/* Single-entry cache used by the module-level match and search
   functions: cache_pat is the pattern object most recently passed to
   update_cache and cache_prog is the regex object compiled from it.
   regex_set_syntax releases both. */
548 static PyObject
*cache_pat
;
549 static PyObject
*cache_prog
;
/* Make cache_prog the compiled form of pat.

   pat: pattern object supplied to the module-level match/search.

   A one-element argument tuple is built around pat.  When pat is not the
   very object remembered in cache_pat (pointer comparison, so an equal
   but distinct string still recompiles), the old cache entries are
   released and pat is compiled through regex_compile.  A new reference
   to the cached pattern is taken with Py_INCREF.

   The callers treat a negative return value as failure. */
552 update_cache(PyObject
*pat
)
554 PyObject
*tuple
= Py_BuildValue("(O)", pat
);
560 if (pat
!= cache_pat
) {
561 Py_XDECREF(cache_pat
);
563 Py_XDECREF(cache_prog
);
/* regex_compile is called directly with a NULL self and the argument
   tuple built above. */
564 cache_prog
= regex_compile((PyObject
*)NULL
, tuple
);
565 if (cache_prog
== NULL
) {
570 Py_INCREF(cache_pat
);
/* Module function match(pattern, string).

   self: unused in the visible lines.
   args: argument tuple holding two string objects.

   The pattern is compiled or fetched through update_cache, the subject
   is wrapped in a one-element tuple and the work is delegated to
   regobj_match on the cached regex object, whose result is kept in v. */
578 regex_match(PyObject
*self
, PyObject
*args
)
580 PyObject
*pat
, *string
;
583 if (!PyArg_ParseTuple(args
, "SS:match", &pat
, &string
))
585 if (update_cache(pat
) < 0)
588 if (!(tuple
= Py_BuildValue("(S)", string
)))
590 v
= regobj_match((regexobject
*)cache_prog
, tuple
);
/* Module function search(pattern, string).

   self: unused in the visible lines.
   args: argument tuple holding two string objects.

   The pattern is compiled or fetched through update_cache, the subject
   is wrapped in a one-element tuple and the work is delegated to
   regobj_search on the cached regex object, whose result is kept in v. */
596 regex_search(PyObject
*self
, PyObject
*args
)
598 PyObject
*pat
, *string
;
601 if (!PyArg_ParseTuple(args
, "SS:search", &pat
, &string
))
603 if (update_cache(pat
) < 0)
606 if (!(tuple
= Py_BuildValue("(S)", string
)))
608 v
= regobj_search((regexobject
*)cache_prog
, tuple
);
/* Module function set_syntax(syntax).

   self: unused in the visible lines.
   args: argument tuple holding one integer.

   Passes the integer to re_set_syntax and returns the value that call
   produces as a Python int.
   NOTE(review): presumably that value is the previously active syntax -
   confirm against the regexpr implementation.
   Patterns compiled under the old syntax must not be reused, so both
   entries of the global pattern cache are released. */
614 regex_set_syntax(PyObject
*self
, PyObject
*args
)
617 if (!PyArg_ParseTuple(args
, "i:set_syntax", &syntax
))
619 syntax
= re_set_syntax(syntax
);
620 /* wipe the global pattern cache */
621 Py_XDECREF(cache_pat
);
623 Py_XDECREF(cache_prog
);
625 return PyInt_FromLong((long)syntax
);
/* Module function get_syntax(): returns the current value of the global
   re_syntax as a Python int.  Registered with METH_NOARGS, so it takes
   no argument tuple. */
629 regex_get_syntax(PyObject
*self
)
631 return PyInt_FromLong((long)re_syntax
);
/* Function table of the regex module, handed to Py_InitModule during
   module initialization.  All entries take an argument tuple
   (METH_VARARGS) except get_syntax, which is registered with
   METH_NOARGS and therefore needs a cast to PyCFunction. */
635 static struct PyMethodDef regex_global_methods
[] = {
636 {"compile", regex_compile
, METH_VARARGS
},
637 {"symcomp", regex_symcomp
, METH_VARARGS
},
638 {"match", regex_match
, METH_VARARGS
},
639 {"search", regex_search
, METH_VARARGS
},
640 {"set_syntax", regex_set_syntax
, METH_VARARGS
},
641 {"get_syntax", (PyCFunction
)regex_get_syntax
, METH_NOARGS
},
642 {NULL
, NULL
} /* sentinel */
/* Body of the module initialization code.  Steps shown:
   1. finish Regextype by assigning its ob_type;
   2. create the module named regex with regex_global_methods and fetch
      its dictionary;
   3. issue a DeprecationWarning that points users to the re module; a
      negative result from PyErr_Warn is handled as an error;
   4. create the exception regex.error, keep it in RegexError and
      publish it in the module dictionary under the key error;
   5. allocate an uninitialized 256-byte string, fill it in a loop over
      all 256 byte values and publish it under the key casefold;
   6. finish with a check for a pending exception.
   NOTE(review): the loop body that computes each casefold byte and the
   statements following the error checks are not visible here. */
652 /* Initialize object type */
653 Regextype
.ob_type
= &PyType_Type
;
655 m
= Py_InitModule("regex", regex_global_methods
);
656 d
= PyModule_GetDict(m
);
658 if (PyErr_Warn(PyExc_DeprecationWarning
,
659 "the regex module is deprecated; "
660 "please use the re module") < 0)
663 /* Initialize regex.error exception */
664 v
= RegexError
= PyErr_NewException("regex.error", NULL
, NULL
);
665 if (v
== NULL
|| PyDict_SetItemString(d
, "error", v
) != 0)
668 /* Initialize regex.casefold constant */
669 if (!(v
= PyString_FromStringAndSize((char *)NULL
, 256)))
672 if (!(s
= PyString_AsString(v
)))
675 for (i
= 0; i
< 256; i
++) {
681 if (PyDict_SetItemString(d
, "casefold", v
) < 0)
685 if (!PyErr_Occurred())