7 /* For Python 1.4, graminit.h has to be explicitly included */
/* Make Py_eval_input expand to eval_input.  NOTE(review): presumably part
   of the Python 1.4 compatibility handling mentioned in the comment above
   -- confirm. */
9 #define Py_eval_input eval_input
/* Exception object used for every error this module reports.  The module
   initialization code creates it with
   PyErr_NewException("pcre.error", NULL, NULL) and stores it in the module
   dictionary under the key "error". */
19 static PyObject
*ErrorObject
;
24 pcre_extra
*regex_extra
;
/* Forward declaration of the type object for compiled patterns; the
   initialized definition of Pcre_Type appears later in this file. */
28 staticforward PyTypeObject Pcre_Type
;
/* True when the type of v is exactly Pcre_Type (pointer comparison on
   ob_type). */
30 #define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
/* Escape type codes.  MEMORY_REFERENCE is the value that
   PyPcre_expand_escape stores through its typeptr argument for group
   references, and PyPcre_expand has a case for it.  The remaining codes are
   not used in the portions of the file visible here. */
36 #define MEMORY_REFERENCE 1
40 #define WORD_BOUNDARY 5
41 #define NOT_WORD_BOUNDARY 6
42 #define BEGINNING_OF_BUFFER 7
43 #define END_OF_BUFFER 8
/* Allocate a new PcreObject of type Pcre_Type with PyObject_New and clear
   its regex_extra field.  Only part of the body is visible in this excerpt:
   the allocation-failure check, any other field initialization and the
   return statement are not shown.  The args parameter is not used in the
   visible statements. */
47 newPcreObject(PyObject
*args
)
50 self
= PyObject_New(PcreObject
, &Pcre_Type
);
/* No study data yet; PyPcre_compile later stores the pcre_study() result
   in this field. */
54 self
->regex_extra
= NULL
;
/* Destructor installed in the tp_dealloc slot of Pcre_Type.  Releases the
   compiled pattern (regex) and the study data (regex_extra) through
   pcre_free, each only when the pointer is non-NULL.  Freeing of the object
   itself is not part of the visible lines. */
61 PyPcre_dealloc(PcreObject
*self
)
/* NOTE(review): pcre_free is called in parenthesized form, (pcre_free)(p);
   presumably it is a function pointer supplied by the PCRE library --
   confirm. */
63 if (self
->regex
) (pcre_free
)(self
->regex
);
64 if (self
->regex_extra
) (pcre_free
)(self
->regex_extra
);
/* Implementation of the "match" method listed in Pcre_methods.

   Arguments (format "t#|iii:match"): a read-only character buffer with its
   length, then three optional ints stored in pos, endpos and options, in
   that order.  Defaults are pos = 0, endpos = -1 and options = 0.

   Result, as far as the visible code shows:
     - Py_None when pcre_exec reports PCRE_ERROR_NOMATCH;
     - an ErrorObject exception whose value is the tuple
       ("Regex execution error", count) on an error path whose guarding
       condition is not part of this excerpt;
     - otherwise a list of num_groups + 1 items, one (start, end) pair per
       group, taken from the offsets vector filled in by pcre_exec. */
70 PyPcre_exec(PcreObject
*self
, PyObject
*args
)
73 int stringlen
, pos
= 0, options
=0, endpos
= -1, i
, count
;
77 if (!PyArg_ParseTuple(args
, "t#|iii:match", &string
, &stringlen
,
78 &pos
, &endpos
, &options
))
/* An endpos of -1 (the default) means: match up to the end of the string. */
80 if (endpos
== -1) {endpos
= stringlen
;}
/* endpos and pos are handed to pcre_exec directly after the subject string.
   NOTE(review): both values come straight from the caller and no range
   check against stringlen is visible in this excerpt -- confirm that
   out-of-range values cannot reach pcre_exec.  The last argument is the
   number of int slots in the offsets array. */
81 count
= pcre_exec(self
->regex
, self
->regex_extra
,
82 string
, endpos
, pos
, options
,
83 offsets
, sizeof(offsets
)/sizeof(int) );
84 /* If an error occurred during the match, and an exception was raised,
85 just return NULL and leave the exception alone. The most likely
86 problem to cause this would be running out of memory for
92 if (count
==PCRE_ERROR_NOMATCH
) {Py_INCREF(Py_None
); return Py_None
;}
95 PyObject
*errval
= Py_BuildValue("si", "Regex execution error", count
);
96 PyErr_SetObject(ErrorObject
, errval
);
101 list
=PyList_New(self
->num_groups
+1);
102 if (list
==NULL
) return NULL
;
103 for(i
=0; i
<=self
->num_groups
; i
++)
106 int start
=offsets
[i
*2], end
=offsets
[i
*2+1];
107 /* If the group wasn't affected by the match, return -1, -1 */
/* A group counts as unaffected when its start offset is negative or when
   its index is not below the count returned by pcre_exec. */
108 if (start
<0 || count
<=i
)
110 v
=Py_BuildValue("ii", start
, end
);
111 if (v
==NULL
) {Py_DECREF(list
); return NULL
;}
/* PyList_SetItem takes over the reference to v, so v is not released
   separately here. */
112 PyList_SetItem(list
, i
, v
);
/* Method table for compiled-pattern objects, searched by PyPcre_getattr
   through Py_FindMethod.  It exposes a single method, "match", implemented
   by PyPcre_exec.  The flags value 1 is the numeric value of METH_VARARGS,
   i.e. arguments arrive as a tuple.  Note the capital P: the module-level
   function table is the separate array pcre_methods. */
117 static PyMethodDef Pcre_methods
[] = {
118 {"match", (PyCFunction
)PyPcre_exec
, 1},
119 {NULL
, NULL
} /* sentinel */
/* Attribute lookup installed in the tp_getattr slot of Pcre_Type: looks up
   name in the Pcre_methods table via Py_FindMethod and returns whatever
   that call returns. */
123 PyPcre_getattr(PcreObject
*self
, char *name
)
125 return Py_FindMethod(Pcre_methods
, (PyObject
*)self
, name
);
/* Type object for compiled-pattern objects.  Only some initializer entries
   are visible in this excerpt: the object size, the destructor
   (PyPcre_dealloc), the attribute getter (PyPcre_getattr) and an empty
   tp_as_sequence slot.  The head is initialized with a NULL type pointer;
   the module initialization code assigns Pcre_Type.ob_type = &PyType_Type
   at run time.
   NOTE(review): this defining occurrence uses staticforward, the same
   keyword as the forward declaration; Python's headers also provide
   statichere for definitions -- confirm which one is intended for the
   supported compilers. */
129 staticforward PyTypeObject Pcre_Type
= {
130 PyObject_HEAD_INIT(NULL
)
133 sizeof(PcreObject
), /*tp_basicsize*/
136 (destructor
)PyPcre_dealloc
, /*tp_dealloc*/
138 (getattrfunc
)PyPcre_getattr
, /*tp_getattr*/
143 0, /*tp_as_sequence*/
147 /* --------------------------------------------------------------------- */
/* Module function "pcre_compile".

   Arguments (format "siO!:pcre_compile"): the pattern string, an int of
   option flags, and an object that must be a dictionary.

   Steps visible in this excerpt:
     1. allocate the result object with newPcreObject;
     2. compile the pattern with pcre_compile, which receives the
        dictionary as its last argument;
     3. study the compiled pattern with pcre_study;
     4. store the value returned by pcre_info in num_groups.
   Each failure path sets ErrorObject with a (message, int) tuple; the
   cleanup and return statements on those paths are not shown.  On success
   the new object is returned. */
150 PyPcre_compile(PyObject
*self
, PyObject
*args
)
153 PyObject
*dictionary
;
157 int options
, erroroffset
;
158 if (!PyArg_ParseTuple(args
, "siO!:pcre_compile", &pattern
, &options
,
159 &PyDict_Type
, &dictionary
))
161 rv
= newPcreObject(args
);
165 rv
->regex
= pcre_compile((char*)pattern
, options
,
166 &error
, &erroroffset
, dictionary
);
/* Build the module's own error only when no Python exception is already
   pending.  NOTE(review): presumably pcre_compile itself can raise while
   working with the dictionary -- confirm. */
170 if (!PyErr_Occurred())
172 PyObject
*errval
= Py_BuildValue("si", error
, erroroffset
);
173 PyErr_SetObject(ErrorObject
, errval
);
178 rv
->regex_extra
=pcre_study(rv
->regex
, 0, &error
);
/* A NULL result from pcre_study is treated as a failure only when an error
   message was stored as well. */
179 if (rv
->regex_extra
==NULL
&& error
!=NULL
)
181 PyObject
*errval
= Py_BuildValue("si", error
, 0);
183 PyErr_SetObject(ErrorObject
, errval
);
187 rv
->num_groups
= pcre_info(rv
->regex
, NULL
, NULL
);
/* NOTE(review): the pcre_info call above does not receive &error, so the
   message placed in the tuple below is whatever the earlier pcre_compile or
   pcre_study call left in error (possibly NULL) -- confirm this is
   intended. */
188 if (rv
->num_groups
<0)
190 PyObject
*errval
= Py_BuildValue("si", error
, rv
->num_groups
);
191 PyErr_SetObject(ErrorObject
, errval
);
196 return (PyObject
*)rv
;
/* Decode a single escape sequence in pattern (pattern_len bytes long).
   *indexptr is read as the position of the character that selects the
   escape.

   Results visible in this excerpt:
     - a one-character string for the fixed character escapes and for hex
       and octal escapes;
     - for group references, *typeptr is set to MEMORY_REFERENCE and the
       result is either an integer group number or a string holding the
       group name;
     - for an unrecognized escape, the two bytes ending at the escape
       character (assuming index is still one past that character there).
   Malformed input sets an ErrorObject exception.  The return statements on
   the error paths, the case labels of most branches and the write-back to
   *indexptr are not part of this excerpt. */
200 PyPcre_expand_escape(unsigned char *pattern
, int pattern_len
,
201 int *indexptr
, int *typeptr
)
204 int index
= *indexptr
;
206 if (pattern_len
<=index
)
208 PyErr_SetString(ErrorObject
, "escape ends too soon");
/* Fetch the character that selects the escape and step past it. */
211 c
=pattern
[index
]; index
++;
/* Fixed character escapes, each returned as a one-character string.  The
   case labels selecting them are not shown; the codes are ASCII TAB (9),
   LF (10), VT (11), CR (13), FF (12), BEL (7), BS (8) and a backslash. */
218 return Py_BuildValue("c", (char)9);
221 return Py_BuildValue("c", (char)10);
224 return Py_BuildValue("c", (char)11);
227 return Py_BuildValue("c", (char)13);
230 return Py_BuildValue("c", (char)12);
233 return Py_BuildValue("c", (char)7);
236 return Py_BuildValue("c", (char)8);
239 return Py_BuildValue("c", '\\');
/* Hex escape: keep scanning while the byte at end is a hex digit. */
246 while ( (end
<pattern_len
&& pcre_ctypes
[ pattern
[end
] ] & ctype_xdigit
) != 0)
/* Accumulate one hex digit.  Decimal digits have '0' subtracted; for the
   others 'W' is subtracted, which equals 'a' - 10 in ASCII, so 'a'..'f'
   become 10..15.  NOTE(review): this relies on pcre_lcc[ch] being the
   lower-case form of ch -- confirm. */
249 x
= x
* 16 + pcre_lcc
[ch
] -
250 (((pcre_ctypes
[ch
] & ctype_digit
) != 0)? '0' : 'W');
256 PyErr_SetString(ErrorObject
, "\\x must be followed by hex digits");
260 return Py_BuildValue("c", (char)x
);
/* These escape letters are rejected with an explicit error message. */
264 case('E'): case('G'): case('L'): case('Q'):
265 case('U'): case('l'): case('u'):
268 sprintf(message
, "\\%c is not allowed", c
);
269 PyErr_SetString(ErrorObject
, message
);
/* Symbolic group reference of the form \g<...>: a < is required at index,
   followed by a name or number and a closing >. */
276 int group_num
= 0, is_number
=0;
278 if (pattern_len
<=index
)
280 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
283 if (pattern
[index
]!='<')
285 PyErr_SetString(ErrorObject
, "missing < in symbolic reference");
/* Look for the closing >; reaching pattern_len means it is missing. */
290 while (end
<pattern_len
&& pattern
[end
]!='>')
292 if (end
==pattern_len
)
294 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
298 if (index
==end
) /* Zero-length name */
300 /* XXX should include the text of the reference */
301 PyErr_SetString(ErrorObject
, "zero-length symbolic reference");
304 if ((pcre_ctypes
[pattern
[index
]] & ctype_digit
)) /* First character is a digit */
307 group_num
= pattern
[index
] - '0';
/* Check the remaining characters of the reference, up to but excluding the
   closing >. */
310 for(i
=index
+1; i
<end
; i
++)
313 !(pcre_ctypes
[pattern
[i
]] & ctype_digit
) )
315 /* XXX should include the text of the reference */
316 PyErr_SetString(ErrorObject
, "illegal non-digit character in \\g<...> starting with digit");
319 else {group_num
= group_num
* 10 + pattern
[i
] - '0';}
320 if (!(pcre_ctypes
[pattern
[i
]] & ctype_word
) )
322 /* XXX should include the text of the reference */
323 PyErr_SetString(ErrorObject
, "illegal symbolic reference");
328 *typeptr
= MEMORY_REFERENCE
;
330 /* If it's a number, return the integer value of the group */
331 if (is_number
) return Py_BuildValue("i", group_num
);
332 /* Otherwise, return a string containing the group name */
333 return Py_BuildValue("s#", pattern
+index
, end
-index
);
339 /* \0 always indicates an octal escape, so we consume up to 3
340 characters, as long as they're all octal digits */
344 i
<=index
+2 && i
<pattern_len
345 && (pcre_ctypes
[ pattern
[i
] ] & ctype_odigit
);
348 octval
= octval
* 8 + pattern
[i
] - '0';
352 PyErr_SetString(ErrorObject
, "octal value out of range");
356 return Py_BuildValue("c", (unsigned char)octval
);
359 case('1'): case('2'): case('3'): case('4'):
360 case('5'): case('6'): case('7'): case('8'):
363 /* Handle \?, where ? is from 1 through 9 */
366 /* If it's at least a two-digit reference, like \34, it might
367 either be a 3-digit octal escape (\123) or a 2-digit
368 decimal memory reference (\34) */
370 if ( (index
+1) <pattern_len
&&
371 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_digit
) )
/* Three characters are available and all three are octal digits. */
373 if ( (index
+2) <pattern_len
&&
374 (pcre_ctypes
[ pattern
[index
+2] ] & ctype_odigit
) &&
375 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_odigit
) &&
376 (pcre_ctypes
[ pattern
[index
] ] & ctype_odigit
)
380 value
= 8*8*(pattern
[index
]-'0') +
381 8*(pattern
[index
+1]-'0') +
382 (pattern
[index
+2]-'0');
385 PyErr_SetString(ErrorObject
, "octal value out of range");
389 return Py_BuildValue("c", (unsigned char)value
);
393 /* 2-digit form, so it's a memory reference */
394 value
= 10*(pattern
[index
]-'0') +
395 (pattern
[index
+1]-'0');
/* Valid group numbers are 1 up to, but not including, EXTRACT_MAX, which is
   defined outside this excerpt. */
396 if (value
<1 || EXTRACT_MAX
<=value
)
398 PyErr_SetString(ErrorObject
, "memory reference out of range");
401 *typeptr
= MEMORY_REFERENCE
;
403 return Py_BuildValue("i", value
);
408 /* Single-digit form, like \2, so it's a memory reference */
409 *typeptr
= MEMORY_REFERENCE
;
411 return Py_BuildValue("i", pattern
[index
]-'0');
417 /* It's some unknown escape like \s, so return a string containing
421 return Py_BuildValue("s#", pattern
+index
-2, 2);
/* Module function "pcre_expand".

   Arguments (format "OS:pcre_expand"): match_obj, any object, and
   repl_obj, which must be a string object holding the replacement
   template.

   The template is walked once.  Pieces are collected in the results list:
   literal runs of the template, values produced by PyPcre_expand_escape,
   and, for MEMORY_REFERENCE escapes, the string returned by calling
   match_obj.group with the decoded reference.  total_len accumulates the
   combined length of the pieces, which are finally copied into one new
   string of that length.  Several conditions and all return statements of
   the error paths are not part of this excerpt. */
426 PyPcre_expand(PyObject
*self
, PyObject
*args
)
428 PyObject
*results
, *match_obj
;
429 PyObject
*repl_obj
, *newstring
;
431 int size
, total_len
, i
, start
, pos
;
433 if (!PyArg_ParseTuple(args
, "OS:pcre_expand", &match_obj
, &repl_obj
))
436 repl
=(unsigned char *)PyString_AsString(repl_obj
);
437 size
=PyString_Size(repl_obj
);
438 results
=PyList_New(0);
439 if (results
==NULL
) return NULL
;
440 for(start
=total_len
=i
=0; i
<size
; i
++)
/* The literal text between start and i becomes a piece of its own. */
450 PyObject
*s
= PyString_FromStringAndSize(
451 (char *)repl
+start
, i
-start
);
456 status
= PyList_Append(results
, s
);
462 total_len
+= i
-start
;
/* Decode the escape; i is passed by address so the helper can report how
   far it read. */
465 value
=PyPcre_expand_escape(repl
, size
, &i
, &escape_type
);
468 /* PyPcre_expand_escape triggered an exception of some sort,
476 PyList_Append(results
, value
);
477 total_len
+= PyString_Size(value
);
479 case(MEMORY_REFERENCE
):
481 PyObject
*r
, *tuple
, *result
;
482 r
=PyObject_GetAttrString(match_obj
, "group");
487 tuple
=PyTuple_New(1);
489 PyTuple_SetItem(tuple
, 0, value
);
490 result
=PyEval_CallObject(r
, tuple
);
491 Py_DECREF(r
); Py_DECREF(tuple
);
494 /* The group() method triggered an exception of some sort */
503 "group did not contribute to the match");
504 PyErr_SetString(ErrorObject
,
511 /* typecheck that it's a string! */
512 if (!PyString_Check(result
))
516 PyErr_SetString(ErrorObject
,
517 "group() must return a string value for replacement");
/* NOTE(review): unlike the status = PyList_Append(...) calls used for
   literal pieces, the result of this PyList_Append and of the one for value
   below is not checked in the visible code -- confirm. */
520 PyList_Append(results
, result
);
521 total_len
+= PyString_Size(result
);
527 PyList_Append(results
, value
);
528 total_len
+= PyString_Size(value
);
533 PyErr_SetString(ErrorObject
,
534 "bad escape in replacement");
539 i
--; /* Decrement now, because the 'for' loop will increment it */
541 } /* endif repl[i]!='\\' */
/* Literal text after the loop is added as a final piece, in the same way as
   inside the loop. */
546 PyObject
*s
= PyString_FromStringAndSize((char *)repl
+start
,
552 status
= PyList_Append(results
, s
);
558 total_len
+= i
-start
;
561 /* Whew! Now we've constructed a list containing various pieces of
562 strings that will make up our final result. So, iterate over
563 the list concatenating them. A new string measuring total_len
564 bytes is allocated and filled in. */
566 newstring
=PyString_FromStringAndSize(NULL
, total_len
);
/* repl is reused from here on as the write pointer into the buffer of
   newstring; pos is the running write offset. */
573 repl
=(unsigned char *)PyString_AsString(newstring
);
574 for (pos
=i
=0; i
<PyList_Size(results
); i
++)
/* PyList_GetItem returns a borrowed reference, so item is not released. */
576 PyObject
*item
=PyList_GetItem(results
, i
);
577 memcpy(repl
+pos
, PyString_AsString(item
), PyString_Size(item
) );
578 pos
+= PyString_Size(item
);
585 /* List of functions defined in the module */
/* Module-level function table passed to Py_InitModule("pcre", ...).  It
   maps "pcre_compile" to PyPcre_compile and "pcre_expand" to PyPcre_expand;
   the flags value 1 is the numeric value of METH_VARARGS.  Not to be
   confused with Pcre_methods (capital P), the per-object method table. */
587 static PyMethodDef pcre_methods
[] = {
588 {"pcre_compile", PyPcre_compile
, 1},
589 {"pcre_expand", PyPcre_expand
, 1},
590 {NULL
, NULL
} /* sentinel */
595 * Convenience routine to export an integer value.
596 * For simplicity, errors (which are unlikely anyway) are ignored.
/* d: dictionary that receives the entry; name: key to store under;
   value: C int that is converted to a Python integer object first.
   The module initialization code calls this once per option flag. */
600 insint(PyObject
*d
, char *name
, int value
)
602 PyObject
*v
= PyInt_FromLong((long) value
);
604 /* Don't bother reporting this error */
608 PyDict_SetItemString(d
, name
, v
);
614 /* Initialization function for the module (*must* be called initpcre) */
621 Pcre_Type
.ob_type
= &PyType_Type
;
623 /* Create the module and add the functions */
624 m
= Py_InitModule("pcre", pcre_methods
);
626 /* Add some symbolic constants to the module */
627 d
= PyModule_GetDict(m
);
628 ErrorObject
= PyErr_NewException("pcre.error", NULL
, NULL
);
629 PyDict_SetItemString(d
, "error", ErrorObject
);
631 /* Insert the flags */
632 insint(d
, "IGNORECASE", PCRE_CASELESS
);
633 insint(d
, "ANCHORED", PCRE_ANCHORED
);
634 insint(d
, "MULTILINE", PCRE_MULTILINE
);
635 insint(d
, "DOTALL", PCRE_DOTALL
);
636 insint(d
, "VERBOSE", PCRE_EXTENDED
);
637 insint(d
, "LOCALE", PCRE_LOCALE
);