6 /* For Python 1.4, graminit.h has to be explicitly included */
8 #define Py_eval_input eval_input
18 static PyObject
*ErrorObject
;
23 pcre_extra
*regex_extra
;
27 static PyTypeObject Pcre_Type
;
29 #define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
35 #define MEMORY_REFERENCE 1
39 #define WORD_BOUNDARY 5
40 #define NOT_WORD_BOUNDARY 6
41 #define BEGINNING_OF_BUFFER 7
42 #define END_OF_BUFFER 8
46 newPcreObject(PyObject
*args
)
49 self
= PyObject_New(PcreObject
, &Pcre_Type
);
53 self
->regex_extra
= NULL
;
60 PyPcre_dealloc(PcreObject
*self
)
62 if (self
->regex
) (pcre_free
)(self
->regex
);
63 if (self
->regex_extra
) (pcre_free
)(self
->regex_extra
);
69 PyPcre_exec(PcreObject
*self
, PyObject
*args
)
72 int stringlen
, pos
= 0, options
=0, endpos
= -1, i
, count
;
76 if (!PyArg_ParseTuple(args
, "t#|iii:match", &string
, &stringlen
,
77 &pos
, &endpos
, &options
))
79 if (endpos
== -1) {endpos
= stringlen
;}
80 count
= pcre_exec(self
->regex
, self
->regex_extra
,
81 string
, endpos
, pos
, options
,
82 offsets
, sizeof(offsets
)/sizeof(int) );
83 /* If an error occurred during the match, and an exception was raised,
84 just return NULL and leave the exception alone. The most likely
85 problem to cause this would be running out of memory for
91 if (count
==PCRE_ERROR_NOMATCH
) {Py_INCREF(Py_None
); return Py_None
;}
94 PyObject
*errval
= Py_BuildValue("si", "Regex execution error", count
);
95 PyErr_SetObject(ErrorObject
, errval
);
100 list
=PyList_New(self
->num_groups
+1);
101 if (list
==NULL
) return NULL
;
102 for(i
=0; i
<=self
->num_groups
; i
++)
105 int start
=offsets
[i
*2], end
=offsets
[i
*2+1];
106 /* If the group wasn't affected by the match, return -1, -1 */
107 if (start
<0 || count
<=i
)
109 v
=Py_BuildValue("ii", start
, end
);
110 if (v
==NULL
) {Py_DECREF(list
); return NULL
;}
111 PyList_SetItem(list
, i
, v
);
116 static PyMethodDef Pcre_methods
[] = {
117 {"match", (PyCFunction
)PyPcre_exec
, METH_VARARGS
},
118 {NULL
, NULL
} /* sentinel */
122 PyPcre_getattr(PcreObject
*self
, char *name
)
124 return Py_FindMethod(Pcre_methods
, (PyObject
*)self
, name
);
128 static PyTypeObject Pcre_Type
= {
129 PyObject_HEAD_INIT(NULL
)
131 "pcre.Pcre", /*tp_name*/
132 sizeof(PcreObject
), /*tp_basicsize*/
135 (destructor
)PyPcre_dealloc
, /*tp_dealloc*/
137 (getattrfunc
)PyPcre_getattr
, /*tp_getattr*/
142 0, /*tp_as_sequence*/
146 /* --------------------------------------------------------------------- */
149 PyPcre_compile(PyObject
*self
, PyObject
*args
)
152 PyObject
*dictionary
;
156 int options
, erroroffset
;
157 if (!PyArg_ParseTuple(args
, "siO!:pcre_compile", &pattern
, &options
,
158 &PyDict_Type
, &dictionary
))
160 rv
= newPcreObject(args
);
164 rv
->regex
= pcre_compile((char*)pattern
, options
,
165 &error
, &erroroffset
, dictionary
);
169 if (!PyErr_Occurred())
171 PyObject
*errval
= Py_BuildValue("si", error
, erroroffset
);
172 PyErr_SetObject(ErrorObject
, errval
);
177 rv
->regex_extra
=pcre_study(rv
->regex
, 0, &error
);
178 if (rv
->regex_extra
==NULL
&& error
!=NULL
)
180 PyObject
*errval
= Py_BuildValue("si", error
, 0);
182 PyErr_SetObject(ErrorObject
, errval
);
186 rv
->num_groups
= pcre_info(rv
->regex
, NULL
, NULL
);
187 if (rv
->num_groups
<0)
189 PyObject
*errval
= Py_BuildValue("si", error
, rv
->num_groups
);
190 PyErr_SetObject(ErrorObject
, errval
);
195 return (PyObject
*)rv
;
199 PyPcre_expand_escape(unsigned char *pattern
, int pattern_len
,
200 int *indexptr
, int *typeptr
)
203 int index
= *indexptr
;
205 if (pattern_len
<=index
)
207 PyErr_SetString(ErrorObject
, "escape ends too soon");
210 c
=pattern
[index
]; index
++;
217 return Py_BuildValue("c", (char)9);
220 return Py_BuildValue("c", (char)10);
223 return Py_BuildValue("c", (char)11);
226 return Py_BuildValue("c", (char)13);
229 return Py_BuildValue("c", (char)12);
232 return Py_BuildValue("c", (char)7);
235 return Py_BuildValue("c", (char)8);
238 return Py_BuildValue("c", '\\');
245 while ( (end
<pattern_len
&& pcre_ctypes
[ pattern
[end
] ] & ctype_xdigit
) != 0)
248 x
= x
* 16 + pcre_lcc
[ch
] -
249 (((pcre_ctypes
[ch
] & ctype_digit
) != 0)? '0' : 'W');
255 PyErr_SetString(ErrorObject
, "\\x must be followed by hex digits");
259 return Py_BuildValue("c", (char)x
);
262 case('E'): case('G'): case('L'): case('Q'):
263 case('U'): case('l'): case('u'):
266 PyOS_snprintf(message
, sizeof(message
),
267 "\\%c is not allowed", c
);
268 PyErr_SetString(ErrorObject
, message
);
275 int group_num
= 0, is_number
=0;
277 if (pattern_len
<=index
)
279 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
282 if (pattern
[index
]!='<')
284 PyErr_SetString(ErrorObject
, "missing < in symbolic reference");
289 while (end
<pattern_len
&& pattern
[end
]!='>')
291 if (end
==pattern_len
)
293 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
297 if (index
==end
) /* Zero-length name */
299 /* XXX should include the text of the reference */
300 PyErr_SetString(ErrorObject
, "zero-length symbolic reference");
303 if ((pcre_ctypes
[pattern
[index
]] & ctype_digit
)) /* First char. a digit */
306 group_num
= pattern
[index
] - '0';
309 for(i
=index
+1; i
<end
; i
++)
312 !(pcre_ctypes
[pattern
[i
]] & ctype_digit
) )
314 /* XXX should include the text of the reference */
315 PyErr_SetString(ErrorObject
, "illegal non-digit character in \\g<...> starting with digit");
318 else {group_num
= group_num
* 10 + pattern
[i
] - '0';}
319 if (!(pcre_ctypes
[pattern
[i
]] & ctype_word
) )
321 /* XXX should include the text of the reference */
322 PyErr_SetString(ErrorObject
, "illegal symbolic reference");
327 *typeptr
= MEMORY_REFERENCE
;
329 /* If it's a number, return the integer value of the group */
330 if (is_number
) return Py_BuildValue("i", group_num
);
331 /* Otherwise, return a string containing the group name */
332 return Py_BuildValue("s#", pattern
+index
, end
-index
);
337 /* \0 always indicates an octal escape, so we consume up to 3
338 characters, as long as they're all octal digits */
342 i
<=index
+2 && i
<pattern_len
343 && (pcre_ctypes
[ pattern
[i
] ] & ctype_odigit
);
346 octval
= octval
* 8 + pattern
[i
] - '0';
350 PyErr_SetString(ErrorObject
, "octal value out of range");
354 return Py_BuildValue("c", (unsigned char)octval
);
357 case('1'): case('2'): case('3'): case('4'):
358 case('5'): case('6'): case('7'): case('8'):
361 /* Handle \?, where ? is from 1 through 9 */
364 /* If it's at least a two-digit reference, like \34, it might
365 either be a 3-digit octal escape (\123) or a 2-digit
366 decimal memory reference (\34) */
368 if ( (index
+1) <pattern_len
&&
369 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_digit
) )
371 if ( (index
+2) <pattern_len
&&
372 (pcre_ctypes
[ pattern
[index
+2] ] & ctype_odigit
) &&
373 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_odigit
) &&
374 (pcre_ctypes
[ pattern
[index
] ] & ctype_odigit
)
378 value
= 8*8*(pattern
[index
]-'0') +
379 8*(pattern
[index
+1]-'0') +
380 (pattern
[index
+2]-'0');
383 PyErr_SetString(ErrorObject
, "octal value out of range");
387 return Py_BuildValue("c", (unsigned char)value
);
391 /* 2-digit form, so it's a memory reference */
392 value
= 10*(pattern
[index
]-'0') +
393 (pattern
[index
+1]-'0');
394 if (value
<1 || EXTRACT_MAX
<=value
)
396 PyErr_SetString(ErrorObject
, "memory reference out of range");
399 *typeptr
= MEMORY_REFERENCE
;
401 return Py_BuildValue("i", value
);
406 /* Single-digit form, like \2, so it's a memory reference */
407 *typeptr
= MEMORY_REFERENCE
;
409 return Py_BuildValue("i", pattern
[index
]-'0');
414 /* It's some unknown escape like \s, so return a string containing
418 return Py_BuildValue("s#", pattern
+index
-2, 2);
423 PyPcre_expand(PyObject
*self
, PyObject
*args
)
425 PyObject
*results
, *match_obj
;
426 PyObject
*repl_obj
, *newstring
;
428 int size
, total_len
, i
, start
, pos
;
430 if (!PyArg_ParseTuple(args
, "OS:pcre_expand", &match_obj
, &repl_obj
))
433 repl
=(unsigned char *)PyString_AsString(repl_obj
);
434 size
=PyString_Size(repl_obj
);
435 results
=PyList_New(0);
436 if (results
==NULL
) return NULL
;
437 for(start
=total_len
=i
=0; i
<size
; i
++)
447 PyObject
*s
= PyString_FromStringAndSize(
448 (char *)repl
+start
, i
-start
);
453 status
= PyList_Append(results
, s
);
459 total_len
+= i
-start
;
462 value
=PyPcre_expand_escape(repl
, size
, &i
, &escape_type
);
465 /* PyPcre_expand_escape triggered an exception of some sort,
473 PyList_Append(results
, value
);
474 total_len
+= PyString_Size(value
);
476 case(MEMORY_REFERENCE
):
478 PyObject
*r
, *tuple
, *result
;
479 r
=PyObject_GetAttrString(match_obj
, "group");
484 tuple
=PyTuple_New(1);
486 PyTuple_SetItem(tuple
, 0, value
);
487 result
=PyEval_CallObject(r
, tuple
);
488 Py_DECREF(r
); Py_DECREF(tuple
);
491 /* The group() method triggered an exception of some sort */
499 PyOS_snprintf(message
, sizeof(message
),
500 "group did not contribute to the match");
501 PyErr_SetString(ErrorObject
,
508 /* typecheck that it's a string! */
509 if (!PyString_Check(result
))
513 PyErr_SetString(ErrorObject
,
514 "group() must return a string value for replacement");
517 PyList_Append(results
, result
);
518 total_len
+= PyString_Size(result
);
524 PyList_Append(results
, value
);
525 total_len
+= PyString_Size(value
);
530 PyErr_SetString(ErrorObject
,
531 "bad escape in replacement");
536 i
--; /* Decrement now, because the 'for' loop will increment it */
538 } /* endif repl[i]!='\\' */
543 PyObject
*s
= PyString_FromStringAndSize((char *)repl
+start
,
549 status
= PyList_Append(results
, s
);
555 total_len
+= i
-start
;
558 /* Whew! Now we've constructed a list containing various pieces of
559 strings that will make up our final result. So, iterate over
560 the list concatenating them. A new string measuring total_len
561 bytes is allocated and filled in. */
563 newstring
=PyString_FromStringAndSize(NULL
, total_len
);
570 repl
=(unsigned char *)PyString_AsString(newstring
);
571 for (pos
=i
=0; i
<PyList_Size(results
); i
++)
573 PyObject
*item
=PyList_GetItem(results
, i
);
574 memcpy(repl
+pos
, PyString_AsString(item
), PyString_Size(item
) );
575 pos
+= PyString_Size(item
);
582 /* List of functions defined in the module */
584 static PyMethodDef pcre_methods
[] = {
585 {"pcre_compile", PyPcre_compile
, METH_VARARGS
},
586 {"pcre_expand", PyPcre_expand
, METH_VARARGS
},
587 {NULL
, NULL
} /* sentinel */
592 * Convenience routine to export an integer value.
593 * For simplicity, errors (which are unlikely anyway) are ignored.
597 insint(PyObject
*d
, char *name
, int value
)
599 PyObject
*v
= PyInt_FromLong((long) value
);
601 /* Don't bother reporting this error */
605 PyDict_SetItemString(d
, name
, v
);
611 /* Initialization function for the module (*must* be called initpcre) */
618 Pcre_Type
.ob_type
= &PyType_Type
;
620 /* Create the module and add the functions */
621 m
= Py_InitModule("pcre", pcre_methods
);
623 /* Add some symbolic constants to the module */
624 d
= PyModule_GetDict(m
);
625 ErrorObject
= PyErr_NewException("pcre.error", NULL
, NULL
);
626 PyDict_SetItemString(d
, "error", ErrorObject
);
628 /* Insert the flags */
629 insint(d
, "IGNORECASE", PCRE_CASELESS
);
630 insint(d
, "ANCHORED", PCRE_ANCHORED
);
631 insint(d
, "MULTILINE", PCRE_MULTILINE
);
632 insint(d
, "DOTALL", PCRE_DOTALL
);
633 insint(d
, "VERBOSE", PCRE_EXTENDED
);
634 insint(d
, "LOCALE", PCRE_LOCALE
);