1 /***********************************************************
2 Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
38 /* For Python 1.4, graminit.h has to be explicitly included */
/* Make Py_eval_input an alias for eval_input.
   NOTE(review): eval_input is presumably the grammar symbol supplied by the
   graminit.h header mentioned above, and this define presumably sits inside
   a version guard that is not visible in this excerpt -- confirm both. */
40 #define Py_eval_input eval_input
/* Exception object shared by the whole module. The module initialization
   code assigns it the string object "pcre.error" and stores it in the module
   dictionary under the key "error"; the error paths in this file pass it to
   PyErr_SetString / PyErr_SetObject as the exception to raise. */
50 static PyObject
*ErrorObject
;
55 pcre_extra
*regex_extra
;
/* Forward declaration of the type object for compiled-pattern objects; the
   initialized definition of Pcre_Type appears further down in this file. */
59 staticforward PyTypeObject Pcre_Type
;
/* Evaluates to nonzero when v's ob_type is exactly &Pcre_Type (a plain
   pointer comparison). v is dereferenced, so it must be a valid, non-NULL
   object pointer. */
61 #define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
/* Escape-type codes. In the visible code MEMORY_REFERENCE is the value that
   PyPcre_expand_escape stores through its typeptr argument when an escape is
   a group reference, and it appears as a case label in PyPcre_expand. The
   four *_BOUNDARY / *_BUFFER names are not referenced in the visible part of
   the file.
   NOTE(review): the gap in the numbering (2-4) suggests further codes are
   defined on lines not shown in this excerpt -- confirm before renumbering. */
67 #define MEMORY_REFERENCE 1
71 #define WORD_BOUNDARY 5
72 #define NOT_WORD_BOUNDARY 6
73 #define BEGINNING_OF_BUFFER 7
74 #define END_OF_BUFFER 8
82 self
= PyObject_NEW(PcreObject
, &Pcre_Type
);
86 self
->regex_extra
= NULL
;
96 if (self
->regex
) free(self
->regex
);
97 if (self
->regex_extra
) free(self
->regex_extra
);
99 self
->regex_extra
=NULL
;
105 PyPcre_exec(self
, args
)
110 int stringlen
, pos
= 0, options
=0, endpos
= -1, i
, count
;
114 if (!PyArg_ParseTuple(args
, "t#|iiii", &string
, &stringlen
, &pos
, &endpos
, &options
))
116 if (endpos
== -1) {endpos
= stringlen
;}
117 count
= pcre_exec(self
->regex
, self
->regex_extra
,
118 string
, endpos
, pos
, options
,
119 offsets
, sizeof(offsets
)/sizeof(int) );
120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
128 if (count
==PCRE_ERROR_NOMATCH
) {Py_INCREF(Py_None
); return Py_None
;}
131 PyObject
*errval
= Py_BuildValue("si", "Regex execution error", count
);
132 PyErr_SetObject(ErrorObject
, errval
);
137 list
=PyList_New(self
->num_groups
+1);
138 if (list
==NULL
) return NULL
;
139 for(i
=0; i
<=self
->num_groups
; i
++)
142 int start
=offsets
[i
*2], end
=offsets
[i
*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start
<0 || count
<=i
)
146 v
=Py_BuildValue("ii", start
, end
);
147 if (v
==NULL
) {Py_DECREF(list
); return NULL
;}
148 PyList_SetItem(list
, i
, v
);
153 static PyMethodDef Pcre_methods
[] = {
154 {"match", (PyCFunction
)PyPcre_exec
, 1},
155 {NULL
, NULL
} /* sentinel */
159 PyPcre_getattr(self
, name
)
163 return Py_FindMethod(Pcre_methods
, (PyObject
*)self
, name
);
167 staticforward PyTypeObject Pcre_Type
= {
168 PyObject_HEAD_INIT(NULL
)
171 sizeof(PcreObject
), /*tp_basicsize*/
174 (destructor
)PyPcre_dealloc
, /*tp_dealloc*/
176 (getattrfunc
)PyPcre_getattr
, /*tp_getattr*/
181 0, /*tp_as_sequence*/
185 /* --------------------------------------------------------------------- */
188 PyPcre_compile(self
, args
)
189 PyObject
*self
; /* Not used */
193 PyObject
*dictionary
;
197 int options
, erroroffset
;
198 if (!PyArg_ParseTuple(args
, "siO!", &pattern
, &options
,
199 &PyDict_Type
, &dictionary
))
201 rv
= newPcreObject(args
);
205 rv
->regex
= pcre_compile((char*)pattern
, options
,
206 &error
, &erroroffset
, dictionary
);
210 if (!PyErr_Occurred())
212 PyObject
*errval
= Py_BuildValue("si", error
, erroroffset
);
213 PyErr_SetObject(ErrorObject
, errval
);
218 rv
->regex_extra
=pcre_study(rv
->regex
, 0, &error
);
219 if (rv
->regex_extra
==NULL
&& error
!=NULL
)
221 PyObject
*errval
= Py_BuildValue("si", error
, 0);
223 PyErr_SetObject(ErrorObject
, errval
);
227 rv
->num_groups
= pcre_info(rv
->regex
, NULL
, NULL
);
228 if (rv
->num_groups
<0)
230 PyObject
*errval
= Py_BuildValue("si", error
, rv
->num_groups
);
231 PyErr_SetObject(ErrorObject
, errval
);
236 return (PyObject
*)rv
;
240 PyPcre_expand_escape(pattern
, pattern_len
, indexptr
, typeptr
)
241 unsigned char *pattern
;
242 int pattern_len
, *indexptr
, *typeptr
;
245 int index
= *indexptr
;
247 if (pattern_len
<=index
)
249 PyErr_SetString(ErrorObject
, "escape ends too soon");
252 c
=pattern
[index
]; index
++;
259 return Py_BuildValue("c", (char)9);
262 return Py_BuildValue("c", (char)10);
265 return Py_BuildValue("c", (char)11);
268 return Py_BuildValue("c", (char)13);
271 return Py_BuildValue("c", (char)12);
274 return Py_BuildValue("c", (char)7);
277 return Py_BuildValue("c", (char)8);
280 return Py_BuildValue("c", '\\');
287 while ( (end
<pattern_len
&& pcre_ctypes
[ pattern
[end
] ] & ctype_xdigit
) != 0)
290 x
= x
* 16 + pcre_lcc
[ch
] -
291 (((pcre_ctypes
[ch
] & ctype_digit
) != 0)? '0' : 'W');
297 PyErr_SetString(ErrorObject
, "\\x must be followed by hex digits");
301 return Py_BuildValue("c", (char)x
);
305 case('E'): case('G'): case('L'): case('Q'):
306 case('U'): case('l'): case('u'):
309 sprintf(message
, "\\%c is not allowed", c
);
310 PyErr_SetString(ErrorObject
, message
);
317 int group_num
= 0, is_number
=0;
319 if (pattern_len
<=index
)
321 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
324 if (pattern
[index
]!='<')
326 PyErr_SetString(ErrorObject
, "missing < in symbolic reference");
331 while (end
<pattern_len
&& pattern
[end
]!='>')
333 if (end
==pattern_len
)
335 PyErr_SetString(ErrorObject
, "unfinished symbolic reference");
339 if (index
==end
) /* Zero-length name */
341 /* XXX should include the text of the reference */
342 PyErr_SetString(ErrorObject
, "zero-length symbolic reference");
345 if ((pcre_ctypes
[pattern
[index
]] & ctype_digit
)) /* First char. a digit */
348 group_num
= pattern
[index
] - '0';
351 for(i
=index
+1; i
<end
; i
++)
354 !(pcre_ctypes
[pattern
[i
]] & ctype_digit
) )
356 /* XXX should include the text of the reference */
357 PyErr_SetString(ErrorObject
, "illegal non-digit character in \\g<...> starting with digit");
360 else {group_num
= group_num
* 10 + pattern
[i
] - '0';}
361 if (!(pcre_ctypes
[pattern
[i
]] & ctype_word
) )
363 /* XXX should include the text of the reference */
364 PyErr_SetString(ErrorObject
, "illegal symbolic reference");
369 *typeptr
= MEMORY_REFERENCE
;
371 /* If it's a number, return the integer value of the group */
372 if (is_number
) return Py_BuildValue("i", group_num
);
373 /* Otherwise, return a string containing the group name */
374 return Py_BuildValue("s#", pattern
+index
, end
-index
);
380 /* \0 always indicates an octal escape, so we consume up to 3
381 characters, as long as they're all octal digits */
385 i
<=index
+2 && i
<pattern_len
386 && (pcre_ctypes
[ pattern
[i
] ] & ctype_odigit
);
389 octval
= octval
* 8 + pattern
[i
] - '0';
393 PyErr_SetString(ErrorObject
, "octal value out of range");
397 return Py_BuildValue("c", (unsigned char)octval
);
400 case('1'): case('2'): case('3'): case('4'):
401 case('5'): case('6'): case('7'): case('8'):
404 /* Handle \?, where ? is from 1 through 9 */
407 /* If it's at least a two-digit reference, like \34, it might
408 either be a 3-digit octal escape (\123) or a 2-digit
409 decimal memory reference (\34) */
411 if ( (index
+1) <pattern_len
&&
412 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_digit
) )
414 if ( (index
+2) <pattern_len
&&
415 (pcre_ctypes
[ pattern
[index
+2] ] & ctype_odigit
) &&
416 (pcre_ctypes
[ pattern
[index
+1] ] & ctype_odigit
) &&
417 (pcre_ctypes
[ pattern
[index
] ] & ctype_odigit
)
421 value
= 8*8*(pattern
[index
]-'0') +
422 8*(pattern
[index
+1]-'0') +
423 (pattern
[index
+2]-'0');
426 PyErr_SetString(ErrorObject
, "octal value out of range");
430 return Py_BuildValue("c", (unsigned char)value
);
434 /* 2-digit form, so it's a memory reference */
435 value
= 10*(pattern
[index
]-'0') +
436 (pattern
[index
+1]-'0');
437 if (value
<1 || EXTRACT_MAX
<=value
)
439 PyErr_SetString(ErrorObject
, "memory reference out of range");
442 *typeptr
= MEMORY_REFERENCE
;
444 return Py_BuildValue("i", value
);
449 /* Single-digit form, like \2, so it's a memory reference */
450 *typeptr
= MEMORY_REFERENCE
;
452 return Py_BuildValue("i", pattern
[index
]-'0');
458 /* It's some unknown escape like \s, so return a string containing
462 return Py_BuildValue("s#", pattern
+index
-2, 2);
467 PyPcre_expand(self
, args
)
471 PyObject
*results
, *match_obj
;
472 PyObject
*repl_obj
, *newstring
;
474 int size
, total_len
, i
, start
, pos
;
476 if (!PyArg_ParseTuple(args
, "OS", &match_obj
, &repl_obj
))
479 repl
=(unsigned char *)PyString_AsString(repl_obj
);
480 size
=PyString_Size(repl_obj
);
481 results
=PyList_New(0);
482 if (results
==NULL
) return NULL
;
483 for(start
=total_len
=i
=0; i
<size
; i
++)
493 PyObject
*s
= PyString_FromStringAndSize(
494 (char *)repl
+start
, i
-start
);
499 status
= PyList_Append(results
, s
);
505 total_len
+= i
-start
;
508 value
=PyPcre_expand_escape(repl
, size
, &i
, &escape_type
);
511 /* PyPcre_expand_escape triggered an exception of some sort,
519 PyList_Append(results
, value
);
520 total_len
+= PyString_Size(value
);
522 case(MEMORY_REFERENCE
):
524 PyObject
*r
, *tuple
, *result
;
525 r
=PyObject_GetAttrString(match_obj
, "group");
530 tuple
=PyTuple_New(1);
532 PyTuple_SetItem(tuple
, 0, value
);
533 result
=PyEval_CallObject(r
, tuple
);
534 Py_DECREF(r
); Py_DECREF(tuple
);
537 /* The group() method triggered an exception of some sort */
546 "group did not contribute to the match");
547 PyErr_SetString(ErrorObject
,
554 /* typecheck that it's a string! */
555 if (!PyString_Check(result
))
559 PyErr_SetString(ErrorObject
,
560 "group() must return a string value for replacement");
563 PyList_Append(results
, result
);
564 total_len
+= PyString_Size(result
);
570 PyList_Append(results
, value
);
571 total_len
+= PyString_Size(value
);
576 PyErr_SetString(ErrorObject
,
577 "bad escape in replacement");
582 i
--; /* Decrement now, because the 'for' loop will increment it */
584 } /* endif repl[i]!='\\' */
589 PyObject
*s
= PyString_FromStringAndSize((char *)repl
+start
,
595 status
= PyList_Append(results
, s
);
601 total_len
+= i
-start
;
604 /* Whew! Now we've constructed a list containing various pieces of
605 strings that will make up our final result. So, iterate over
606 the list concatenating them. A new string measuring total_len
607 bytes is allocated and filled in. */
609 newstring
=PyString_FromStringAndSize(NULL
, total_len
);
616 repl
=(unsigned char *)PyString_AsString(newstring
);
617 for (pos
=i
=0; i
<PyList_Size(results
); i
++)
619 PyObject
*item
=PyList_GetItem(results
, i
);
620 memcpy(repl
+pos
, PyString_AsString(item
), PyString_Size(item
) );
621 pos
+= PyString_Size(item
);
628 /* List of functions defined in the module */
630 static PyMethodDef pcre_methods
[] = {
631 {"pcre_compile", PyPcre_compile
, 1},
632 {"pcre_expand", PyPcre_expand
, 1},
633 {NULL
, NULL
} /* sentinel */
638 * Convenience routine to export an integer value.
639 * For simplicity, errors (which are unlikely anyway) are ignored.
643 insint(d
, name
, value
)
648 PyObject
*v
= PyInt_FromLong((long) value
);
650 /* Don't bother reporting this error */
654 PyDict_SetItemString(d
, name
, v
);
660 /* Initialization function for the module (*must* be called initpcre) */
667 Pcre_Type
.ob_type
= &PyType_Type
;
669 /* Create the module and add the functions */
670 m
= Py_InitModule("pcre", pcre_methods
);
672 /* Add some symbolic constants to the module */
673 d
= PyModule_GetDict(m
);
674 ErrorObject
= PyString_FromString("pcre.error");
675 PyDict_SetItemString(d
, "error", ErrorObject
);
677 /* Insert the flags */
678 insint(d
, "IGNORECASE", PCRE_CASELESS
);
679 insint(d
, "ANCHORED", PCRE_ANCHORED
);
680 insint(d
, "MULTILINE", PCRE_MULTILINE
);
681 insint(d
, "DOTALL", PCRE_DOTALL
);
682 insint(d
, "VERBOSE", PCRE_EXTENDED
);
683 insint(d
, "LOCALE", PCRE_LOCALE
);
685 /* Check for errors */
686 if (PyErr_Occurred())
687 Py_FatalError("can't initialize module pcre");