2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
25 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
27 * This version of the SRE library can be redistributed under CNRI's
28 * Python 1.6 license. For any other use, please contact Secret Labs
29 * AB (info@pythonware.com).
31 * Portions of this engine have been developed in cooperation with
32 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
33 * other compatibility work.
38 static char copyright
[] =
39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
42 #include "structmember.h" /* offsetof */
48 /* name of this module, minus the leading underscore */
49 #if !defined(SRE_MODULE)
50 #define SRE_MODULE "sre"
53 /* defining this one enables tracing */
56 #if PY_VERSION_HEX >= 0x01060000
57 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
58 /* defining this enables unicode support (default under 1.6a1 and later) */
63 /* -------------------------------------------------------------------- */
64 /* optional features */
66 /* prevent run-away recursion (bad patterns on long strings) */
68 #if !defined(USE_STACKCHECK)
69 #if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
70 /* require smaller recursion limit for a number of 64-bit platforms:
71 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
72 /* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
73 #define USE_RECURSION_LIMIT 7500
76 #if defined(__GNUC__) && defined(WITH_THREAD) && defined(__FreeBSD__)
77 /* the pthreads library on FreeBSD has a fixed 1MB stack size for the
78 * initial (or "primary") thread, which is insufficient for the default
79 * recursion limit. gcc 3.x at the default optimisation
80 * level (-O3) uses stack space more aggressively than gcc 2.95.
83 #define USE_RECURSION_LIMIT 6500
85 #define USE_RECURSION_LIMIT 7500
89 #define USE_RECURSION_LIMIT 10000
94 /* enables fast searching */
95 #define USE_FAST_SEARCH
97 /* enables aggressive inlining (always on for Visual C) */
100 /* enables copy/deepcopy handling (work in progress) */
101 #undef USE_BUILTIN_COPY
103 #if PY_VERSION_HEX < 0x01060000
104 #define PyObject_DEL(op) PyMem_DEL((op))
107 /* -------------------------------------------------------------------- */
109 #if defined(_MSC_VER)
110 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
111 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
112 /* fastest possible local call under MSVC */
113 #define LOCAL(type) static __inline type __fastcall
114 #elif defined(USE_INLINE)
115 #define LOCAL(type) static inline type
117 #define LOCAL(type) static type
121 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
122 #define SRE_ERROR_STATE -2 /* illegal state */
123 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
124 #define SRE_ERROR_MEMORY -9 /* out of memory */
127 #define TRACE(v) printf v
132 /* -------------------------------------------------------------------- */
133 /* search engine state */
135 /* default character predicates (run sre_chars.py to regenerate tables) */
137 #define SRE_DIGIT_MASK 1
138 #define SRE_SPACE_MASK 2
139 #define SRE_LINEBREAK_MASK 4
140 #define SRE_ALNUM_MASK 8
141 #define SRE_WORD_MASK 16
143 /* FIXME: this assumes ASCII. create tables in init_sre() instead */
145 static char sre_char_info
[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
146 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
147 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
148 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
149 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
150 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
151 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
153 static char sre_char_lower
[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
154 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
155 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
156 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
157 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
158 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
159 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
160 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
161 120, 121, 122, 123, 124, 125, 126, 127 };
163 #define SRE_IS_DIGIT(ch)\
164 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
165 #define SRE_IS_SPACE(ch)\
166 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
167 #define SRE_IS_LINEBREAK(ch)\
168 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
169 #define SRE_IS_ALNUM(ch)\
170 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
171 #define SRE_IS_WORD(ch)\
172 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
174 static unsigned int sre_lower(unsigned int ch
)
176 return ((ch
) < 128 ? sre_char_lower
[ch
] : ch
);
179 /* locale-specific character predicates */
181 #define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
182 #define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
183 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
184 #define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
185 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
187 static unsigned int sre_lower_locale(unsigned int ch
)
189 return ((ch
) < 256 ? tolower((ch
)) : ch
);
192 /* unicode-specific character predicates */
194 #if defined(HAVE_UNICODE)
196 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
197 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
198 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
199 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
200 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
202 static unsigned int sre_lower_unicode(unsigned int ch
)
204 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE
)(ch
));
210 sre_category(SRE_CODE category
, unsigned int ch
)
214 case SRE_CATEGORY_DIGIT
:
215 return SRE_IS_DIGIT(ch
);
216 case SRE_CATEGORY_NOT_DIGIT
:
217 return !SRE_IS_DIGIT(ch
);
218 case SRE_CATEGORY_SPACE
:
219 return SRE_IS_SPACE(ch
);
220 case SRE_CATEGORY_NOT_SPACE
:
221 return !SRE_IS_SPACE(ch
);
222 case SRE_CATEGORY_WORD
:
223 return SRE_IS_WORD(ch
);
224 case SRE_CATEGORY_NOT_WORD
:
225 return !SRE_IS_WORD(ch
);
226 case SRE_CATEGORY_LINEBREAK
:
227 return SRE_IS_LINEBREAK(ch
);
228 case SRE_CATEGORY_NOT_LINEBREAK
:
229 return !SRE_IS_LINEBREAK(ch
);
231 case SRE_CATEGORY_LOC_WORD
:
232 return SRE_LOC_IS_WORD(ch
);
233 case SRE_CATEGORY_LOC_NOT_WORD
:
234 return !SRE_LOC_IS_WORD(ch
);
236 #if defined(HAVE_UNICODE)
237 case SRE_CATEGORY_UNI_DIGIT
:
238 return SRE_UNI_IS_DIGIT(ch
);
239 case SRE_CATEGORY_UNI_NOT_DIGIT
:
240 return !SRE_UNI_IS_DIGIT(ch
);
241 case SRE_CATEGORY_UNI_SPACE
:
242 return SRE_UNI_IS_SPACE(ch
);
243 case SRE_CATEGORY_UNI_NOT_SPACE
:
244 return !SRE_UNI_IS_SPACE(ch
);
245 case SRE_CATEGORY_UNI_WORD
:
246 return SRE_UNI_IS_WORD(ch
);
247 case SRE_CATEGORY_UNI_NOT_WORD
:
248 return !SRE_UNI_IS_WORD(ch
);
249 case SRE_CATEGORY_UNI_LINEBREAK
:
250 return SRE_UNI_IS_LINEBREAK(ch
);
251 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
252 return !SRE_UNI_IS_LINEBREAK(ch
);
254 case SRE_CATEGORY_UNI_DIGIT
:
255 return SRE_IS_DIGIT(ch
);
256 case SRE_CATEGORY_UNI_NOT_DIGIT
:
257 return !SRE_IS_DIGIT(ch
);
258 case SRE_CATEGORY_UNI_SPACE
:
259 return SRE_IS_SPACE(ch
);
260 case SRE_CATEGORY_UNI_NOT_SPACE
:
261 return !SRE_IS_SPACE(ch
);
262 case SRE_CATEGORY_UNI_WORD
:
263 return SRE_LOC_IS_WORD(ch
);
264 case SRE_CATEGORY_UNI_NOT_WORD
:
265 return !SRE_LOC_IS_WORD(ch
);
266 case SRE_CATEGORY_UNI_LINEBREAK
:
267 return SRE_IS_LINEBREAK(ch
);
268 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
269 return !SRE_IS_LINEBREAK(ch
);
278 mark_fini(SRE_STATE
* state
)
280 if (state
->mark_stack
) {
281 free(state
->mark_stack
);
282 state
->mark_stack
= NULL
;
284 state
->mark_stack_size
= state
->mark_stack_base
= 0;
288 mark_save(SRE_STATE
* state
, int lo
, int hi
, int *mark_stack_base
)
292 int minsize
, newsize
;
297 size
= (hi
- lo
) + 1;
299 newsize
= state
->mark_stack_size
;
300 minsize
= state
->mark_stack_base
+ size
;
302 if (newsize
< minsize
) {
303 /* create new stack */
306 if (newsize
< minsize
)
308 TRACE(("allocate stack %d\n", newsize
));
309 stack
= malloc(sizeof(void*) * newsize
);
312 while (newsize
< minsize
)
314 TRACE(("grow stack to %d\n", newsize
));
315 stack
= realloc(state
->mark_stack
, sizeof(void*) * newsize
);
319 return SRE_ERROR_MEMORY
;
321 state
->mark_stack
= stack
;
322 state
->mark_stack_size
= newsize
;
325 TRACE(("copy %d:%d to %d (%d)\n", lo
, hi
, state
->mark_stack_base
, size
));
327 memcpy(state
->mark_stack
+ state
->mark_stack_base
, state
->mark
+ lo
,
328 size
* sizeof(void*));
330 state
->mark_stack_base
+= size
;
332 *mark_stack_base
= state
->mark_stack_base
;
338 mark_restore(SRE_STATE
* state
, int lo
, int hi
, int *mark_stack_base
)
345 size
= (hi
- lo
) + 1;
347 state
->mark_stack_base
= *mark_stack_base
- size
;
349 TRACE(("copy %d:%d from %d\n", lo
, hi
, state
->mark_stack_base
));
351 memcpy(state
->mark
+ lo
, state
->mark_stack
+ state
->mark_stack_base
,
352 size
* sizeof(void*));
357 /* generate 8-bit version */
359 #define SRE_CHAR unsigned char
360 #define SRE_AT sre_at
361 #define SRE_COUNT sre_count
362 #define SRE_CHARSET sre_charset
363 #define SRE_INFO sre_info
364 #define SRE_MATCH sre_match
365 #define SRE_SEARCH sre_search
366 #define SRE_LITERAL_TEMPLATE sre_literal_template
368 #if defined(HAVE_UNICODE)
370 #define SRE_RECURSIVE
374 #undef SRE_LITERAL_TEMPLATE
383 /* generate 16-bit unicode version */
385 #define SRE_CHAR Py_UNICODE
386 #define SRE_AT sre_uat
387 #define SRE_COUNT sre_ucount
388 #define SRE_CHARSET sre_ucharset
389 #define SRE_INFO sre_uinfo
390 #define SRE_MATCH sre_umatch
391 #define SRE_SEARCH sre_usearch
392 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
395 #endif /* SRE_RECURSIVE */
397 /* -------------------------------------------------------------------- */
398 /* String matching engine */
400 /* the following section is compiled twice, with different character
404 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
406 /* check if pointer is at given position */
412 case SRE_AT_BEGINNING
:
413 case SRE_AT_BEGINNING_STRING
:
414 return ((void*) ptr
== state
->beginning
);
416 case SRE_AT_BEGINNING_LINE
:
417 return ((void*) ptr
== state
->beginning
||
418 SRE_IS_LINEBREAK((int) ptr
[-1]));
421 return (((void*) (ptr
+1) == state
->end
&&
422 SRE_IS_LINEBREAK((int) ptr
[0])) ||
423 ((void*) ptr
== state
->end
));
425 case SRE_AT_END_LINE
:
426 return ((void*) ptr
== state
->end
||
427 SRE_IS_LINEBREAK((int) ptr
[0]));
429 case SRE_AT_END_STRING
:
430 return ((void*) ptr
== state
->end
);
432 case SRE_AT_BOUNDARY
:
433 if (state
->beginning
== state
->end
)
435 that
= ((void*) ptr
> state
->beginning
) ?
436 SRE_IS_WORD((int) ptr
[-1]) : 0;
437 this = ((void*) ptr
< state
->end
) ?
438 SRE_IS_WORD((int) ptr
[0]) : 0;
441 case SRE_AT_NON_BOUNDARY
:
442 if (state
->beginning
== state
->end
)
444 that
= ((void*) ptr
> state
->beginning
) ?
445 SRE_IS_WORD((int) ptr
[-1]) : 0;
446 this = ((void*) ptr
< state
->end
) ?
447 SRE_IS_WORD((int) ptr
[0]) : 0;
450 case SRE_AT_LOC_BOUNDARY
:
451 if (state
->beginning
== state
->end
)
453 that
= ((void*) ptr
> state
->beginning
) ?
454 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
455 this = ((void*) ptr
< state
->end
) ?
456 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
459 case SRE_AT_LOC_NON_BOUNDARY
:
460 if (state
->beginning
== state
->end
)
462 that
= ((void*) ptr
> state
->beginning
) ?
463 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
464 this = ((void*) ptr
< state
->end
) ?
465 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
468 #if defined(HAVE_UNICODE)
469 case SRE_AT_UNI_BOUNDARY
:
470 if (state
->beginning
== state
->end
)
472 that
= ((void*) ptr
> state
->beginning
) ?
473 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
474 this = ((void*) ptr
< state
->end
) ?
475 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
478 case SRE_AT_UNI_NON_BOUNDARY
:
479 if (state
->beginning
== state
->end
)
481 that
= ((void*) ptr
> state
->beginning
) ?
482 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
483 this = ((void*) ptr
< state
->end
) ?
484 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
494 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
496 /* check if character is a member of the given set */
504 /* <LITERAL> <code> */
511 /* <RANGE> <lower> <upper> */
512 if (set
[0] <= ch
&& ch
<= set
[1])
518 if (sizeof(SRE_CODE
) == 2) {
519 /* <CHARSET> <bitmap> (16 bits per code word) */
520 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
525 /* <CHARSET> <bitmap> (32 bits per code word) */
526 if (ch
< 256 && (set
[ch
>> 5] & (1 << (ch
& 31))))
532 case SRE_OP_BIGCHARSET
:
533 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
538 if (sizeof(SRE_CODE
) == 2) {
539 block
= ((unsigned char*)set
)[ch
>> 8];
541 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
547 block
= ((unsigned char*)set
)[ch
>> 8];
552 (set
[block
*8 + ((ch
& 255)>>5)] & (1 << (ch
& 31))))
559 case SRE_OP_CATEGORY
:
560 /* <CATEGORY> <code> */
561 if (sre_category(set
[0], (int) ch
))
574 /* internal error -- there's not much we can do about it
575 here, so let's just pretend it didn't match... */
581 LOCAL(int) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
);
584 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, int maxcount
, int level
)
587 SRE_CHAR
* ptr
= state
->ptr
;
588 SRE_CHAR
* end
= state
->end
;
592 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
593 end
= ptr
+ maxcount
;
595 switch (pattern
[0]) {
598 /* repeated dot wildcard. */
599 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
600 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
605 /* repeated dot wildcare. skip to the end of the target
606 string, and backtrack from there */
607 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
612 /* repeated literal */
614 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
615 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
619 case SRE_OP_LITERAL_IGNORE
:
620 /* repeated literal */
622 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
623 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
627 case SRE_OP_NOT_LITERAL
:
628 /* repeated non-literal */
630 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
631 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
635 case SRE_OP_NOT_LITERAL_IGNORE
:
636 /* repeated non-literal */
638 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
639 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
645 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
646 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
651 /* repeated single character pattern */
652 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
653 while ((SRE_CHAR
*) state
->ptr
< end
) {
654 i
= SRE_MATCH(state
, pattern
, level
);
660 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
661 (SRE_CHAR
*) state
->ptr
- ptr
));
662 return (SRE_CHAR
*) state
->ptr
- ptr
;
665 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
666 return ptr
- (SRE_CHAR
*) state
->ptr
;
669 #if 0 /* not used in this release */
671 SRE_INFO(SRE_STATE
* state
, SRE_CODE
* pattern
)
673 /* check if an SRE_OP_INFO block matches at the current position.
674 returns the number of SRE_CODE objects to skip if successful, 0
677 SRE_CHAR
* end
= state
->end
;
678 SRE_CHAR
* ptr
= state
->ptr
;
681 /* check minimal length */
682 if (pattern
[3] && (end
- ptr
) < pattern
[3])
685 /* check known prefix */
686 if (pattern
[2] & SRE_INFO_PREFIX
&& pattern
[5] > 1) {
687 /* <length> <skip> <prefix data> <overlap data> */
688 for (i
= 0; i
< pattern
[5]; i
++)
689 if ((SRE_CODE
) ptr
[i
] != pattern
[7 + i
])
691 return pattern
[0] + 2 * pattern
[6];
697 /* The macros below should be used to protect recursive SRE_MATCH()
698 * calls that *failed* and do *not* return immediately (IOW, those
699 * that will backtrack). Explaining:
701 * - Recursive SRE_MATCH() returned true: that's usually a success
702 * (besides atypical cases like ASSERT_NOT), therefore there's no
703 * reason to restore lastmark;
705 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
706 * is returning to the caller: If the current SRE_MATCH() is the
707 * top function of the recursion, returning false will be a matching
708 * failure, and it doesn't matter where lastmark is pointing to.
709 * If it's *not* the top function, it will be a recursive SRE_MATCH()
710 * failure by itself, and the calling SRE_MATCH() will have to deal
711 * with the failure by the same rules explained here (it will restore
712 * lastmark by itself if necessary);
714 * - Recursive SRE_MATCH() returned false, and will continue the
715 * outside 'for' loop: must be protected when breaking, since the next
716 * OP could potentially depend on lastmark;
718 * - Recursive SRE_MATCH() returned false, and will be called again
719 * inside a local for/while loop: must be protected between each
720 * loop iteration, since the recursive SRE_MATCH() could do anything,
721 * and could potentially depend on lastmark.
723 * For more information, check the discussion at SF patch #712900.
725 #define LASTMARK_SAVE() \
727 lastmark = state->lastmark; \
728 lastindex = state->lastindex; \
730 #define LASTMARK_RESTORE() \
732 if (state->lastmark > lastmark) { \
733 memset(state->mark + lastmark + 1, 0, \
734 (state->lastmark - lastmark) * sizeof(void*)); \
735 state->lastmark = lastmark; \
736 state->lastindex = lastindex; \
741 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
)
743 /* check if string matches the given pattern. returns <0 for
744 error, 0 for failure, and 1 for success */
746 SRE_CHAR
* end
= state
->end
;
747 SRE_CHAR
* ptr
= state
->ptr
;
750 int lastmark
, lastindex
, mark_stack_base
;
753 SRE_REPEAT rep
; /* FIXME: <fl> allocate in STATE instead */
755 TRACE(("|%p|%p|ENTER %d\n", pattern
, ptr
, level
));
757 #if defined(USE_STACKCHECK)
758 if (level
% 10 == 0 && PyOS_CheckStack())
759 return SRE_ERROR_RECURSION_LIMIT
;
762 #if defined(USE_RECURSION_LIMIT)
763 if (level
> USE_RECURSION_LIMIT
)
764 return SRE_ERROR_RECURSION_LIMIT
;
767 if (pattern
[0] == SRE_OP_INFO
) {
768 /* optimization info block */
769 /* <INFO> <1=skip> <2=flags> <3=min> ... */
770 if (pattern
[3] && (end
- ptr
) < pattern
[3]) {
771 TRACE(("reject (got %d chars, need %d)\n",
772 (end
- ptr
), pattern
[3]));
775 pattern
+= pattern
[1] + 1;
780 switch (*pattern
++) {
783 /* immediate failure */
784 TRACE(("|%p|%p|FAILURE\n", pattern
, ptr
));
789 TRACE(("|%p|%p|SUCCESS\n", pattern
, ptr
));
794 /* match at given position */
796 TRACE(("|%p|%p|AT %d\n", pattern
, ptr
, *pattern
));
797 if (!SRE_AT(state
, ptr
, *pattern
))
802 case SRE_OP_CATEGORY
:
803 /* match at given category */
804 /* <CATEGORY> <code> */
805 TRACE(("|%p|%p|CATEGORY %d\n", pattern
, ptr
, *pattern
));
806 if (ptr
>= end
|| !sre_category(pattern
[0], ptr
[0]))
813 /* match literal string */
814 /* <LITERAL> <code> */
815 TRACE(("|%p|%p|LITERAL %d\n", pattern
, ptr
, *pattern
));
816 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] != pattern
[0])
822 case SRE_OP_NOT_LITERAL
:
823 /* match anything that is not literal character */
824 /* <NOT_LITERAL> <code> */
825 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern
, ptr
, *pattern
));
826 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] == pattern
[0])
833 /* match anything (except a newline) */
835 TRACE(("|%p|%p|ANY\n", pattern
, ptr
));
836 if (ptr
>= end
|| SRE_IS_LINEBREAK(ptr
[0]))
844 TRACE(("|%p|%p|ANY_ALL\n", pattern
, ptr
));
851 /* match set member (or non_member) */
852 /* <IN> <skip> <set> */
853 TRACE(("|%p|%p|IN\n", pattern
, ptr
));
854 if (ptr
>= end
|| !SRE_CHARSET(pattern
+ 1, *ptr
))
856 pattern
+= pattern
[0];
860 case SRE_OP_GROUPREF
:
861 /* match backreference */
862 TRACE(("|%p|%p|GROUPREF %d\n", pattern
, ptr
, pattern
[0]));
865 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
866 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
867 if (!p
|| !e
|| e
< p
)
870 if (ptr
>= end
|| *ptr
!= *p
)
878 case SRE_OP_GROUPREF_IGNORE
:
879 /* match backreference */
880 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
883 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
884 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
885 if (!p
|| !e
|| e
< p
)
889 state
->lower(*ptr
) != state
->lower(*p
))
897 case SRE_OP_LITERAL_IGNORE
:
898 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
900 state
->lower(*ptr
) != state
->lower(*pattern
))
906 case SRE_OP_NOT_LITERAL_IGNORE
:
907 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, *pattern
));
909 state
->lower(*ptr
) == state
->lower(*pattern
))
915 case SRE_OP_IN_IGNORE
:
916 TRACE(("|%p|%p|IN_IGNORE\n", pattern
, ptr
));
918 || !SRE_CHARSET(pattern
+ 1, (SRE_CODE
) state
->lower(*ptr
)))
920 pattern
+= pattern
[0];
927 TRACE(("|%p|%p|MARK %d\n", pattern
, ptr
, pattern
[0]));
930 state
->lastindex
= i
/2 + 1;
931 if (i
> state
->lastmark
)
933 state
->mark
[i
] = ptr
;
940 /* <JUMP> <offset> */
941 TRACE(("|%p|%p|JUMP %d\n", pattern
, ptr
, pattern
[0]));
942 pattern
+= pattern
[0];
946 /* assert subpattern */
947 /* <ASSERT> <skip> <back> <pattern> */
948 TRACE(("|%p|%p|ASSERT %d\n", pattern
, ptr
, pattern
[1]));
949 state
->ptr
= ptr
- pattern
[1];
950 if (state
->ptr
< state
->beginning
)
952 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
955 pattern
+= pattern
[0];
958 case SRE_OP_ASSERT_NOT
:
959 /* assert not subpattern */
960 /* <ASSERT_NOT> <skip> <back> <pattern> */
961 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern
, ptr
, pattern
[1]));
962 state
->ptr
= ptr
- pattern
[1];
963 if (state
->ptr
>= state
->beginning
) {
964 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
970 pattern
+= pattern
[0];
975 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
976 TRACE(("|%p|%p|BRANCH\n", pattern
, ptr
));
979 i
= mark_save(state
, 0, lastmark
, &mark_stack_base
);
983 for (; pattern
[0]; pattern
+= pattern
[0]) {
984 if (pattern
[1] == SRE_OP_LITERAL
&&
985 (ptr
>= end
|| (SRE_CODE
) *ptr
!= pattern
[2]))
987 if (pattern
[1] == SRE_OP_IN
&&
988 (ptr
>= end
|| !SRE_CHARSET(pattern
+ 3, (SRE_CODE
) *ptr
)))
991 i
= SRE_MATCH(state
, pattern
+ 1, level
+ 1);
995 i
= mark_restore(state
, 0, lastmark
, &mark_stack_base
);
1003 case SRE_OP_REPEAT_ONE
:
1004 /* match repeated sequence (maximizing regexp) */
1006 /* this operator only works if the repeated item is
1007 exactly one character wide, and we're not already
1008 collecting backtracking points. for other cases,
1009 use the MAX_REPEAT operator */
1011 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1013 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern
, ptr
,
1014 pattern
[1], pattern
[2]));
1016 if (ptr
+ pattern
[1] > end
)
1017 return 0; /* cannot match */
1021 count
= SRE_COUNT(state
, pattern
+ 3, pattern
[2], level
+ 1);
1027 /* when we arrive here, count contains the number of
1028 matches, and ptr points to the tail of the target
1029 string. check if the rest of the pattern matches,
1030 and backtrack if not. */
1032 if (count
< (int) pattern
[1])
1035 if (pattern
[pattern
[0]] == SRE_OP_SUCCESS
) {
1036 /* tail is empty. we're finished */
1043 if (pattern
[pattern
[0]] == SRE_OP_LITERAL
) {
1044 /* tail starts with a literal. skip positions where
1045 the rest of the pattern cannot possibly match */
1046 chr
= pattern
[pattern
[0]+1];
1048 while (count
>= (int) pattern
[1] &&
1049 (ptr
>= end
|| *ptr
!= chr
)) {
1053 if (count
< (int) pattern
[1])
1056 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1066 while (count
>= (int) pattern
[1]) {
1068 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1078 case SRE_OP_MIN_REPEAT_ONE
:
1079 /* match repeated sequence (minimizing regexp) */
1081 /* this operator only works if the repeated item is
1082 exactly one character wide, and we're not already
1083 collecting backtracking points. for other cases,
1084 use the MIN_REPEAT operator */
1086 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1088 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern
, ptr
,
1089 pattern
[1], pattern
[2]));
1091 if (ptr
+ pattern
[1] > end
)
1092 return 0; /* cannot match */
1096 if (pattern
[1] == 0)
1099 /* count using pattern min as the maximum */
1100 count
= SRE_COUNT(state
, pattern
+ 3, pattern
[1], level
+ 1);
1103 return count
; /* exception */
1104 if (count
< (int) pattern
[1])
1105 return 0; /* did not match minimum number of times */
1106 ptr
+= count
; /* advance past minimum matches of repeat */
1109 if (pattern
[pattern
[0]] == SRE_OP_SUCCESS
) {
1110 /* tail is empty. we're finished */
1116 int matchmax
= ((int)pattern
[2] == 65535);
1119 while (matchmax
|| count
<= (int) pattern
[2]) {
1121 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1125 c
= SRE_COUNT(state
, pattern
+3, 1, level
+1);
1139 /* create repeat context. all the hard work is done
1140 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1141 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1142 TRACE(("|%p|%p|REPEAT %d %d\n", pattern
, ptr
,
1143 pattern
[1], pattern
[2]));
1146 rep
.pattern
= pattern
;
1148 /* install new repeat context */
1149 rep
.prev
= state
->repeat
;
1150 state
->repeat
= &rep
;
1153 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1155 state
->repeat
= rep
.prev
;
1159 case SRE_OP_MAX_UNTIL
:
1160 /* maximizing repeat */
1161 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1163 /* FIXME: we probably need to deal with zero-width
1164 matches in here... */
1168 return SRE_ERROR_STATE
;
1172 count
= rp
->count
+ 1;
1174 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern
, ptr
, count
));
1176 if (count
< rp
->pattern
[1]) {
1177 /* not enough matches */
1180 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1183 rp
->count
= count
- 1;
1188 if (count
< rp
->pattern
[2] || rp
->pattern
[2] == 65535) {
1189 /* we may have enough matches, but if we can
1190 match another item, do so */
1193 i
= mark_save(state
, 0, lastmark
, &mark_stack_base
);
1197 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1200 i
= mark_restore(state
, 0, lastmark
, &mark_stack_base
);
1204 rp
->count
= count
- 1;
1208 /* cannot match more repeated items here. make sure the
1210 state
->repeat
= rp
->prev
;
1211 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1218 case SRE_OP_MIN_UNTIL
:
1219 /* minimizing repeat */
1220 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1224 return SRE_ERROR_STATE
;
1228 count
= rp
->count
+ 1;
1230 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern
, ptr
, count
,
1233 if (count
< rp
->pattern
[1]) {
1234 /* not enough matches */
1237 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1240 rp
->count
= count
-1;
1247 /* see if the tail matches */
1248 state
->repeat
= rp
->prev
;
1249 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1256 if (count
>= rp
->pattern
[2] && rp
->pattern
[2] != 65535)
1263 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1266 rp
->count
= count
- 1;
1272 TRACE(("|%p|%p|UNKNOWN %d\n", pattern
, ptr
, pattern
[-1]));
1273 return SRE_ERROR_ILLEGAL
;
1277 /* can't end up here */
1278 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
1282 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1284 SRE_CHAR
* ptr
= state
->start
;
1285 SRE_CHAR
* end
= state
->end
;
1288 int prefix_skip
= 0;
1289 SRE_CODE
* prefix
= NULL
;
1290 SRE_CODE
* charset
= NULL
;
1291 SRE_CODE
* overlap
= NULL
;
1294 if (pattern
[0] == SRE_OP_INFO
) {
1295 /* optimization info block */
1296 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1300 if (pattern
[3] > 1) {
1301 /* adjust end point (but make sure we leave at least one
1302 character in there, so literal search will work) */
1303 end
-= pattern
[3]-1;
1308 if (flags
& SRE_INFO_PREFIX
) {
1309 /* pattern starts with a known prefix */
1310 /* <length> <skip> <prefix data> <overlap data> */
1311 prefix_len
= pattern
[5];
1312 prefix_skip
= pattern
[6];
1313 prefix
= pattern
+ 7;
1314 overlap
= prefix
+ prefix_len
- 1;
1315 } else if (flags
& SRE_INFO_CHARSET
)
1316 /* pattern starts with a character from a known set */
1318 charset
= pattern
+ 5;
1320 pattern
+= 1 + pattern
[1];
1323 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1324 TRACE(("charset = %p\n", charset
));
1326 #if defined(USE_FAST_SEARCH)
1327 if (prefix_len
> 1) {
1328 /* pattern starts with a known prefix. use the overlap
1329 table to skip forward as fast as we possibly can */
1334 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1340 if (++i
== prefix_len
) {
1341 /* found a potential match */
1342 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1343 state
->start
= ptr
+ 1 - prefix_len
;
1344 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1345 if (flags
& SRE_INFO_LITERAL
)
1346 return 1; /* we got all of it */
1347 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
, 1);
1350 /* close but no cigar -- try again */
1363 if (pattern
[0] == SRE_OP_LITERAL
) {
1364 /* pattern starts with a literal character. this is used
1365 for short prefixes, and if fast search is disabled */
1366 SRE_CODE chr
= pattern
[1];
1369 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1373 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1376 if (flags
& SRE_INFO_LITERAL
)
1377 return 1; /* we got all of it */
1378 status
= SRE_MATCH(state
, pattern
+ 2, 1);
1382 } else if (charset
) {
1383 /* pattern starts with a character from a known set */
1386 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1390 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1393 status
= SRE_MATCH(state
, pattern
, 1);
1400 while (ptr
<= end
) {
1401 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1402 state
->start
= state
->ptr
= ptr
++;
1403 status
= SRE_MATCH(state
, pattern
, 1);
1412 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, int len
)
1414 /* check if given string is a literal template (i.e. no escapes) */
1421 #if !defined(SRE_RECURSIVE)
1423 /* -------------------------------------------------------------------- */
1424 /* factories and destructors */
1426 /* see sre.h for object declarations */
1428 static PyTypeObject Pattern_Type
;
1429 static PyTypeObject Match_Type
;
1430 static PyTypeObject Scanner_Type
;
1433 _compile(PyObject
* self_
, PyObject
* args
)
1435 /* "compile" pattern descriptor to pattern object */
1437 PatternObject
* self
;
1444 PyObject
* groupindex
= NULL
;
1445 PyObject
* indexgroup
= NULL
;
1446 if (!PyArg_ParseTuple(args
, "OiO!|iOO", &pattern
, &flags
,
1447 &PyList_Type
, &code
, &groups
,
1448 &groupindex
, &indexgroup
))
1451 n
= PyList_GET_SIZE(code
);
1453 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
1459 for (i
= 0; i
< n
; i
++) {
1460 PyObject
*o
= PyList_GET_ITEM(code
, i
);
1462 self
->code
[i
] = (SRE_CODE
) PyInt_AsLong(o
);
1464 self
->code
[i
] = (SRE_CODE
) PyLong_AsUnsignedLong(o
);
1467 if (PyErr_Occurred()) {
1473 self
->pattern
= pattern
;
1475 self
->flags
= flags
;
1477 self
->groups
= groups
;
1479 Py_XINCREF(groupindex
);
1480 self
->groupindex
= groupindex
;
1482 Py_XINCREF(indexgroup
);
1483 self
->indexgroup
= indexgroup
;
1485 return (PyObject
*) self
;
1489 sre_codesize(PyObject
* self
, PyObject
* args
)
1491 return Py_BuildValue("i", sizeof(SRE_CODE
));
1495 sre_getlower(PyObject
* self
, PyObject
* args
)
1497 int character
, flags
;
1498 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1500 if (flags
& SRE_FLAG_LOCALE
)
1501 return Py_BuildValue("i", sre_lower_locale(character
));
1502 if (flags
& SRE_FLAG_UNICODE
)
1503 #if defined(HAVE_UNICODE)
1504 return Py_BuildValue("i", sre_lower_unicode(character
));
1506 return Py_BuildValue("i", sre_lower_locale(character
));
1508 return Py_BuildValue("i", sre_lower(character
));
1512 state_reset(SRE_STATE
* state
)
1514 state
->lastmark
= 0;
1516 /* FIXME: dynamic! */
1517 memset(state
->mark
, 0, sizeof(*state
->mark
) * SRE_MARK_SIZE
);
1519 state
->lastindex
= -1;
1521 state
->repeat
= NULL
;
1527 getstring(PyObject
* string
, int* p_length
, int* p_charsize
)
1529 /* given a python object, return a data pointer, a length (in
1530 characters), and a character size. return NULL if the object
1531 is not a string (or not compatible) */
1533 PyBufferProcs
*buffer
;
1534 int size
, bytes
, charsize
;
1537 #if defined(HAVE_UNICODE)
1538 if (PyUnicode_Check(string
)) {
1539 /* unicode strings doesn't always support the buffer interface */
1540 ptr
= (void*) PyUnicode_AS_DATA(string
);
1541 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1542 size
= PyUnicode_GET_SIZE(string
);
1543 charsize
= sizeof(Py_UNICODE
);
1548 /* get pointer to string buffer */
1549 buffer
= string
->ob_type
->tp_as_buffer
;
1550 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1551 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1552 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1556 /* determine buffer size */
1557 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1559 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1563 /* determine character size */
1564 #if PY_VERSION_HEX >= 0x01060000
1565 size
= PyObject_Size(string
);
1567 size
= PyObject_Length(string
);
1570 if (PyString_Check(string
) || bytes
== size
)
1572 #if defined(HAVE_UNICODE)
1573 else if (bytes
== (int) (size
* sizeof(Py_UNICODE
)))
1574 charsize
= sizeof(Py_UNICODE
);
1577 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1581 #if defined(HAVE_UNICODE)
1586 *p_charsize
= charsize
;
1592 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1595 /* prepare state object */
1601 memset(state
, 0, sizeof(SRE_STATE
));
1603 state
->lastindex
= -1;
1605 ptr
= getstring(string
, &length
, &charsize
);
1609 /* adjust boundaries */
1612 else if (start
> length
)
1617 else if (end
> length
)
1620 state
->charsize
= charsize
;
1622 state
->beginning
= ptr
;
1624 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1625 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1628 state
->string
= string
;
1630 state
->endpos
= end
;
1632 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1633 state
->lower
= sre_lower_locale
;
1634 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1635 #if defined(HAVE_UNICODE)
1636 state
->lower
= sre_lower_unicode
;
1638 state
->lower
= sre_lower_locale
;
1641 state
->lower
= sre_lower
;
1647 state_fini(SRE_STATE
* state
)
1649 Py_XDECREF(state
->string
);
1653 /* calculate offset from start of string */
1654 #define STATE_OFFSET(state, member)\
1655 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1658 state_getslice(SRE_STATE
* state
, int index
, PyObject
* string
, int empty
)
1662 index
= (index
- 1) * 2;
1664 if (string
== Py_None
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1666 /* want empty string */
1673 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1674 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1677 return PySequence_GetSlice(string
, i
, j
);
1681 pattern_error(int status
)
1684 case SRE_ERROR_RECURSION_LIMIT
:
1687 "maximum recursion limit exceeded"
1690 case SRE_ERROR_MEMORY
:
1694 /* other error codes indicate compiler/engine bugs */
1697 "internal error in regular expression engine"
1703 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
1705 /* create match object (from state object) */
1714 /* create match object (with room for extra group marks) */
1715 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
1716 2*(pattern
->groups
+1));
1721 match
->pattern
= pattern
;
1723 Py_INCREF(state
->string
);
1724 match
->string
= state
->string
;
1727 match
->groups
= pattern
->groups
+1;
1729 /* fill in group slices */
1731 base
= (char*) state
->beginning
;
1732 n
= state
->charsize
;
1734 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
1735 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
1737 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
1738 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
1739 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
1740 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
1742 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
1744 match
->pos
= state
->pos
;
1745 match
->endpos
= state
->endpos
;
1747 match
->lastindex
= state
->lastindex
;
1749 return (PyObject
*) match
;
1751 } else if (status
== 0) {
1759 /* internal error */
1760 pattern_error(status
);
1765 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
1767 /* create search state object */
1769 ScannerObject
* self
;
1774 if (!PyArg_ParseTuple(args
, "O|ii:scanner", &string
, &start
, &end
))
1777 /* create scanner object */
1778 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
1782 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
1789 self
->pattern
= (PyObject
*) pattern
;
1791 return (PyObject
*) self
;
1795 pattern_dealloc(PatternObject
* self
)
1797 Py_XDECREF(self
->pattern
);
1798 Py_XDECREF(self
->groupindex
);
1799 Py_XDECREF(self
->indexgroup
);
1804 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1812 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1813 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:match", kwlist
,
1814 &string
, &start
, &end
))
1817 string
= state_init(&state
, self
, string
, start
, end
);
1821 state
.ptr
= state
.start
;
1823 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1825 if (state
.charsize
== 1) {
1826 status
= sre_match(&state
, PatternObject_GetCode(self
), 1);
1828 #if defined(HAVE_UNICODE)
1829 status
= sre_umatch(&state
, PatternObject_GetCode(self
), 1);
1833 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1837 return pattern_new_match(self
, &state
, status
);
1841 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1849 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1850 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:search", kwlist
,
1851 &string
, &start
, &end
))
1854 string
= state_init(&state
, self
, string
, start
, end
);
1858 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1860 if (state
.charsize
== 1) {
1861 status
= sre_search(&state
, PatternObject_GetCode(self
));
1863 #if defined(HAVE_UNICODE)
1864 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1868 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1872 return pattern_new_match(self
, &state
, status
);
1876 call(char* module
, char* function
, PyObject
* args
)
1885 name
= PyString_FromString(module
);
1888 mod
= PyImport_Import(name
);
1892 func
= PyObject_GetAttrString(mod
, function
);
1896 result
= PyObject_CallObject(func
, args
);
1902 #ifdef USE_BUILTIN_COPY
1904 deepcopy(PyObject
** object
, PyObject
* memo
)
1910 Py_BuildValue("OO", *object
, memo
)
1918 return 1; /* success */
1923 join_list(PyObject
* list
, PyObject
* pattern
)
1925 /* join list elements */
1928 #if PY_VERSION_HEX >= 0x01060000
1934 switch (PyList_GET_SIZE(list
)) {
1937 return PySequence_GetSlice(pattern
, 0, 0);
1939 result
= PyList_GET_ITEM(list
, 0);
1945 /* two or more elements: slice out a suitable separator from the
1946 first member, and use that to join the entire list */
1948 joiner
= PySequence_GetSlice(pattern
, 0, 0);
1952 #if PY_VERSION_HEX >= 0x01060000
1953 function
= PyObject_GetAttrString(joiner
, "join");
1958 args
= PyTuple_New(1);
1960 Py_DECREF(function
);
1964 PyTuple_SET_ITEM(args
, 0, list
);
1965 result
= PyObject_CallObject(function
, args
);
1966 Py_DECREF(args
); /* also removes list */
1967 Py_DECREF(function
);
1971 Py_BuildValue("OO", list
, joiner
)
1980 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1990 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
1991 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:findall", kwlist
,
1992 &string
, &start
, &end
))
1995 string
= state_init(&state
, self
, string
, start
, end
);
1999 list
= PyList_New(0);
2005 while (state
.start
<= state
.end
) {
2009 state_reset(&state
);
2011 state
.ptr
= state
.start
;
2013 if (state
.charsize
== 1) {
2014 status
= sre_search(&state
, PatternObject_GetCode(self
));
2016 #if defined(HAVE_UNICODE)
2017 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2024 pattern_error(status
);
2028 /* don't bother to build a match object */
2029 switch (self
->groups
) {
2031 b
= STATE_OFFSET(&state
, state
.start
);
2032 e
= STATE_OFFSET(&state
, state
.ptr
);
2033 item
= PySequence_GetSlice(string
, b
, e
);
2038 item
= state_getslice(&state
, 1, string
, 1);
2043 item
= PyTuple_New(self
->groups
);
2046 for (i
= 0; i
< self
->groups
; i
++) {
2047 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
2052 PyTuple_SET_ITEM(item
, i
, o
);
2057 status
= PyList_Append(list
, item
);
2062 if (state
.ptr
== state
.start
)
2063 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2065 state
.start
= state
.ptr
;
2079 #if PY_VERSION_HEX >= 0x02020000
2081 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
2087 scanner
= pattern_scanner(pattern
, args
);
2091 search
= PyObject_GetAttrString(scanner
, "search");
2096 iterator
= PyCallIter_New(search
, Py_None
);
2104 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2116 static char* kwlist
[] = { "source", "maxsplit", NULL
};
2117 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|i:split", kwlist
,
2118 &string
, &maxsplit
))
2121 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
2125 list
= PyList_New(0);
2134 while (!maxsplit
|| n
< maxsplit
) {
2136 state_reset(&state
);
2138 state
.ptr
= state
.start
;
2140 if (state
.charsize
== 1) {
2141 status
= sre_search(&state
, PatternObject_GetCode(self
));
2143 #if defined(HAVE_UNICODE)
2144 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2151 pattern_error(status
);
2155 if (state
.start
== state
.ptr
) {
2156 if (last
== state
.end
)
2158 /* skip one character */
2159 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2163 /* get segment before this match */
2164 item
= PySequence_GetSlice(
2165 string
, STATE_OFFSET(&state
, last
),
2166 STATE_OFFSET(&state
, state
.start
)
2170 status
= PyList_Append(list
, item
);
2175 /* add groups (if any) */
2176 for (i
= 0; i
< self
->groups
; i
++) {
2177 item
= state_getslice(&state
, i
+1, string
, 0);
2180 status
= PyList_Append(list
, item
);
2188 last
= state
.start
= state
.ptr
;
2192 /* get segment following last match (even if empty) */
2193 item
= PySequence_GetSlice(
2194 string
, STATE_OFFSET(&state
, last
), state
.endpos
2198 status
= PyList_Append(list
, item
);
2214 pattern_subx(PatternObject
* self
, PyObject
* template, PyObject
* string
,
2215 int count
, int subn
)
2227 int filter_is_callable
;
2229 if (PyCallable_Check(template)) {
2230 /* sub/subn takes either a function or a template */
2233 filter_is_callable
= 1;
2235 /* if not callable, check if it's a literal string */
2237 ptr
= getstring(template, &n
, &b
);
2240 literal
= sre_literal_template(ptr
, n
);
2242 #if defined(HAVE_UNICODE)
2243 literal
= sre_uliteral_template(ptr
, n
);
2253 filter_is_callable
= 0;
2255 /* not a literal; hand it over to the template compiler */
2257 SRE_MODULE
, "_subx",
2258 Py_BuildValue("OO", self
, template)
2262 filter_is_callable
= PyCallable_Check(filter
);
2266 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
2272 list
= PyList_New(0);
2281 while (!count
|| n
< count
) {
2283 state_reset(&state
);
2285 state
.ptr
= state
.start
;
2287 if (state
.charsize
== 1) {
2288 status
= sre_search(&state
, PatternObject_GetCode(self
));
2290 #if defined(HAVE_UNICODE)
2291 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2298 pattern_error(status
);
2302 b
= STATE_OFFSET(&state
, state
.start
);
2303 e
= STATE_OFFSET(&state
, state
.ptr
);
2306 /* get segment before this match */
2307 item
= PySequence_GetSlice(string
, i
, b
);
2310 status
= PyList_Append(list
, item
);
2315 } else if (i
== b
&& i
== e
&& n
> 0)
2316 /* ignore empty match on latest position */
2319 if (filter_is_callable
) {
2320 /* pass match object through filter */
2321 match
= pattern_new_match(self
, &state
, 1);
2324 args
= Py_BuildValue("(O)", match
);
2329 item
= PyObject_CallObject(filter
, args
);
2335 /* filter is literal string */
2341 if (item
!= Py_None
) {
2342 status
= PyList_Append(list
, item
);
2353 if (state
.ptr
== state
.start
)
2354 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2356 state
.start
= state
.ptr
;
2360 /* get segment following last match */
2361 if (i
< state
.endpos
) {
2362 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2365 status
= PyList_Append(list
, item
);
2375 /* convert list to single string (also removes list) */
2376 item
= join_list(list
, self
->pattern
);
2382 return Py_BuildValue("Ni", item
, n
);
2395 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2400 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2401 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:sub", kwlist
,
2402 &template, &string
, &count
))
2405 return pattern_subx(self
, template, string
, count
, 0);
2409 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2414 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2415 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:subn", kwlist
,
2416 &template, &string
, &count
))
2419 return pattern_subx(self
, template, string
, count
, 1);
2423 pattern_copy(PatternObject
* self
, PyObject
* args
)
2425 #ifdef USE_BUILTIN_COPY
2426 PatternObject
* copy
;
2429 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2432 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2436 offset
= offsetof(PatternObject
, groups
);
2438 Py_XINCREF(self
->groupindex
);
2439 Py_XINCREF(self
->indexgroup
);
2440 Py_XINCREF(self
->pattern
);
2442 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2443 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2445 return (PyObject
*) copy
;
2447 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2453 pattern_deepcopy(PatternObject
* self
, PyObject
* args
)
2455 #ifdef USE_BUILTIN_COPY
2456 PatternObject
* copy
;
2459 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2462 copy
= (PatternObject
*) pattern_copy(self
, Py_None
);
2466 if (!deepcopy(©
->groupindex
, memo
) ||
2467 !deepcopy(©
->indexgroup
, memo
) ||
2468 !deepcopy(©
->pattern
, memo
)) {
2474 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2479 static PyMethodDef pattern_methods
[] = {
2480 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
},
2481 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
},
2482 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
},
2483 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
},
2484 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
},
2485 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
},
2486 #if PY_VERSION_HEX >= 0x02020000
2487 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
},
2489 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2490 {"__copy__", (PyCFunction
) pattern_copy
, METH_VARARGS
},
2491 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_VARARGS
},
2496 pattern_getattr(PatternObject
* self
, char* name
)
2500 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2508 if (!strcmp(name
, "pattern")) {
2509 Py_INCREF(self
->pattern
);
2510 return self
->pattern
;
2513 if (!strcmp(name
, "flags"))
2514 return Py_BuildValue("i", self
->flags
);
2516 if (!strcmp(name
, "groups"))
2517 return Py_BuildValue("i", self
->groups
);
2519 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2520 Py_INCREF(self
->groupindex
);
2521 return self
->groupindex
;
2524 PyErr_SetString(PyExc_AttributeError
, name
);
2528 statichere PyTypeObject Pattern_Type
= {
2529 PyObject_HEAD_INIT(NULL
)
2530 0, "_" SRE_MODULE
".SRE_Pattern",
2531 sizeof(PatternObject
), sizeof(SRE_CODE
),
2532 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2534 (getattrfunc
)pattern_getattr
/*tp_getattr*/
2537 /* -------------------------------------------------------------------- */
2541 match_dealloc(MatchObject
* self
)
2543 Py_XDECREF(self
->regs
);
2544 Py_XDECREF(self
->string
);
2545 Py_DECREF(self
->pattern
);
2550 match_getslice_by_index(MatchObject
* self
, int index
, PyObject
* def
)
2552 if (index
< 0 || index
>= self
->groups
) {
2553 /* raise IndexError if we were given a bad group number */
2563 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2564 /* return default value if the string or group is undefined */
2569 return PySequence_GetSlice(
2570 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2575 match_getindex(MatchObject
* self
, PyObject
* index
)
2579 if (PyInt_Check(index
))
2580 return (int) PyInt_AS_LONG(index
);
2584 if (self
->pattern
->groupindex
) {
2585 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2587 if (PyInt_Check(index
))
2588 i
= (int) PyInt_AS_LONG(index
);
2598 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2600 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2604 match_expand(MatchObject
* self
, PyObject
* args
)
2607 if (!PyArg_ParseTuple(args
, "O:expand", &template))
2610 /* delegate to Python code */
2612 SRE_MODULE
, "_expand",
2613 Py_BuildValue("OOO", self
->pattern
, self
, template)
2618 match_group(MatchObject
* self
, PyObject
* args
)
2623 size
= PyTuple_GET_SIZE(args
);
2627 result
= match_getslice(self
, Py_False
, Py_None
);
2630 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2633 /* fetch multiple items */
2634 result
= PyTuple_New(size
);
2637 for (i
= 0; i
< size
; i
++) {
2638 PyObject
* item
= match_getslice(
2639 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2645 PyTuple_SET_ITEM(result
, i
, item
);
2653 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2658 PyObject
* def
= Py_None
;
2659 static char* kwlist
[] = { "default", NULL
};
2660 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2663 result
= PyTuple_New(self
->groups
-1);
2667 for (index
= 1; index
< self
->groups
; index
++) {
2669 item
= match_getslice_by_index(self
, index
, def
);
2674 PyTuple_SET_ITEM(result
, index
-1, item
);
2681 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2687 PyObject
* def
= Py_None
;
2688 static char* kwlist
[] = { "default", NULL
};
2689 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2692 result
= PyDict_New();
2693 if (!result
|| !self
->pattern
->groupindex
)
2696 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2700 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2704 key
= PyList_GET_ITEM(keys
, index
);
2707 value
= match_getslice(self
, key
, def
);
2712 status
= PyDict_SetItem(result
, key
, value
);
2729 match_start(MatchObject
* self
, PyObject
* args
)
2733 PyObject
* index_
= Py_False
; /* zero */
2734 if (!PyArg_ParseTuple(args
, "|O:start", &index_
))
2737 index
= match_getindex(self
, index_
);
2739 if (index
< 0 || index
>= self
->groups
) {
2747 /* mark is -1 if group is undefined */
2748 return Py_BuildValue("i", self
->mark
[index
*2]);
2752 match_end(MatchObject
* self
, PyObject
* args
)
2756 PyObject
* index_
= Py_False
; /* zero */
2757 if (!PyArg_ParseTuple(args
, "|O:end", &index_
))
2760 index
= match_getindex(self
, index_
);
2762 if (index
< 0 || index
>= self
->groups
) {
2770 /* mark is -1 if group is undefined */
2771 return Py_BuildValue("i", self
->mark
[index
*2+1]);
2775 _pair(int i1
, int i2
)
2780 pair
= PyTuple_New(2);
2784 item
= PyInt_FromLong(i1
);
2787 PyTuple_SET_ITEM(pair
, 0, item
);
2789 item
= PyInt_FromLong(i2
);
2792 PyTuple_SET_ITEM(pair
, 1, item
);
2802 match_span(MatchObject
* self
, PyObject
* args
)
2806 PyObject
* index_
= Py_False
; /* zero */
2807 if (!PyArg_ParseTuple(args
, "|O:span", &index_
))
2810 index
= match_getindex(self
, index_
);
2812 if (index
< 0 || index
>= self
->groups
) {
2820 /* marks are -1 if group is undefined */
2821 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2825 match_regs(MatchObject
* self
)
2831 regs
= PyTuple_New(self
->groups
);
2835 for (index
= 0; index
< self
->groups
; index
++) {
2836 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2841 PyTuple_SET_ITEM(regs
, index
, item
);
2851 match_copy(MatchObject
* self
, PyObject
* args
)
2853 #ifdef USE_BUILTIN_COPY
2857 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2860 slots
= 2 * (self
->pattern
->groups
+1);
2862 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
2866 /* this value a constant, but any compiler should be able to
2867 figure that out all by itself */
2868 offset
= offsetof(MatchObject
, string
);
2870 Py_XINCREF(self
->pattern
);
2871 Py_XINCREF(self
->string
);
2872 Py_XINCREF(self
->regs
);
2874 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2875 sizeof(MatchObject
) + slots
* sizeof(int) - offset
);
2877 return (PyObject
*) copy
;
2879 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
2885 match_deepcopy(MatchObject
* self
, PyObject
* args
)
2887 #ifdef USE_BUILTIN_COPY
2891 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2894 copy
= (MatchObject
*) match_copy(self
, Py_None
);
2898 if (!deepcopy((PyObject
**) ©
->pattern
, memo
) ||
2899 !deepcopy(©
->string
, memo
) ||
2900 !deepcopy(©
->regs
, memo
)) {
2906 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
2911 static PyMethodDef match_methods
[] = {
2912 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
2913 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
2914 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
2915 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
2916 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
2917 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
2918 {"expand", (PyCFunction
) match_expand
, METH_VARARGS
},
2919 {"__copy__", (PyCFunction
) match_copy
, METH_VARARGS
},
2920 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_VARARGS
},
2925 match_getattr(MatchObject
* self
, char* name
)
2929 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
2935 if (!strcmp(name
, "lastindex")) {
2936 if (self
->lastindex
>= 0)
2937 return Py_BuildValue("i", self
->lastindex
);
2942 if (!strcmp(name
, "lastgroup")) {
2943 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
2944 PyObject
* result
= PySequence_GetItem(
2945 self
->pattern
->indexgroup
, self
->lastindex
2955 if (!strcmp(name
, "string")) {
2957 Py_INCREF(self
->string
);
2958 return self
->string
;
2965 if (!strcmp(name
, "regs")) {
2967 Py_INCREF(self
->regs
);
2970 return match_regs(self
);
2973 if (!strcmp(name
, "re")) {
2974 Py_INCREF(self
->pattern
);
2975 return (PyObject
*) self
->pattern
;
2978 if (!strcmp(name
, "pos"))
2979 return Py_BuildValue("i", self
->pos
);
2981 if (!strcmp(name
, "endpos"))
2982 return Py_BuildValue("i", self
->endpos
);
2984 PyErr_SetString(PyExc_AttributeError
, name
);
2988 /* FIXME: implement setattr("string", None) as a special case (to
2989 detach the associated string, if any */
2991 statichere PyTypeObject Match_Type
= {
2992 PyObject_HEAD_INIT(NULL
)
2993 0, "_" SRE_MODULE
".SRE_Match",
2994 sizeof(MatchObject
), sizeof(int),
2995 (destructor
)match_dealloc
, /*tp_dealloc*/
2997 (getattrfunc
)match_getattr
/*tp_getattr*/
3000 /* -------------------------------------------------------------------- */
3001 /* scanner methods (experimental) */
3004 scanner_dealloc(ScannerObject
* self
)
3006 state_fini(&self
->state
);
3007 Py_DECREF(self
->pattern
);
3012 scanner_match(ScannerObject
* self
, PyObject
* args
)
3014 SRE_STATE
* state
= &self
->state
;
3020 state
->ptr
= state
->start
;
3022 if (state
->charsize
== 1) {
3023 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
), 1);
3025 #if defined(HAVE_UNICODE)
3026 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
), 1);
3030 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3033 if ((status
== 0 || state
->ptr
== state
->start
) &&
3034 state
->ptr
< state
->end
)
3035 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3037 state
->start
= state
->ptr
;
3044 scanner_search(ScannerObject
* self
, PyObject
* args
)
3046 SRE_STATE
* state
= &self
->state
;
3052 state
->ptr
= state
->start
;
3054 if (state
->charsize
== 1) {
3055 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
3057 #if defined(HAVE_UNICODE)
3058 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
3062 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3065 if ((status
== 0 || state
->ptr
== state
->start
) &&
3066 state
->ptr
< state
->end
)
3067 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3069 state
->start
= state
->ptr
;
3074 static PyMethodDef scanner_methods
[] = {
3075 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
3076 /* METH_OLDARGS is not in Python 1.5.2 */
3077 {"match", (PyCFunction
) scanner_match
, 0},
3078 {"search", (PyCFunction
) scanner_search
, 0},
3083 scanner_getattr(ScannerObject
* self
, char* name
)
3087 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
3094 if (!strcmp(name
, "pattern")) {
3095 Py_INCREF(self
->pattern
);
3096 return self
->pattern
;
3099 PyErr_SetString(PyExc_AttributeError
, name
);
3103 statichere PyTypeObject Scanner_Type
= {
3104 PyObject_HEAD_INIT(NULL
)
3105 0, "_" SRE_MODULE
".SRE_Scanner",
3106 sizeof(ScannerObject
), 0,
3107 (destructor
)scanner_dealloc
, /*tp_dealloc*/
3109 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
3112 static PyMethodDef _functions
[] = {
3113 {"compile", _compile
, METH_VARARGS
},
3114 {"getcodesize", sre_codesize
, METH_VARARGS
},
3115 {"getlower", sre_getlower
, METH_VARARGS
},
3119 #if PY_VERSION_HEX < 0x02030000
3120 DL_EXPORT(void) init_sre(void)
3122 PyMODINIT_FUNC
init_sre(void)
3129 /* Patch object types */
3130 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
3131 Scanner_Type
.ob_type
= &PyType_Type
;
3133 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
3134 d
= PyModule_GetDict(m
);
3136 x
= PyInt_FromLong(SRE_MAGIC
);
3138 PyDict_SetItemString(d
, "MAGIC", x
);
3142 x
= PyInt_FromLong(sizeof(SRE_CODE
));
3144 PyDict_SetItemString(d
, "CODESIZE", x
);
3148 x
= PyString_FromString(copyright
);
3150 PyDict_SetItemString(d
, "copyright", x
);
3155 #endif /* !defined(SRE_RECURSIVE) */