2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
32 * Portions of this engine have been developed in cooperation with
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
34 * other compatibility work.
39 static char copyright
[] =
40 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
42 #define PY_SSIZE_T_CLEAN
45 #include "structmember.h" /* offsetof */
51 /* name of this module, minus the leading underscore */
52 #if !defined(SRE_MODULE)
53 #define SRE_MODULE "sre"
56 #define SRE_PY_MODULE "re"
58 /* defining this one enables tracing */
61 /* defining this enables unicode support (default under 1.6a1 and later) */
64 /* -------------------------------------------------------------------- */
65 /* optional features */
67 /* enables fast searching */
68 #define USE_FAST_SEARCH
70 /* enables aggressive inlining (always on for Visual C) */
73 /* enables copy/deepcopy handling (work in progress) */
74 #undef USE_BUILTIN_COPY
76 #if PY_VERSION_HEX < 0x01060000
77 #define PyObject_DEL(op) PyMem_DEL((op))
80 /* -------------------------------------------------------------------- */
83 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
84 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
85 /* fastest possible local call under MSVC */
86 #define LOCAL(type) static __inline type __fastcall
87 #elif defined(USE_INLINE)
88 #define LOCAL(type) static inline type
90 #define LOCAL(type) static type
94 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
95 #define SRE_ERROR_STATE -2 /* illegal state */
96 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
97 #define SRE_ERROR_MEMORY -9 /* out of memory */
98 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
101 #define TRACE(v) printf v
106 /* -------------------------------------------------------------------- */
107 /* search engine state */
109 /* default character predicates (run sre_chars.py to regenerate tables) */
111 #define SRE_DIGIT_MASK 1
112 #define SRE_SPACE_MASK 2
113 #define SRE_LINEBREAK_MASK 4
114 #define SRE_ALNUM_MASK 8
115 #define SRE_WORD_MASK 16
117 /* FIXME: this assumes ASCII. create tables in init_sre() instead */
119 static char sre_char_info
[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
120 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
122 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
123 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
124 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
125 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
127 static char sre_char_lower
[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
128 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
129 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
130 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
131 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
132 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
133 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
134 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
135 120, 121, 122, 123, 124, 125, 126, 127 };
137 #define SRE_IS_DIGIT(ch)\
138 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
139 #define SRE_IS_SPACE(ch)\
140 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
141 #define SRE_IS_LINEBREAK(ch)\
142 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
143 #define SRE_IS_ALNUM(ch)\
144 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
145 #define SRE_IS_WORD(ch)\
146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
148 static unsigned int sre_lower(unsigned int ch
)
150 return ((ch
) < 128 ? (unsigned int)sre_char_lower
[ch
] : ch
);
153 /* locale-specific character predicates */
154 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
155 * warnings when c's type supports only numbers < N+1 */
156 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
157 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
158 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
159 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
160 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
162 static unsigned int sre_lower_locale(unsigned int ch
)
164 return ((ch
) < 256 ? (unsigned int)tolower((ch
)) : ch
);
167 /* unicode-specific character predicates */
169 #if defined(HAVE_UNICODE)
171 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
172 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
173 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
174 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
175 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
177 static unsigned int sre_lower_unicode(unsigned int ch
)
179 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE
)(ch
));
185 sre_category(SRE_CODE category
, unsigned int ch
)
189 case SRE_CATEGORY_DIGIT
:
190 return SRE_IS_DIGIT(ch
);
191 case SRE_CATEGORY_NOT_DIGIT
:
192 return !SRE_IS_DIGIT(ch
);
193 case SRE_CATEGORY_SPACE
:
194 return SRE_IS_SPACE(ch
);
195 case SRE_CATEGORY_NOT_SPACE
:
196 return !SRE_IS_SPACE(ch
);
197 case SRE_CATEGORY_WORD
:
198 return SRE_IS_WORD(ch
);
199 case SRE_CATEGORY_NOT_WORD
:
200 return !SRE_IS_WORD(ch
);
201 case SRE_CATEGORY_LINEBREAK
:
202 return SRE_IS_LINEBREAK(ch
);
203 case SRE_CATEGORY_NOT_LINEBREAK
:
204 return !SRE_IS_LINEBREAK(ch
);
206 case SRE_CATEGORY_LOC_WORD
:
207 return SRE_LOC_IS_WORD(ch
);
208 case SRE_CATEGORY_LOC_NOT_WORD
:
209 return !SRE_LOC_IS_WORD(ch
);
211 #if defined(HAVE_UNICODE)
212 case SRE_CATEGORY_UNI_DIGIT
:
213 return SRE_UNI_IS_DIGIT(ch
);
214 case SRE_CATEGORY_UNI_NOT_DIGIT
:
215 return !SRE_UNI_IS_DIGIT(ch
);
216 case SRE_CATEGORY_UNI_SPACE
:
217 return SRE_UNI_IS_SPACE(ch
);
218 case SRE_CATEGORY_UNI_NOT_SPACE
:
219 return !SRE_UNI_IS_SPACE(ch
);
220 case SRE_CATEGORY_UNI_WORD
:
221 return SRE_UNI_IS_WORD(ch
);
222 case SRE_CATEGORY_UNI_NOT_WORD
:
223 return !SRE_UNI_IS_WORD(ch
);
224 case SRE_CATEGORY_UNI_LINEBREAK
:
225 return SRE_UNI_IS_LINEBREAK(ch
);
226 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
227 return !SRE_UNI_IS_LINEBREAK(ch
);
229 case SRE_CATEGORY_UNI_DIGIT
:
230 return SRE_IS_DIGIT(ch
);
231 case SRE_CATEGORY_UNI_NOT_DIGIT
:
232 return !SRE_IS_DIGIT(ch
);
233 case SRE_CATEGORY_UNI_SPACE
:
234 return SRE_IS_SPACE(ch
);
235 case SRE_CATEGORY_UNI_NOT_SPACE
:
236 return !SRE_IS_SPACE(ch
);
237 case SRE_CATEGORY_UNI_WORD
:
238 return SRE_LOC_IS_WORD(ch
);
239 case SRE_CATEGORY_UNI_NOT_WORD
:
240 return !SRE_LOC_IS_WORD(ch
);
241 case SRE_CATEGORY_UNI_LINEBREAK
:
242 return SRE_IS_LINEBREAK(ch
);
243 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
244 return !SRE_IS_LINEBREAK(ch
);
253 data_stack_dealloc(SRE_STATE
* state
)
255 if (state
->data_stack
) {
256 PyMem_FREE(state
->data_stack
);
257 state
->data_stack
= NULL
;
259 state
->data_stack_size
= state
->data_stack_base
= 0;
263 data_stack_grow(SRE_STATE
* state
, Py_ssize_t size
)
265 Py_ssize_t minsize
, cursize
;
266 minsize
= state
->data_stack_base
+size
;
267 cursize
= state
->data_stack_size
;
268 if (cursize
< minsize
) {
270 cursize
= minsize
+minsize
/4+1024;
271 TRACE(("allocate/grow stack %d\n", cursize
));
272 stack
= PyMem_REALLOC(state
->data_stack
, cursize
);
274 data_stack_dealloc(state
);
275 return SRE_ERROR_MEMORY
;
277 state
->data_stack
= (char *)stack
;
278 state
->data_stack_size
= cursize
;
283 /* generate 8-bit version */
285 #define SRE_CHAR unsigned char
286 #define SRE_AT sre_at
287 #define SRE_COUNT sre_count
288 #define SRE_CHARSET sre_charset
289 #define SRE_INFO sre_info
290 #define SRE_MATCH sre_match
291 #define SRE_MATCH_CONTEXT sre_match_context
292 #define SRE_SEARCH sre_search
293 #define SRE_LITERAL_TEMPLATE sre_literal_template
295 #if defined(HAVE_UNICODE)
297 #define SRE_RECURSIVE
301 #undef SRE_LITERAL_TEMPLATE
304 #undef SRE_MATCH_CONTEXT
311 /* generate 16-bit unicode version */
313 #define SRE_CHAR Py_UNICODE
314 #define SRE_AT sre_uat
315 #define SRE_COUNT sre_ucount
316 #define SRE_CHARSET sre_ucharset
317 #define SRE_INFO sre_uinfo
318 #define SRE_MATCH sre_umatch
319 #define SRE_MATCH_CONTEXT sre_umatch_context
320 #define SRE_SEARCH sre_usearch
321 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
324 #endif /* SRE_RECURSIVE */
326 /* -------------------------------------------------------------------- */
327 /* String matching engine */
329 /* the following section is compiled twice, with different character
333 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
335 /* check if pointer is at given position */
337 Py_ssize_t thisp
, thatp
;
341 case SRE_AT_BEGINNING
:
342 case SRE_AT_BEGINNING_STRING
:
343 return ((void*) ptr
== state
->beginning
);
345 case SRE_AT_BEGINNING_LINE
:
346 return ((void*) ptr
== state
->beginning
||
347 SRE_IS_LINEBREAK((int) ptr
[-1]));
350 return (((void*) (ptr
+1) == state
->end
&&
351 SRE_IS_LINEBREAK((int) ptr
[0])) ||
352 ((void*) ptr
== state
->end
));
354 case SRE_AT_END_LINE
:
355 return ((void*) ptr
== state
->end
||
356 SRE_IS_LINEBREAK((int) ptr
[0]));
358 case SRE_AT_END_STRING
:
359 return ((void*) ptr
== state
->end
);
361 case SRE_AT_BOUNDARY
:
362 if (state
->beginning
== state
->end
)
364 thatp
= ((void*) ptr
> state
->beginning
) ?
365 SRE_IS_WORD((int) ptr
[-1]) : 0;
366 thisp
= ((void*) ptr
< state
->end
) ?
367 SRE_IS_WORD((int) ptr
[0]) : 0;
368 return thisp
!= thatp
;
370 case SRE_AT_NON_BOUNDARY
:
371 if (state
->beginning
== state
->end
)
373 thatp
= ((void*) ptr
> state
->beginning
) ?
374 SRE_IS_WORD((int) ptr
[-1]) : 0;
375 thisp
= ((void*) ptr
< state
->end
) ?
376 SRE_IS_WORD((int) ptr
[0]) : 0;
377 return thisp
== thatp
;
379 case SRE_AT_LOC_BOUNDARY
:
380 if (state
->beginning
== state
->end
)
382 thatp
= ((void*) ptr
> state
->beginning
) ?
383 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
384 thisp
= ((void*) ptr
< state
->end
) ?
385 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
386 return thisp
!= thatp
;
388 case SRE_AT_LOC_NON_BOUNDARY
:
389 if (state
->beginning
== state
->end
)
391 thatp
= ((void*) ptr
> state
->beginning
) ?
392 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
393 thisp
= ((void*) ptr
< state
->end
) ?
394 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
395 return thisp
== thatp
;
397 #if defined(HAVE_UNICODE)
398 case SRE_AT_UNI_BOUNDARY
:
399 if (state
->beginning
== state
->end
)
401 thatp
= ((void*) ptr
> state
->beginning
) ?
402 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
403 thisp
= ((void*) ptr
< state
->end
) ?
404 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
405 return thisp
!= thatp
;
407 case SRE_AT_UNI_NON_BOUNDARY
:
408 if (state
->beginning
== state
->end
)
410 thatp
= ((void*) ptr
> state
->beginning
) ?
411 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
412 thisp
= ((void*) ptr
< state
->end
) ?
413 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
414 return thisp
== thatp
;
423 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
425 /* check if character is a member of the given set */
436 /* <LITERAL> <code> */
442 case SRE_OP_CATEGORY
:
443 /* <CATEGORY> <code> */
444 if (sre_category(set
[0], (int) ch
))
450 if (sizeof(SRE_CODE
) == 2) {
451 /* <CHARSET> <bitmap> (16 bits per code word) */
452 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
457 /* <CHARSET> <bitmap> (32 bits per code word) */
458 if (ch
< 256 && (set
[ch
>> 5] & (1 << (ch
& 31))))
465 /* <RANGE> <lower> <upper> */
466 if (set
[0] <= ch
&& ch
<= set
[1])
475 case SRE_OP_BIGCHARSET
:
476 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
478 Py_ssize_t count
, block
;
481 if (sizeof(SRE_CODE
) == 2) {
482 block
= ((unsigned char*)set
)[ch
>> 8];
484 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
489 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
490 * warnings when c's type supports only numbers < N+1 */
492 block
= ((unsigned char*)set
)[ch
>> 8];
497 (set
[block
*8 + ((ch
& 255)>>5)] & (1 << (ch
& 31))))
505 /* internal error -- there's not much we can do about it
506 here, so let's just pretend it didn't match... */
512 LOCAL(Py_ssize_t
) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
);
515 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, Py_ssize_t maxcount
)
518 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->ptr
;
519 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
523 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
524 end
= ptr
+ maxcount
;
526 switch (pattern
[0]) {
530 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
531 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
536 /* repeated dot wildcard. */
537 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
538 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
543 /* repeated dot wildcard. skip to the end of the target
544 string, and backtrack from there */
545 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
550 /* repeated literal */
552 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
553 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
557 case SRE_OP_LITERAL_IGNORE
:
558 /* repeated literal */
560 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
561 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
565 case SRE_OP_NOT_LITERAL
:
566 /* repeated non-literal */
568 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
569 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
573 case SRE_OP_NOT_LITERAL_IGNORE
:
574 /* repeated non-literal */
576 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
577 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
582 /* repeated single character pattern */
583 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
584 while ((SRE_CHAR
*) state
->ptr
< end
) {
585 i
= SRE_MATCH(state
, pattern
);
591 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
592 (SRE_CHAR
*) state
->ptr
- ptr
));
593 return (SRE_CHAR
*) state
->ptr
- ptr
;
596 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
597 return ptr
- (SRE_CHAR
*) state
->ptr
;
600 #if 0 /* not used in this release */
602 SRE_INFO(SRE_STATE
* state
, SRE_CODE
* pattern
)
604 /* check if an SRE_OP_INFO block matches at the current position.
605 returns the number of SRE_CODE objects to skip if successful, 0
608 SRE_CHAR
* end
= state
->end
;
609 SRE_CHAR
* ptr
= state
->ptr
;
612 /* check minimal length */
613 if (pattern
[3] && (end
- ptr
) < pattern
[3])
616 /* check known prefix */
617 if (pattern
[2] & SRE_INFO_PREFIX
&& pattern
[5] > 1) {
618 /* <length> <skip> <prefix data> <overlap data> */
619 for (i
= 0; i
< pattern
[5]; i
++)
620 if ((SRE_CODE
) ptr
[i
] != pattern
[7 + i
])
622 return pattern
[0] + 2 * pattern
[6];
628 /* The macros below should be used to protect recursive SRE_MATCH()
629 * calls that *failed* and do *not* return immediately (IOW, those
630 * that will backtrack). Explaining:
632 * - Recursive SRE_MATCH() returned true: that's usually a success
633 * (besides atypical cases like ASSERT_NOT), therefore there's no
634 * reason to restore lastmark;
636 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
637 * is returning to the caller: If the current SRE_MATCH() is the
638 * top function of the recursion, returning false will be a matching
639 * failure, and it doesn't matter where lastmark is pointing to.
640 * If it's *not* the top function, it will be a recursive SRE_MATCH()
641 * failure by itself, and the calling SRE_MATCH() will have to deal
642 * with the failure by the same rules explained here (it will restore
643 * lastmark by itself if necessary);
645 * - Recursive SRE_MATCH() returned false, and will continue the
646 * outside 'for' loop: must be protected when breaking, since the next
647 * OP could potentially depend on lastmark;
649 * - Recursive SRE_MATCH() returned false, and will be called again
650 * inside a local for/while loop: must be protected between each
651 * loop iteration, since the recursive SRE_MATCH() could do anything,
652 * and could potentially depend on lastmark.
654 * For more information, check the discussion at SF patch #712900.
656 #define LASTMARK_SAVE() \
658 ctx->lastmark = state->lastmark; \
659 ctx->lastindex = state->lastindex; \
661 #define LASTMARK_RESTORE() \
663 state->lastmark = ctx->lastmark; \
664 state->lastindex = ctx->lastindex; \
667 #define RETURN_ERROR(i) do { return i; } while(0)
668 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
669 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
671 #define RETURN_ON_ERROR(i) \
672 do { if (i < 0) RETURN_ERROR(i); } while (0)
673 #define RETURN_ON_SUCCESS(i) \
674 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
675 #define RETURN_ON_FAILURE(i) \
676 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
680 #define DATA_STACK_ALLOC(state, type, ptr) \
682 alloc_pos = state->data_stack_base; \
683 TRACE(("allocating %s in %d (%d)\n", \
684 SFY(type), alloc_pos, sizeof(type))); \
685 if (state->data_stack_size < alloc_pos+sizeof(type)) { \
686 int j = data_stack_grow(state, sizeof(type)); \
687 if (j < 0) return j; \
689 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
691 ptr = (type*)(state->data_stack+alloc_pos); \
692 state->data_stack_base += sizeof(type); \
695 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
697 TRACE(("looking up %s at %d\n", SFY(type), pos)); \
698 ptr = (type*)(state->data_stack+pos); \
701 #define DATA_STACK_PUSH(state, data, size) \
703 TRACE(("copy data in %p to %d (%d)\n", \
704 data, state->data_stack_base, size)); \
705 if (state->data_stack_size < state->data_stack_base+size) { \
706 int j = data_stack_grow(state, size); \
707 if (j < 0) return j; \
709 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
711 memcpy(state->data_stack+state->data_stack_base, data, size); \
712 state->data_stack_base += size; \
715 #define DATA_STACK_POP(state, data, size, discard) \
717 TRACE(("copy data to %p from %d (%d)\n", \
718 data, state->data_stack_base-size, size)); \
719 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
721 state->data_stack_base -= size; \
724 #define DATA_STACK_POP_DISCARD(state, size) \
726 TRACE(("discard data from %d (%d)\n", \
727 state->data_stack_base-size, size)); \
728 state->data_stack_base -= size; \
731 #define DATA_PUSH(x) \
732 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
733 #define DATA_POP(x) \
734 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
735 #define DATA_POP_DISCARD(x) \
736 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
737 #define DATA_ALLOC(t,p) \
738 DATA_STACK_ALLOC(state, t, p)
739 #define DATA_LOOKUP_AT(t,p,pos) \
740 DATA_STACK_LOOKUP_AT(state,t,p,pos)
742 #define MARK_PUSH(lastmark) \
743 do if (lastmark > 0) { \
744 i = lastmark; /* ctx->lastmark may change if reallocated */ \
745 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
747 #define MARK_POP(lastmark) \
748 do if (lastmark > 0) { \
749 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
751 #define MARK_POP_KEEP(lastmark) \
752 do if (lastmark > 0) { \
753 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
755 #define MARK_POP_DISCARD(lastmark) \
756 do if (lastmark > 0) { \
757 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
761 #define JUMP_MAX_UNTIL_1 1
762 #define JUMP_MAX_UNTIL_2 2
763 #define JUMP_MAX_UNTIL_3 3
764 #define JUMP_MIN_UNTIL_1 4
765 #define JUMP_MIN_UNTIL_2 5
766 #define JUMP_MIN_UNTIL_3 6
767 #define JUMP_REPEAT 7
768 #define JUMP_REPEAT_ONE_1 8
769 #define JUMP_REPEAT_ONE_2 9
770 #define JUMP_MIN_REPEAT_ONE 10
771 #define JUMP_BRANCH 11
772 #define JUMP_ASSERT 12
773 #define JUMP_ASSERT_NOT 13
775 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
776 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
777 nextctx->last_ctx_pos = ctx_pos; \
778 nextctx->jump = jumpvalue; \
779 nextctx->pattern = nextpattern; \
780 ctx_pos = alloc_pos; \
784 while (0) /* gcc doesn't like labels at end of scopes */ \
787 Py_ssize_t last_ctx_pos
;
793 Py_ssize_t lastindex
;
800 /* check if string matches the given pattern. returns <0 for
801 error, 0 for failure, and 1 for success */
803 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
805 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
806 Py_ssize_t alloc_pos
, ctx_pos
= -1;
807 Py_ssize_t i
, ret
= 0;
809 unsigned int sigcount
=0;
811 SRE_MATCH_CONTEXT
* ctx
;
812 SRE_MATCH_CONTEXT
* nextctx
;
814 TRACE(("|%p|%p|ENTER\n", pattern
, state
->ptr
));
816 DATA_ALLOC(SRE_MATCH_CONTEXT
, ctx
);
817 ctx
->last_ctx_pos
= -1;
818 ctx
->jump
= JUMP_NONE
;
819 ctx
->pattern
= pattern
;
824 ctx
->ptr
= (SRE_CHAR
*)state
->ptr
;
826 if (ctx
->pattern
[0] == SRE_OP_INFO
) {
827 /* optimization info block */
828 /* <INFO> <1=skip> <2=flags> <3=min> ... */
829 if (ctx
->pattern
[3] && (end
- ctx
->ptr
) < ctx
->pattern
[3]) {
830 TRACE(("reject (got %d chars, need %d)\n",
831 (end
- ctx
->ptr
), ctx
->pattern
[3]));
834 ctx
->pattern
+= ctx
->pattern
[1] + 1;
839 if ((0 == (sigcount
& 0xfff)) && PyErr_CheckSignals())
840 RETURN_ERROR(SRE_ERROR_INTERRUPTED
);
842 switch (*ctx
->pattern
++) {
847 TRACE(("|%p|%p|MARK %d\n", ctx
->pattern
,
848 ctx
->ptr
, ctx
->pattern
[0]));
851 state
->lastindex
= i
/2 + 1;
852 if (i
> state
->lastmark
) {
853 /* state->lastmark is the highest valid index in the
854 state->mark array. If it is increased by more than 1,
855 the intervening marks must be set to NULL to signal
856 that these marks have not been encountered. */
857 Py_ssize_t j
= state
->lastmark
+ 1;
859 state
->mark
[j
++] = NULL
;
862 state
->mark
[i
] = ctx
->ptr
;
867 /* match literal string */
868 /* <LITERAL> <code> */
869 TRACE(("|%p|%p|LITERAL %d\n", ctx
->pattern
,
870 ctx
->ptr
, *ctx
->pattern
));
871 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] != ctx
->pattern
[0])
877 case SRE_OP_NOT_LITERAL
:
878 /* match anything that is not literal character */
879 /* <NOT_LITERAL> <code> */
880 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx
->pattern
,
881 ctx
->ptr
, *ctx
->pattern
));
882 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] == ctx
->pattern
[0])
890 TRACE(("|%p|%p|SUCCESS\n", ctx
->pattern
, ctx
->ptr
));
891 state
->ptr
= ctx
->ptr
;
895 /* match at given position */
897 TRACE(("|%p|%p|AT %d\n", ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
898 if (!SRE_AT(state
, ctx
->ptr
, *ctx
->pattern
))
903 case SRE_OP_CATEGORY
:
904 /* match at given category */
905 /* <CATEGORY> <code> */
906 TRACE(("|%p|%p|CATEGORY %d\n", ctx
->pattern
,
907 ctx
->ptr
, *ctx
->pattern
));
908 if (ctx
->ptr
>= end
|| !sre_category(ctx
->pattern
[0], ctx
->ptr
[0]))
915 /* match anything (except a newline) */
917 TRACE(("|%p|%p|ANY\n", ctx
->pattern
, ctx
->ptr
));
918 if (ctx
->ptr
>= end
|| SRE_IS_LINEBREAK(ctx
->ptr
[0]))
926 TRACE(("|%p|%p|ANY_ALL\n", ctx
->pattern
, ctx
->ptr
));
933 /* match set member (or non_member) */
934 /* <IN> <skip> <set> */
935 TRACE(("|%p|%p|IN\n", ctx
->pattern
, ctx
->ptr
));
936 if (ctx
->ptr
>= end
|| !SRE_CHARSET(ctx
->pattern
+ 1, *ctx
->ptr
))
938 ctx
->pattern
+= ctx
->pattern
[0];
942 case SRE_OP_LITERAL_IGNORE
:
943 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
944 ctx
->pattern
, ctx
->ptr
, ctx
->pattern
[0]));
945 if (ctx
->ptr
>= end
||
946 state
->lower(*ctx
->ptr
) != state
->lower(*ctx
->pattern
))
952 case SRE_OP_NOT_LITERAL_IGNORE
:
953 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
954 ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
955 if (ctx
->ptr
>= end
||
956 state
->lower(*ctx
->ptr
) == state
->lower(*ctx
->pattern
))
962 case SRE_OP_IN_IGNORE
:
963 TRACE(("|%p|%p|IN_IGNORE\n", ctx
->pattern
, ctx
->ptr
));
965 || !SRE_CHARSET(ctx
->pattern
+1,
966 (SRE_CODE
)state
->lower(*ctx
->ptr
)))
968 ctx
->pattern
+= ctx
->pattern
[0];
975 /* <JUMP> <offset> */
976 TRACE(("|%p|%p|JUMP %d\n", ctx
->pattern
,
977 ctx
->ptr
, ctx
->pattern
[0]));
978 ctx
->pattern
+= ctx
->pattern
[0];
983 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
984 TRACE(("|%p|%p|BRANCH\n", ctx
->pattern
, ctx
->ptr
));
986 ctx
->u
.rep
= state
->repeat
;
988 MARK_PUSH(ctx
->lastmark
);
989 for (; ctx
->pattern
[0]; ctx
->pattern
+= ctx
->pattern
[0]) {
990 if (ctx
->pattern
[1] == SRE_OP_LITERAL
&&
992 (SRE_CODE
) *ctx
->ptr
!= ctx
->pattern
[2]))
994 if (ctx
->pattern
[1] == SRE_OP_IN
&&
996 !SRE_CHARSET(ctx
->pattern
+ 3, (SRE_CODE
) *ctx
->ptr
)))
998 state
->ptr
= ctx
->ptr
;
999 DO_JUMP(JUMP_BRANCH
, jump_branch
, ctx
->pattern
+1);
1002 MARK_POP_DISCARD(ctx
->lastmark
);
1003 RETURN_ON_ERROR(ret
);
1007 MARK_POP_KEEP(ctx
->lastmark
);
1011 MARK_POP_DISCARD(ctx
->lastmark
);
1014 case SRE_OP_REPEAT_ONE
:
1015 /* match repeated sequence (maximizing regexp) */
1017 /* this operator only works if the repeated item is
1018 exactly one character wide, and we're not already
1019 collecting backtracking points. for other cases,
1020 use the MAX_REPEAT operator */
1022 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1024 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1025 ctx
->pattern
[1], ctx
->pattern
[2]));
1027 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1028 RETURN_FAILURE
; /* cannot match */
1030 state
->ptr
= ctx
->ptr
;
1032 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[2]);
1033 RETURN_ON_ERROR(ret
);
1034 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1036 ctx
->ptr
+= ctx
->count
;
1038 /* when we arrive here, count contains the number of
1039 matches, and ctx->ptr points to the tail of the target
1040 string. check if the rest of the pattern matches,
1041 and backtrack if not. */
1043 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1046 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1047 /* tail is empty. we're finished */
1048 state
->ptr
= ctx
->ptr
;
1054 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_LITERAL
) {
1055 /* tail starts with a literal. skip positions where
1056 the rest of the pattern cannot possibly match */
1057 ctx
->u
.chr
= ctx
->pattern
[ctx
->pattern
[0]+1];
1059 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1] &&
1060 (ctx
->ptr
>= end
|| *ctx
->ptr
!= ctx
->u
.chr
)) {
1064 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1066 state
->ptr
= ctx
->ptr
;
1067 DO_JUMP(JUMP_REPEAT_ONE_1
, jump_repeat_one_1
,
1068 ctx
->pattern
+ctx
->pattern
[0]);
1070 RETURN_ON_ERROR(ret
);
1082 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1]) {
1083 state
->ptr
= ctx
->ptr
;
1084 DO_JUMP(JUMP_REPEAT_ONE_2
, jump_repeat_one_2
,
1085 ctx
->pattern
+ctx
->pattern
[0]);
1087 RETURN_ON_ERROR(ret
);
1097 case SRE_OP_MIN_REPEAT_ONE
:
1098 /* match repeated sequence (minimizing regexp) */
1100 /* this operator only works if the repeated item is
1101 exactly one character wide, and we're not already
1102 collecting backtracking points. for other cases,
1103 use the MIN_REPEAT operator */
1105 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1107 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1108 ctx
->pattern
[1], ctx
->pattern
[2]));
1110 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1111 RETURN_FAILURE
; /* cannot match */
1113 state
->ptr
= ctx
->ptr
;
1115 if (ctx
->pattern
[1] == 0)
1118 /* count using pattern min as the maximum */
1119 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[1]);
1120 RETURN_ON_ERROR(ret
);
1121 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1122 if (ret
< (Py_ssize_t
) ctx
->pattern
[1])
1123 /* didn't match minimum number of times */
1125 /* advance past minimum matches of repeat */
1127 ctx
->ptr
+= ctx
->count
;
1130 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1131 /* tail is empty. we're finished */
1132 state
->ptr
= ctx
->ptr
;
1138 while ((Py_ssize_t
)ctx
->pattern
[2] == 65535
1139 || ctx
->count
<= (Py_ssize_t
)ctx
->pattern
[2]) {
1140 state
->ptr
= ctx
->ptr
;
1141 DO_JUMP(JUMP_MIN_REPEAT_ONE
,jump_min_repeat_one
,
1142 ctx
->pattern
+ctx
->pattern
[0]);
1144 RETURN_ON_ERROR(ret
);
1147 state
->ptr
= ctx
->ptr
;
1148 ret
= SRE_COUNT(state
, ctx
->pattern
+3, 1);
1149 RETURN_ON_ERROR(ret
);
1150 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1162 /* create repeat context. all the hard work is done
1163 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1164 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1165 TRACE(("|%p|%p|REPEAT %d %d\n", ctx
->pattern
, ctx
->ptr
,
1166 ctx
->pattern
[1], ctx
->pattern
[2]));
1168 /* install new repeat context */
1169 ctx
->u
.rep
= (SRE_REPEAT
*) PyObject_MALLOC(sizeof(*ctx
->u
.rep
));
1174 ctx
->u
.rep
->count
= -1;
1175 ctx
->u
.rep
->pattern
= ctx
->pattern
;
1176 ctx
->u
.rep
->prev
= state
->repeat
;
1177 ctx
->u
.rep
->last_ptr
= NULL
;
1178 state
->repeat
= ctx
->u
.rep
;
1180 state
->ptr
= ctx
->ptr
;
1181 DO_JUMP(JUMP_REPEAT
, jump_repeat
, ctx
->pattern
+ctx
->pattern
[0]);
1182 state
->repeat
= ctx
->u
.rep
->prev
;
1183 PyObject_FREE(ctx
->u
.rep
);
1186 RETURN_ON_ERROR(ret
);
1191 case SRE_OP_MAX_UNTIL
:
1192 /* maximizing repeat */
1193 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1195 /* FIXME: we probably need to deal with zero-width
1196 matches in here... */
1198 ctx
->u
.rep
= state
->repeat
;
1200 RETURN_ERROR(SRE_ERROR_STATE
);
1202 state
->ptr
= ctx
->ptr
;
1204 ctx
->count
= ctx
->u
.rep
->count
+1;
1206 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx
->pattern
,
1207 ctx
->ptr
, ctx
->count
));
1209 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1210 /* not enough matches */
1211 ctx
->u
.rep
->count
= ctx
->count
;
1212 DO_JUMP(JUMP_MAX_UNTIL_1
, jump_max_until_1
,
1213 ctx
->u
.rep
->pattern
+3);
1215 RETURN_ON_ERROR(ret
);
1218 ctx
->u
.rep
->count
= ctx
->count
-1;
1219 state
->ptr
= ctx
->ptr
;
1223 if ((ctx
->count
< ctx
->u
.rep
->pattern
[2] ||
1224 ctx
->u
.rep
->pattern
[2] == 65535) &&
1225 state
->ptr
!= ctx
->u
.rep
->last_ptr
) {
1226 /* we may have enough matches, but if we can
1227 match another item, do so */
1228 ctx
->u
.rep
->count
= ctx
->count
;
1230 MARK_PUSH(ctx
->lastmark
);
1231 /* zero-width match protection */
1232 DATA_PUSH(&ctx
->u
.rep
->last_ptr
);
1233 ctx
->u
.rep
->last_ptr
= state
->ptr
;
1234 DO_JUMP(JUMP_MAX_UNTIL_2
, jump_max_until_2
,
1235 ctx
->u
.rep
->pattern
+3);
1236 DATA_POP(&ctx
->u
.rep
->last_ptr
);
1238 MARK_POP_DISCARD(ctx
->lastmark
);
1239 RETURN_ON_ERROR(ret
);
1242 MARK_POP(ctx
->lastmark
);
1244 ctx
->u
.rep
->count
= ctx
->count
-1;
1245 state
->ptr
= ctx
->ptr
;
1248 /* cannot match more repeated items here. make sure the
1250 state
->repeat
= ctx
->u
.rep
->prev
;
1251 DO_JUMP(JUMP_MAX_UNTIL_3
, jump_max_until_3
, ctx
->pattern
);
1252 RETURN_ON_SUCCESS(ret
);
1253 state
->repeat
= ctx
->u
.rep
;
1254 state
->ptr
= ctx
->ptr
;
1257 case SRE_OP_MIN_UNTIL
:
1258 /* minimizing repeat */
1259 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1261 ctx
->u
.rep
= state
->repeat
;
1263 RETURN_ERROR(SRE_ERROR_STATE
);
1265 state
->ptr
= ctx
->ptr
;
1267 ctx
->count
= ctx
->u
.rep
->count
+1;
1269 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx
->pattern
,
1270 ctx
->ptr
, ctx
->count
, ctx
->u
.rep
->pattern
));
1272 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1273 /* not enough matches */
1274 ctx
->u
.rep
->count
= ctx
->count
;
1275 DO_JUMP(JUMP_MIN_UNTIL_1
, jump_min_until_1
,
1276 ctx
->u
.rep
->pattern
+3);
1278 RETURN_ON_ERROR(ret
);
1281 ctx
->u
.rep
->count
= ctx
->count
-1;
1282 state
->ptr
= ctx
->ptr
;
1288 /* see if the tail matches */
1289 state
->repeat
= ctx
->u
.rep
->prev
;
1290 DO_JUMP(JUMP_MIN_UNTIL_2
, jump_min_until_2
, ctx
->pattern
);
1292 RETURN_ON_ERROR(ret
);
1296 state
->repeat
= ctx
->u
.rep
;
1297 state
->ptr
= ctx
->ptr
;
1301 if (ctx
->count
>= ctx
->u
.rep
->pattern
[2]
1302 && ctx
->u
.rep
->pattern
[2] != 65535)
1305 ctx
->u
.rep
->count
= ctx
->count
;
1306 DO_JUMP(JUMP_MIN_UNTIL_3
,jump_min_until_3
,
1307 ctx
->u
.rep
->pattern
+3);
1309 RETURN_ON_ERROR(ret
);
1312 ctx
->u
.rep
->count
= ctx
->count
-1;
1313 state
->ptr
= ctx
->ptr
;
1316 case SRE_OP_GROUPREF
:
1317 /* match backreference */
1318 TRACE(("|%p|%p|GROUPREF %d\n", ctx
->pattern
,
1319 ctx
->ptr
, ctx
->pattern
[0]));
1320 i
= ctx
->pattern
[0];
1322 Py_ssize_t groupref
= i
+i
;
1323 if (groupref
>= state
->lastmark
) {
1326 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1327 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1328 if (!p
|| !e
|| e
< p
)
1331 if (ctx
->ptr
>= end
|| *ctx
->ptr
!= *p
)
1340 case SRE_OP_GROUPREF_IGNORE
:
1341 /* match backreference */
1342 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx
->pattern
,
1343 ctx
->ptr
, ctx
->pattern
[0]));
1344 i
= ctx
->pattern
[0];
1346 Py_ssize_t groupref
= i
+i
;
1347 if (groupref
>= state
->lastmark
) {
1350 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1351 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1352 if (!p
|| !e
|| e
< p
)
1355 if (ctx
->ptr
>= end
||
1356 state
->lower(*ctx
->ptr
) != state
->lower(*p
))
1365 case SRE_OP_GROUPREF_EXISTS
:
1366 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx
->pattern
,
1367 ctx
->ptr
, ctx
->pattern
[0]));
1368 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1369 i
= ctx
->pattern
[0];
1371 Py_ssize_t groupref
= i
+i
;
1372 if (groupref
>= state
->lastmark
) {
1373 ctx
->pattern
+= ctx
->pattern
[1];
1376 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1377 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1378 if (!p
|| !e
|| e
< p
) {
1379 ctx
->pattern
+= ctx
->pattern
[1];
1388 /* assert subpattern */
1389 /* <ASSERT> <skip> <back> <pattern> */
1390 TRACE(("|%p|%p|ASSERT %d\n", ctx
->pattern
,
1391 ctx
->ptr
, ctx
->pattern
[1]));
1392 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1393 if (state
->ptr
< state
->beginning
)
1395 DO_JUMP(JUMP_ASSERT
, jump_assert
, ctx
->pattern
+2);
1396 RETURN_ON_FAILURE(ret
);
1397 ctx
->pattern
+= ctx
->pattern
[0];
1400 case SRE_OP_ASSERT_NOT
:
1401 /* assert not subpattern */
1402 /* <ASSERT_NOT> <skip> <back> <pattern> */
1403 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx
->pattern
,
1404 ctx
->ptr
, ctx
->pattern
[1]));
1405 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1406 if (state
->ptr
>= state
->beginning
) {
1407 DO_JUMP(JUMP_ASSERT_NOT
, jump_assert_not
, ctx
->pattern
+2);
1409 RETURN_ON_ERROR(ret
);
1413 ctx
->pattern
+= ctx
->pattern
[0];
1416 case SRE_OP_FAILURE
:
1417 /* immediate failure */
1418 TRACE(("|%p|%p|FAILURE\n", ctx
->pattern
, ctx
->ptr
));
1422 TRACE(("|%p|%p|UNKNOWN %d\n", ctx
->pattern
, ctx
->ptr
,
1424 RETURN_ERROR(SRE_ERROR_ILLEGAL
);
1429 ctx_pos
= ctx
->last_ctx_pos
;
1431 DATA_POP_DISCARD(ctx
);
1434 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1437 case JUMP_MAX_UNTIL_2
:
1438 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1439 goto jump_max_until_2
;
1440 case JUMP_MAX_UNTIL_3
:
1441 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1442 goto jump_max_until_3
;
1443 case JUMP_MIN_UNTIL_2
:
1444 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1445 goto jump_min_until_2
;
1446 case JUMP_MIN_UNTIL_3
:
1447 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1448 goto jump_min_until_3
;
1450 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx
->pattern
, ctx
->ptr
));
1452 case JUMP_MAX_UNTIL_1
:
1453 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1454 goto jump_max_until_1
;
1455 case JUMP_MIN_UNTIL_1
:
1456 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1457 goto jump_min_until_1
;
1459 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx
->pattern
, ctx
->ptr
));
1461 case JUMP_REPEAT_ONE_1
:
1462 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx
->pattern
, ctx
->ptr
));
1463 goto jump_repeat_one_1
;
1464 case JUMP_REPEAT_ONE_2
:
1465 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx
->pattern
, ctx
->ptr
));
1466 goto jump_repeat_one_2
;
1467 case JUMP_MIN_REPEAT_ONE
:
1468 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx
->pattern
, ctx
->ptr
));
1469 goto jump_min_repeat_one
;
1471 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx
->pattern
, ctx
->ptr
));
1473 case JUMP_ASSERT_NOT
:
1474 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx
->pattern
, ctx
->ptr
));
1475 goto jump_assert_not
;
1477 TRACE(("|%p|%p|RETURN %d\n", ctx
->pattern
, ctx
->ptr
, ret
));
1481 return ret
; /* should never get here */
1485 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1487 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->start
;
1488 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
1489 Py_ssize_t status
= 0;
1490 Py_ssize_t prefix_len
= 0;
1491 Py_ssize_t prefix_skip
= 0;
1492 SRE_CODE
* prefix
= NULL
;
1493 SRE_CODE
* charset
= NULL
;
1494 SRE_CODE
* overlap
= NULL
;
1497 if (pattern
[0] == SRE_OP_INFO
) {
1498 /* optimization info block */
1499 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1503 if (pattern
[3] > 1) {
1504 /* adjust end point (but make sure we leave at least one
1505 character in there, so literal search will work) */
1506 end
-= pattern
[3]-1;
1511 if (flags
& SRE_INFO_PREFIX
) {
1512 /* pattern starts with a known prefix */
1513 /* <length> <skip> <prefix data> <overlap data> */
1514 prefix_len
= pattern
[5];
1515 prefix_skip
= pattern
[6];
1516 prefix
= pattern
+ 7;
1517 overlap
= prefix
+ prefix_len
- 1;
1518 } else if (flags
& SRE_INFO_CHARSET
)
1519 /* pattern starts with a character from a known set */
1521 charset
= pattern
+ 5;
1523 pattern
+= 1 + pattern
[1];
1526 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1527 TRACE(("charset = %p\n", charset
));
1529 #if defined(USE_FAST_SEARCH)
1530 if (prefix_len
> 1) {
1531 /* pattern starts with a known prefix. use the overlap
1532 table to skip forward as fast as we possibly can */
1534 end
= (SRE_CHAR
*)state
->end
;
1537 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1543 if (++i
== prefix_len
) {
1544 /* found a potential match */
1545 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1546 state
->start
= ptr
+ 1 - prefix_len
;
1547 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1548 if (flags
& SRE_INFO_LITERAL
)
1549 return 1; /* we got all of it */
1550 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
);
1553 /* close but no cigar -- try again */
1565 if (pattern
[0] == SRE_OP_LITERAL
) {
1566 /* pattern starts with a literal character. this is used
1567 for short prefixes, and if fast search is disabled */
1568 SRE_CODE chr
= pattern
[1];
1569 end
= (SRE_CHAR
*)state
->end
;
1571 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1575 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1578 if (flags
& SRE_INFO_LITERAL
)
1579 return 1; /* we got all of it */
1580 status
= SRE_MATCH(state
, pattern
+ 2);
1584 } else if (charset
) {
1585 /* pattern starts with a character from a known set */
1586 end
= (SRE_CHAR
*)state
->end
;
1588 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1592 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1595 status
= SRE_MATCH(state
, pattern
);
1602 while (ptr
<= end
) {
1603 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1604 state
->start
= state
->ptr
= ptr
++;
1605 status
= SRE_MATCH(state
, pattern
);
1614 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, Py_ssize_t len
)
1616 /* check if given string is a literal template (i.e. no escapes) */
1623 #if !defined(SRE_RECURSIVE)
1625 /* -------------------------------------------------------------------- */
1626 /* factories and destructors */
1628 /* see sre.h for object declarations */
1629 static PyObject
*pattern_new_match(PatternObject
*, SRE_STATE
*, int);
1630 static PyObject
*pattern_scanner(PatternObject
*, PyObject
*);
1633 sre_codesize(PyObject
* self
, PyObject
*unused
)
1635 return Py_BuildValue("l", sizeof(SRE_CODE
));
1639 sre_getlower(PyObject
* self
, PyObject
* args
)
1641 int character
, flags
;
1642 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1644 if (flags
& SRE_FLAG_LOCALE
)
1645 return Py_BuildValue("i", sre_lower_locale(character
));
1646 if (flags
& SRE_FLAG_UNICODE
)
1647 #if defined(HAVE_UNICODE)
1648 return Py_BuildValue("i", sre_lower_unicode(character
));
1650 return Py_BuildValue("i", sre_lower_locale(character
));
1652 return Py_BuildValue("i", sre_lower(character
));
1656 state_reset(SRE_STATE
* state
)
1658 /* FIXME: dynamic! */
1659 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1661 state
->lastmark
= -1;
1662 state
->lastindex
= -1;
1664 state
->repeat
= NULL
;
1666 data_stack_dealloc(state
);
1670 getstring(PyObject
* string
, Py_ssize_t
* p_length
, int* p_charsize
)
1672 /* given a python object, return a data pointer, a length (in
1673 characters), and a character size. return NULL if the object
1674 is not a string (or not compatible) */
1676 PyBufferProcs
*buffer
;
1677 Py_ssize_t size
, bytes
;
1682 /* Unicode objects do not support the buffer API. So, get the data
1683 directly instead. */
1684 if (PyUnicode_Check(string
)) {
1685 ptr
= (void *)PyUnicode_AS_DATA(string
);
1686 *p_length
= PyUnicode_GET_SIZE(string
);
1687 *p_charsize
= sizeof(Py_UNICODE
);
1691 /* get pointer to string buffer */
1693 buffer
= Py_TYPE(string
)->tp_as_buffer
;
1694 if (!buffer
|| !buffer
->bf_getbuffer
||
1695 (*buffer
->bf_getbuffer
)(string
, &view
, PyBUF_SIMPLE
) < 0) {
1696 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1700 /* determine buffer size */
1704 /* Release the buffer immediately --- possibly dangerous
1705 but doing something else would require some re-factoring
1707 PyObject_ReleaseBuffer(string
, &view
);
1710 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1714 /* determine character size */
1715 size
= PyObject_Size(string
);
1717 if (PyString_Check(string
) || bytes
== size
)
1719 #if defined(HAVE_UNICODE)
1720 else if (bytes
== (Py_ssize_t
) (size
* sizeof(Py_UNICODE
)))
1721 charsize
= sizeof(Py_UNICODE
);
1724 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1729 *p_charsize
= charsize
;
1732 PyErr_SetString(PyExc_ValueError
,
1739 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1740 Py_ssize_t start
, Py_ssize_t end
)
1742 /* prepare state object */
1748 memset(state
, 0, sizeof(SRE_STATE
));
1750 state
->lastmark
= -1;
1751 state
->lastindex
= -1;
1753 ptr
= getstring(string
, &length
, &charsize
);
1757 /* adjust boundaries */
1760 else if (start
> length
)
1765 else if (end
> length
)
1768 state
->charsize
= charsize
;
1770 state
->beginning
= ptr
;
1772 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1773 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1776 state
->string
= string
;
1778 state
->endpos
= end
;
1780 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1781 state
->lower
= sre_lower_locale
;
1782 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1783 #if defined(HAVE_UNICODE)
1784 state
->lower
= sre_lower_unicode
;
1786 state
->lower
= sre_lower_locale
;
1789 state
->lower
= sre_lower
;
1795 state_fini(SRE_STATE
* state
)
1797 Py_XDECREF(state
->string
);
1798 data_stack_dealloc(state
);
1801 /* calculate offset from start of string */
1802 #define STATE_OFFSET(state, member)\
1803 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1806 state_getslice(SRE_STATE
* state
, Py_ssize_t index
, PyObject
* string
, int empty
)
1810 index
= (index
- 1) * 2;
1812 if (string
== Py_None
|| index
>= state
->lastmark
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1814 /* want empty string */
1821 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1822 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1825 return PySequence_GetSlice(string
, i
, j
);
1829 pattern_error(int status
)
1832 case SRE_ERROR_RECURSION_LIMIT
:
1835 "maximum recursion limit exceeded"
1838 case SRE_ERROR_MEMORY
:
1841 case SRE_ERROR_INTERRUPTED
:
1842 /* An exception has already been raised, so let it fly */
1845 /* other error codes indicate compiler/engine bugs */
1848 "internal error in regular expression engine"
1854 pattern_dealloc(PatternObject
* self
)
1856 if (self
->weakreflist
!= NULL
)
1857 PyObject_ClearWeakRefs((PyObject
*) self
);
1858 Py_XDECREF(self
->pattern
);
1859 Py_XDECREF(self
->groupindex
);
1860 Py_XDECREF(self
->indexgroup
);
1865 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1871 Py_ssize_t start
= 0;
1872 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1873 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1874 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:match", kwlist
,
1875 &string
, &start
, &end
))
1878 string
= state_init(&state
, self
, string
, start
, end
);
1882 state
.ptr
= state
.start
;
1884 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1886 if (state
.charsize
== 1) {
1887 status
= sre_match(&state
, PatternObject_GetCode(self
));
1889 #if defined(HAVE_UNICODE)
1890 status
= sre_umatch(&state
, PatternObject_GetCode(self
));
1894 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1895 if (PyErr_Occurred())
1900 return pattern_new_match(self
, &state
, status
);
1904 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1910 Py_ssize_t start
= 0;
1911 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1912 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1913 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:search", kwlist
,
1914 &string
, &start
, &end
))
1917 string
= state_init(&state
, self
, string
, start
, end
);
1921 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1923 if (state
.charsize
== 1) {
1924 status
= sre_search(&state
, PatternObject_GetCode(self
));
1926 #if defined(HAVE_UNICODE)
1927 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1931 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1935 if (PyErr_Occurred())
1938 return pattern_new_match(self
, &state
, status
);
1942 call(char* module
, char* function
, PyObject
* args
)
1951 name
= PyUnicode_FromString(module
);
1954 mod
= PyImport_Import(name
);
1958 func
= PyObject_GetAttrString(mod
, function
);
1962 result
= PyObject_CallObject(func
, args
);
1968 #ifdef USE_BUILTIN_COPY
1970 deepcopy(PyObject
** object
, PyObject
* memo
)
1976 PyTuple_Pack(2, *object
, memo
)
1984 return 1; /* success */
1989 join_list(PyObject
* list
, PyObject
* string
)
1991 /* join list elements */
1994 #if PY_VERSION_HEX >= 0x01060000
2000 joiner
= PySequence_GetSlice(string
, 0, 0);
2004 if (PyList_GET_SIZE(list
) == 0) {
2009 #if PY_VERSION_HEX >= 0x01060000
2010 function
= PyObject_GetAttrString(joiner
, "join");
2015 args
= PyTuple_New(1);
2017 Py_DECREF(function
);
2021 PyTuple_SET_ITEM(args
, 0, list
);
2022 result
= PyObject_CallObject(function
, args
);
2023 Py_DECREF(args
); /* also removes list */
2024 Py_DECREF(function
);
2028 PyTuple_Pack(2, list
, joiner
)
2037 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2045 Py_ssize_t start
= 0;
2046 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2047 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
2048 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:findall", kwlist
,
2049 &string
, &start
, &end
))
2052 string
= state_init(&state
, self
, string
, start
, end
);
2056 list
= PyList_New(0);
2062 while (state
.start
<= state
.end
) {
2066 state_reset(&state
);
2068 state
.ptr
= state
.start
;
2070 if (state
.charsize
== 1) {
2071 status
= sre_search(&state
, PatternObject_GetCode(self
));
2073 #if defined(HAVE_UNICODE)
2074 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2078 if (PyErr_Occurred())
2084 pattern_error(status
);
2088 /* don't bother to build a match object */
2089 switch (self
->groups
) {
2091 b
= STATE_OFFSET(&state
, state
.start
);
2092 e
= STATE_OFFSET(&state
, state
.ptr
);
2093 item
= PySequence_GetSlice(string
, b
, e
);
2098 item
= state_getslice(&state
, 1, string
, 1);
2103 item
= PyTuple_New(self
->groups
);
2106 for (i
= 0; i
< self
->groups
; i
++) {
2107 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
2112 PyTuple_SET_ITEM(item
, i
, o
);
2117 status
= PyList_Append(list
, item
);
2122 if (state
.ptr
== state
.start
)
2123 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2125 state
.start
= state
.ptr
;
2139 #if PY_VERSION_HEX >= 0x02020000
2141 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
2147 scanner
= pattern_scanner(pattern
, args
);
2151 search
= PyObject_GetAttrString(scanner
, "search");
2156 iterator
= PyCallIter_New(search
, Py_None
);
2164 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2175 Py_ssize_t maxsplit
= 0;
2176 static char* kwlist
[] = { "source", "maxsplit", NULL
};
2177 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|n:split", kwlist
,
2178 &string
, &maxsplit
))
2181 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2185 list
= PyList_New(0);
2194 while (!maxsplit
|| n
< maxsplit
) {
2196 state_reset(&state
);
2198 state
.ptr
= state
.start
;
2200 if (state
.charsize
== 1) {
2201 status
= sre_search(&state
, PatternObject_GetCode(self
));
2203 #if defined(HAVE_UNICODE)
2204 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2208 if (PyErr_Occurred())
2214 pattern_error(status
);
2218 if (state
.start
== state
.ptr
) {
2219 if (last
== state
.end
)
2221 /* skip one character */
2222 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2226 /* get segment before this match */
2227 item
= PySequence_GetSlice(
2228 string
, STATE_OFFSET(&state
, last
),
2229 STATE_OFFSET(&state
, state
.start
)
2233 status
= PyList_Append(list
, item
);
2238 /* add groups (if any) */
2239 for (i
= 0; i
< self
->groups
; i
++) {
2240 item
= state_getslice(&state
, i
+1, string
, 0);
2243 status
= PyList_Append(list
, item
);
2251 last
= state
.start
= state
.ptr
;
2255 /* get segment following last match (even if empty) */
2256 item
= PySequence_GetSlice(
2257 string
, STATE_OFFSET(&state
, last
), state
.endpos
2261 status
= PyList_Append(list
, item
);
2277 pattern_subx(PatternObject
* self
, PyObject
* ptemplate
, PyObject
* string
,
2278 Py_ssize_t count
, Py_ssize_t subn
)
2291 int filter_is_callable
;
2293 if (PyCallable_Check(ptemplate
)) {
2294 /* sub/subn takes either a function or a template */
2297 filter_is_callable
= 1;
2299 /* if not callable, check if it's a literal string */
2301 ptr
= getstring(ptemplate
, &n
, &bint
);
2305 literal
= sre_literal_template((unsigned char *)ptr
, n
);
2307 #if defined(HAVE_UNICODE)
2308 literal
= sre_uliteral_template((Py_UNICODE
*)ptr
, n
);
2318 filter_is_callable
= 0;
2320 /* not a literal; hand it over to the template compiler */
2322 SRE_PY_MODULE
, "_subx",
2323 PyTuple_Pack(2, self
, ptemplate
)
2327 filter_is_callable
= PyCallable_Check(filter
);
2331 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2337 list
= PyList_New(0);
2346 while (!count
|| n
< count
) {
2348 state_reset(&state
);
2350 state
.ptr
= state
.start
;
2352 if (state
.charsize
== 1) {
2353 status
= sre_search(&state
, PatternObject_GetCode(self
));
2355 #if defined(HAVE_UNICODE)
2356 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2360 if (PyErr_Occurred())
2366 pattern_error(status
);
2370 b
= STATE_OFFSET(&state
, state
.start
);
2371 e
= STATE_OFFSET(&state
, state
.ptr
);
2374 /* get segment before this match */
2375 item
= PySequence_GetSlice(string
, i
, b
);
2378 status
= PyList_Append(list
, item
);
2383 } else if (i
== b
&& i
== e
&& n
> 0)
2384 /* ignore empty match on latest position */
2387 if (filter_is_callable
) {
2388 /* pass match object through filter */
2389 match
= pattern_new_match(self
, &state
, 1);
2392 args
= PyTuple_Pack(1, match
);
2397 item
= PyObject_CallObject(filter
, args
);
2403 /* filter is literal string */
2409 if (item
!= Py_None
) {
2410 status
= PyList_Append(list
, item
);
2421 if (state
.ptr
== state
.start
)
2422 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2424 state
.start
= state
.ptr
;
2428 /* get segment following last match */
2429 if (i
< state
.endpos
) {
2430 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2433 status
= PyList_Append(list
, item
);
2443 /* convert list to single string (also removes list) */
2444 item
= join_list(list
, string
);
2450 return Py_BuildValue("Ni", item
, n
);
2463 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2465 PyObject
* ptemplate
;
2467 Py_ssize_t count
= 0;
2468 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2469 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:sub", kwlist
,
2470 &ptemplate
, &string
, &count
))
2473 return pattern_subx(self
, ptemplate
, string
, count
, 0);
2477 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2479 PyObject
* ptemplate
;
2481 Py_ssize_t count
= 0;
2482 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2483 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:subn", kwlist
,
2484 &ptemplate
, &string
, &count
))
2487 return pattern_subx(self
, ptemplate
, string
, count
, 1);
2491 pattern_copy(PatternObject
* self
, PyObject
*unused
)
2493 #ifdef USE_BUILTIN_COPY
2494 PatternObject
* copy
;
2497 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2501 offset
= offsetof(PatternObject
, groups
);
2503 Py_XINCREF(self
->groupindex
);
2504 Py_XINCREF(self
->indexgroup
);
2505 Py_XINCREF(self
->pattern
);
2507 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2508 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2509 copy
->weakreflist
= NULL
;
2511 return (PyObject
*) copy
;
2513 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2519 pattern_deepcopy(PatternObject
* self
, PyObject
* memo
)
2521 #ifdef USE_BUILTIN_COPY
2522 PatternObject
* copy
;
2524 copy
= (PatternObject
*) pattern_copy(self
);
2528 if (!deepcopy(©
->groupindex
, memo
) ||
2529 !deepcopy(©
->indexgroup
, memo
) ||
2530 !deepcopy(©
->pattern
, memo
)) {
2536 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2541 PyDoc_STRVAR(pattern_match_doc
,
2542 "match(string[, pos[, endpos]]) --> match object or None.\n\
2543 Matches zero or more characters at the beginning of the string");
2545 PyDoc_STRVAR(pattern_search_doc
,
2546 "search(string[, pos[, endpos]]) --> match object or None.\n\
2547 Scan through string looking for a match, and return a corresponding\n\
2548 MatchObject instance. Return None if no position in the string matches.");
2550 PyDoc_STRVAR(pattern_split_doc
,
2551 "split(string[, maxsplit = 0]) --> list.\n\
2552 Split string by the occurrences of pattern.");
2554 PyDoc_STRVAR(pattern_findall_doc
,
2555 "findall(string[, pos[, endpos]]) --> list.\n\
2556 Return a list of all non-overlapping matches of pattern in string.");
2558 PyDoc_STRVAR(pattern_finditer_doc
,
2559 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2560 Return an iterator over all non-overlapping matches for the \n\
2561 RE pattern in string. For each match, the iterator returns a\n\
2564 PyDoc_STRVAR(pattern_sub_doc
,
2565 "sub(repl, string[, count = 0]) --> newstring\n\
2566 Return the string obtained by replacing the leftmost non-overlapping\n\
2567 occurrences of pattern in string by the replacement repl.");
2569 PyDoc_STRVAR(pattern_subn_doc
,
2570 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2571 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2572 the leftmost non-overlapping occurrences of pattern with the\n\
2573 replacement repl.");
2575 PyDoc_STRVAR(pattern_doc
, "Compiled regular expression objects");
2577 static PyMethodDef pattern_methods
[] = {
2578 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
,
2580 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
,
2581 pattern_search_doc
},
2582 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
,
2584 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
,
2586 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
,
2588 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
,
2589 pattern_findall_doc
},
2590 #if PY_VERSION_HEX >= 0x02020000
2591 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
,
2592 pattern_finditer_doc
},
2594 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2595 {"__copy__", (PyCFunction
) pattern_copy
, METH_NOARGS
},
2596 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_O
},
2601 pattern_getattr(PatternObject
* self
, char* name
)
2605 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2613 if (!strcmp(name
, "pattern")) {
2614 Py_INCREF(self
->pattern
);
2615 return self
->pattern
;
2618 if (!strcmp(name
, "flags"))
2619 return Py_BuildValue("i", self
->flags
);
2621 if (!strcmp(name
, "groups"))
2622 return Py_BuildValue("i", self
->groups
);
2624 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2625 Py_INCREF(self
->groupindex
);
2626 return self
->groupindex
;
2629 PyErr_SetString(PyExc_AttributeError
, name
);
2633 static PyTypeObject Pattern_Type
= {
2634 PyVarObject_HEAD_INIT(NULL
, 0)
2635 "_" SRE_MODULE
".SRE_Pattern",
2636 sizeof(PatternObject
), sizeof(SRE_CODE
),
2637 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2639 (getattrfunc
)pattern_getattr
, /*tp_getattr*/
2643 0, /* tp_as_number */
2644 0, /* tp_as_sequence */
2645 0, /* tp_as_mapping */
2649 0, /* tp_getattro */
2650 0, /* tp_setattro */
2651 0, /* tp_as_buffer */
2652 Py_TPFLAGS_DEFAULT
, /* tp_flags */
2653 pattern_doc
, /* tp_doc */
2654 0, /* tp_traverse */
2656 0, /* tp_richcompare */
2657 offsetof(PatternObject
, weakreflist
), /* tp_weaklistoffset */
2661 _compile(PyObject
* self_
, PyObject
* args
)
2663 /* "compile" pattern descriptor to pattern object */
2665 PatternObject
* self
;
2671 Py_ssize_t groups
= 0;
2672 PyObject
* groupindex
= NULL
;
2673 PyObject
* indexgroup
= NULL
;
2674 if (!PyArg_ParseTuple(args
, "OiO!|nOO", &pattern
, &flags
,
2675 &PyList_Type
, &code
, &groups
,
2676 &groupindex
, &indexgroup
))
2679 n
= PyList_GET_SIZE(code
);
2680 /* coverity[ampersand_in_size] */
2681 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
2687 for (i
= 0; i
< n
; i
++) {
2688 PyObject
*o
= PyList_GET_ITEM(code
, i
);
2689 unsigned long value
= PyLong_AsUnsignedLong(o
);
2690 self
->code
[i
] = (SRE_CODE
) value
;
2691 if ((unsigned long) self
->code
[i
] != value
) {
2692 PyErr_SetString(PyExc_OverflowError
,
2693 "regular expression code size limit exceeded");
2698 if (PyErr_Occurred()) {
2704 self
->pattern
= pattern
;
2706 self
->flags
= flags
;
2708 self
->groups
= groups
;
2710 Py_XINCREF(groupindex
);
2711 self
->groupindex
= groupindex
;
2713 Py_XINCREF(indexgroup
);
2714 self
->indexgroup
= indexgroup
;
2716 self
->weakreflist
= NULL
;
2718 return (PyObject
*) self
;
2721 /* -------------------------------------------------------------------- */
2725 match_dealloc(MatchObject
* self
)
2727 Py_XDECREF(self
->regs
);
2728 Py_XDECREF(self
->string
);
2729 Py_DECREF(self
->pattern
);
2734 match_getslice_by_index(MatchObject
* self
, Py_ssize_t index
, PyObject
* def
)
2736 if (index
< 0 || index
>= self
->groups
) {
2737 /* raise IndexError if we were given a bad group number */
2747 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2748 /* return default value if the string or group is undefined */
2753 return PySequence_GetSlice(
2754 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2759 match_getindex(MatchObject
* self
, PyObject
* index
)
2767 if (PyLong_Check(index
))
2768 return PyLong_AsSsize_t(index
);
2772 if (self
->pattern
->groupindex
) {
2773 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2775 if (PyLong_Check(index
))
2776 i
= PyLong_AsSsize_t(index
);
2786 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2788 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2792 match_expand(MatchObject
* self
, PyObject
* ptemplate
)
2794 /* delegate to Python code */
2796 SRE_PY_MODULE
, "_expand",
2797 PyTuple_Pack(3, self
->pattern
, self
, ptemplate
)
2802 match_group(MatchObject
* self
, PyObject
* args
)
2807 size
= PyTuple_GET_SIZE(args
);
2811 result
= match_getslice(self
, Py_False
, Py_None
);
2814 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2817 /* fetch multiple items */
2818 result
= PyTuple_New(size
);
2821 for (i
= 0; i
< size
; i
++) {
2822 PyObject
* item
= match_getslice(
2823 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2829 PyTuple_SET_ITEM(result
, i
, item
);
2837 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2842 PyObject
* def
= Py_None
;
2843 static char* kwlist
[] = { "default", NULL
};
2844 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2847 result
= PyTuple_New(self
->groups
-1);
2851 for (index
= 1; index
< self
->groups
; index
++) {
2853 item
= match_getslice_by_index(self
, index
, def
);
2858 PyTuple_SET_ITEM(result
, index
-1, item
);
2865 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2871 PyObject
* def
= Py_None
;
2872 static char* kwlist
[] = { "default", NULL
};
2873 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2876 result
= PyDict_New();
2877 if (!result
|| !self
->pattern
->groupindex
)
2880 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2884 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2888 key
= PyList_GET_ITEM(keys
, index
);
2891 value
= match_getslice(self
, key
, def
);
2896 status
= PyDict_SetItem(result
, key
, value
);
2913 match_start(MatchObject
* self
, PyObject
* args
)
2917 PyObject
* index_
= NULL
;
2918 if (!PyArg_UnpackTuple(args
, "start", 0, 1, &index_
))
2921 index
= match_getindex(self
, index_
);
2923 if (index
< 0 || index
>= self
->groups
) {
2931 /* mark is -1 if group is undefined */
2932 return Py_BuildValue("i", self
->mark
[index
*2]);
2936 match_end(MatchObject
* self
, PyObject
* args
)
2940 PyObject
* index_
= NULL
;
2941 if (!PyArg_UnpackTuple(args
, "end", 0, 1, &index_
))
2944 index
= match_getindex(self
, index_
);
2946 if (index
< 0 || index
>= self
->groups
) {
2954 /* mark is -1 if group is undefined */
2955 return Py_BuildValue("i", self
->mark
[index
*2+1]);
2959 _pair(Py_ssize_t i1
, Py_ssize_t i2
)
2964 pair
= PyTuple_New(2);
2968 item
= PyLong_FromSsize_t(i1
);
2971 PyTuple_SET_ITEM(pair
, 0, item
);
2973 item
= PyLong_FromSsize_t(i2
);
2976 PyTuple_SET_ITEM(pair
, 1, item
);
2986 match_span(MatchObject
* self
, PyObject
* args
)
2990 PyObject
* index_
= NULL
;
2991 if (!PyArg_UnpackTuple(args
, "span", 0, 1, &index_
))
2994 index
= match_getindex(self
, index_
);
2996 if (index
< 0 || index
>= self
->groups
) {
3004 /* marks are -1 if group is undefined */
3005 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3009 match_regs(MatchObject
* self
)
3015 regs
= PyTuple_New(self
->groups
);
3019 for (index
= 0; index
< self
->groups
; index
++) {
3020 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3025 PyTuple_SET_ITEM(regs
, index
, item
);
3035 match_copy(MatchObject
* self
, PyObject
*unused
)
3037 #ifdef USE_BUILTIN_COPY
3039 Py_ssize_t slots
, offset
;
3041 slots
= 2 * (self
->pattern
->groups
+1);
3043 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
3047 /* this value a constant, but any compiler should be able to
3048 figure that out all by itself */
3049 offset
= offsetof(MatchObject
, string
);
3051 Py_XINCREF(self
->pattern
);
3052 Py_XINCREF(self
->string
);
3053 Py_XINCREF(self
->regs
);
3055 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
3056 sizeof(MatchObject
) + slots
* sizeof(Py_ssize_t
) - offset
);
3058 return (PyObject
*) copy
;
3060 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
3066 match_deepcopy(MatchObject
* self
, PyObject
* memo
)
3068 #ifdef USE_BUILTIN_COPY
3071 copy
= (MatchObject
*) match_copy(self
);
3075 if (!deepcopy((PyObject
**) ©
->pattern
, memo
) ||
3076 !deepcopy(©
->string
, memo
) ||
3077 !deepcopy(©
->regs
, memo
)) {
3083 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
3088 static PyMethodDef match_methods
[] = {
3089 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
3090 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
3091 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
3092 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
3093 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
3094 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
3095 {"expand", (PyCFunction
) match_expand
, METH_O
},
3096 {"__copy__", (PyCFunction
) match_copy
, METH_NOARGS
},
3097 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_O
},
3102 match_getattr(MatchObject
* self
, char* name
)
3106 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
3112 if (!strcmp(name
, "lastindex")) {
3113 if (self
->lastindex
>= 0)
3114 return Py_BuildValue("i", self
->lastindex
);
3119 if (!strcmp(name
, "lastgroup")) {
3120 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
3121 PyObject
* result
= PySequence_GetItem(
3122 self
->pattern
->indexgroup
, self
->lastindex
3132 if (!strcmp(name
, "string")) {
3134 Py_INCREF(self
->string
);
3135 return self
->string
;
3142 if (!strcmp(name
, "regs")) {
3144 Py_INCREF(self
->regs
);
3147 return match_regs(self
);
3150 if (!strcmp(name
, "re")) {
3151 Py_INCREF(self
->pattern
);
3152 return (PyObject
*) self
->pattern
;
3155 if (!strcmp(name
, "pos"))
3156 return Py_BuildValue("i", self
->pos
);
3158 if (!strcmp(name
, "endpos"))
3159 return Py_BuildValue("i", self
->endpos
);
3161 PyErr_SetString(PyExc_AttributeError
, name
);
3165 /* FIXME: implement setattr("string", None) as a special case (to
3166 detach the associated string, if any */
3168 static PyTypeObject Match_Type
= {
3169 PyVarObject_HEAD_INIT(NULL
,0)
3170 "_" SRE_MODULE
".SRE_Match",
3171 sizeof(MatchObject
), sizeof(Py_ssize_t
),
3172 (destructor
)match_dealloc
, /*tp_dealloc*/
3174 (getattrfunc
)match_getattr
/*tp_getattr*/
3178 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
3180 /* create match object (from state object) */
3189 /* create match object (with room for extra group marks) */
3190 /* coverity[ampersand_in_size] */
3191 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
3192 2*(pattern
->groups
+1));
3197 match
->pattern
= pattern
;
3199 Py_INCREF(state
->string
);
3200 match
->string
= state
->string
;
3203 match
->groups
= pattern
->groups
+1;
3205 /* fill in group slices */
3207 base
= (char*) state
->beginning
;
3208 n
= state
->charsize
;
3210 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
3211 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
3213 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
3214 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
3215 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
3216 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
3218 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
3220 match
->pos
= state
->pos
;
3221 match
->endpos
= state
->endpos
;
3223 match
->lastindex
= state
->lastindex
;
3225 return (PyObject
*) match
;
3227 } else if (status
== 0) {
3235 /* internal error */
3236 pattern_error(status
);
3241 /* -------------------------------------------------------------------- */
3242 /* scanner methods (experimental) */
3245 scanner_dealloc(ScannerObject
* self
)
3247 state_fini(&self
->state
);
3248 Py_DECREF(self
->pattern
);
3253 scanner_match(ScannerObject
* self
, PyObject
*unused
)
3255 SRE_STATE
* state
= &self
->state
;
3261 state
->ptr
= state
->start
;
3263 if (state
->charsize
== 1) {
3264 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
));
3266 #if defined(HAVE_UNICODE)
3267 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
));
3270 if (PyErr_Occurred())
3273 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3276 if (status
== 0 || state
->ptr
== state
->start
)
3277 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3279 state
->start
= state
->ptr
;
3286 scanner_search(ScannerObject
* self
, PyObject
*unused
)
3288 SRE_STATE
* state
= &self
->state
;
3294 state
->ptr
= state
->start
;
3296 if (state
->charsize
== 1) {
3297 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
3299 #if defined(HAVE_UNICODE)
3300 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
3303 if (PyErr_Occurred())
3306 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3309 if (status
== 0 || state
->ptr
== state
->start
)
3310 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3312 state
->start
= state
->ptr
;
3317 static PyMethodDef scanner_methods
[] = {
3318 {"match", (PyCFunction
) scanner_match
, METH_NOARGS
},
3319 {"search", (PyCFunction
) scanner_search
, METH_NOARGS
},
3324 scanner_getattr(ScannerObject
* self
, char* name
)
3328 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
3335 if (!strcmp(name
, "pattern")) {
3336 Py_INCREF(self
->pattern
);
3337 return self
->pattern
;
3340 PyErr_SetString(PyExc_AttributeError
, name
);
3344 static PyTypeObject Scanner_Type
= {
3345 PyVarObject_HEAD_INIT(NULL
, 0)
3346 "_" SRE_MODULE
".SRE_Scanner",
3347 sizeof(ScannerObject
), 0,
3348 (destructor
)scanner_dealloc
, /*tp_dealloc*/
3350 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
3354 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
3356 /* create search state object */
3358 ScannerObject
* self
;
3361 Py_ssize_t start
= 0;
3362 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3363 if (!PyArg_ParseTuple(args
, "O|nn:scanner", &string
, &start
, &end
))
3366 /* create scanner object */
3367 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
3371 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
3378 self
->pattern
= (PyObject
*) pattern
;
3380 return (PyObject
*) self
;
3383 static PyMethodDef _functions
[] = {
3384 {"compile", _compile
, METH_VARARGS
},
3385 {"getcodesize", sre_codesize
, METH_NOARGS
},
3386 {"getlower", sre_getlower
, METH_VARARGS
},
3390 PyMODINIT_FUNC
init_sre(void)
3396 /* Initialize object types */
3397 if (PyType_Ready(&Pattern_Type
) < 0)
3399 if (PyType_Ready(&Match_Type
) < 0)
3401 if (PyType_Ready(&Scanner_Type
) < 0)
3404 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
3407 d
= PyModule_GetDict(m
);
3409 x
= PyLong_FromLong(SRE_MAGIC
);
3411 PyDict_SetItemString(d
, "MAGIC", x
);
3415 x
= PyLong_FromLong(sizeof(SRE_CODE
));
3417 PyDict_SetItemString(d
, "CODESIZE", x
);
3421 x
= PyUnicode_FromString(copyright
);
3423 PyDict_SetItemString(d
, "copyright", x
);
3428 #endif /* !defined(SRE_RECURSIVE) */