2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-06-30 fl added fast search optimization
10 * 2000-06-30 fl added assert (lookahead) primitives, etc
11 * 2000-07-02 fl added charset optimizations, etc
12 * 2000-07-03 fl store code in pattern object, lookbehind, etc
13 * 2000-07-08 fl added regs attribute
14 * 2000-07-21 fl reset lastindex in scanner methods
15 * 2000-08-01 fl fixes for 1.6b1
16 * 2000-08-03 fl added recursion limit
17 * 2000-08-07 fl use PyOS_CheckStack() if available
18 * 2000-08-08 fl changed findall to return empty strings instead of None
19 * 2000-08-27 fl properly propagate memory errors
20 * 2000-09-02 fl return -1 instead of None for start/end/span
21 * 2000-09-20 fl added expand method
22 * 2000-09-21 fl don't use the buffer interface for unicode strings
23 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
24 * 2000-10-24 fl really fixed assert_not; reset groups in findall
25 * 2000-12-21 fl fixed memory leak in groupdict
26 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
27 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
28 * 2001-01-16 fl fixed memory leak in pattern destructor
29 * 2001-03-20 fl lots of fixes for 2.1b2
30 * 2001-04-15 fl export copyright as Python attribute, not global
31 * 2001-04-28 fl added __copy__ methods (work in progress)
32 * 2001-05-14 fl fixes for 1.5.2
33 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
34 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
35 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
36 * 2001-10-21 fl added sub/subn primitive
37 * 2001-10-22 fl check for literal sub/subn templates
38 * 2001-10-24 fl added finditer primitive (for 2.2 only)
39 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
41 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
43 * This version of the SRE library can be redistributed under CNRI's
44 * Python 1.6 license. For any other use, please contact Secret Labs
45 * AB (info@pythonware.com).
47 * Portions of this engine have been developed in cooperation with
48 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
49 * other compatibility work.
54 static char copyright
[] =
55 " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
58 #include "structmember.h" /* offsetof */
64 /* name of this module, minus the leading underscore */
65 #if !defined(SRE_MODULE)
66 #define SRE_MODULE "sre"
69 /* defining this one enables tracing */
72 #if PY_VERSION_HEX >= 0x01060000
73 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
74 /* defining this enables unicode support (default under 1.6a1 and later) */
79 /* -------------------------------------------------------------------- */
80 /* optional features */
82 /* prevent run-away recursion (bad patterns on long strings) */
84 #if !defined(USE_STACKCHECK)
85 #if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
86 /* require smaller recursion limit for a number of 64-bit platforms:
87 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
88 /* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
89 #define USE_RECURSION_LIMIT 7500
91 #define USE_RECURSION_LIMIT 10000
95 /* enables fast searching */
96 #define USE_FAST_SEARCH
98 /* enables aggressive inlining (always on for Visual C) */
101 /* enables copy/deepcopy handling (work in progress) */
102 #undef USE_BUILTIN_COPY
104 #if PY_VERSION_HEX < 0x01060000
105 #define PyObject_DEL(op) PyMem_DEL((op))
108 /* -------------------------------------------------------------------- */
110 #if defined(_MSC_VER)
111 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
112 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
113 /* fastest possible local call under MSVC */
114 #define LOCAL(type) static __inline type __fastcall
115 #elif defined(USE_INLINE)
116 #define LOCAL(type) static inline type
118 #define LOCAL(type) static type
122 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
123 #define SRE_ERROR_STATE -2 /* illegal state */
124 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
125 #define SRE_ERROR_MEMORY -9 /* out of memory */
128 #define TRACE(v) printf v
133 /* -------------------------------------------------------------------- */
134 /* search engine state */
136 /* default character predicates (run sre_chars.py to regenerate tables) */
/* Bit flags stored for each character code in sre_char_info[]. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Classification flags (an OR of the SRE_*_MASK bits above) for the
   character codes 0..127.  Read-only, hence const. */
static const char sre_char_info[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 2, 6, 2, 2, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    2, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 0, 0, 0, 0, 0, 0,
    0, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 0, 0, 0, 0, 16,
    0, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 0, 0, 0, 0, 0
};

/* Lower-case mapping for the character codes 0..127: 'A'..'Z' map to
   'a'..'z', every other code maps to itself.  Read-only, hence const. */
static const char sre_char_lower[128] = {
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103,
    104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103,
    104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127
};

/* Default character predicates.  Each yields the matching flag bit
   (non-zero) when ch is in the class and 0 otherwise.  Codes outside
   0..127 -- including negative values of a signed argument, which would
   otherwise index in front of the table -- are never in any class.
   Note that ch is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    (((ch) >= 0 && (ch) < 128) ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    (((ch) >= 0 && (ch) < 128) ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    (((ch) >= 0 && (ch) < 128) ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    (((ch) >= 0 && (ch) < 128) ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    (((ch) >= 0 && (ch) < 128) ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Lower-case a character code using the built-in table.  Codes of 128
   and above are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return (ch < 128) ? (unsigned int) sre_char_lower[ch] : ch;
}
180 /* locale-specific character predicates */
/* Locale-aware predicates built on <ctype.h>.  The ctype functions are
   only defined for values representable as unsigned char (or EOF), so
   anything outside 0..255 -- including a negative signed argument -- is
   reported as "not in the class" without calling them.
   Note that ch is evaluated more than once. */
#define SRE_LOC_IS_DIGIT(ch)\
    (((ch) >= 0 && (ch) < 256) ? isdigit((int) (ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    (((ch) >= 0 && (ch) < 256) ? isspace((int) (ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    (((ch) >= 0 && (ch) < 256) ? isalnum((int) (ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case a character code according to the current C locale.
   Codes of 256 and above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return (ch < 256) ? (unsigned int) tolower((int) ch) : ch;
}
193 /* unicode-specific character predicates */
195 #if defined(HAVE_UNICODE)
197 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
198 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
199 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
200 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
201 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
203 static unsigned int sre_lower_unicode(unsigned int ch
)
205 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE
)(ch
));
211 sre_category(SRE_CODE category
, unsigned int ch
)
215 case SRE_CATEGORY_DIGIT
:
216 return SRE_IS_DIGIT(ch
);
217 case SRE_CATEGORY_NOT_DIGIT
:
218 return !SRE_IS_DIGIT(ch
);
219 case SRE_CATEGORY_SPACE
:
220 return SRE_IS_SPACE(ch
);
221 case SRE_CATEGORY_NOT_SPACE
:
222 return !SRE_IS_SPACE(ch
);
223 case SRE_CATEGORY_WORD
:
224 return SRE_IS_WORD(ch
);
225 case SRE_CATEGORY_NOT_WORD
:
226 return !SRE_IS_WORD(ch
);
227 case SRE_CATEGORY_LINEBREAK
:
228 return SRE_IS_LINEBREAK(ch
);
229 case SRE_CATEGORY_NOT_LINEBREAK
:
230 return !SRE_IS_LINEBREAK(ch
);
232 case SRE_CATEGORY_LOC_WORD
:
233 return SRE_LOC_IS_WORD(ch
);
234 case SRE_CATEGORY_LOC_NOT_WORD
:
235 return !SRE_LOC_IS_WORD(ch
);
237 #if defined(HAVE_UNICODE)
238 case SRE_CATEGORY_UNI_DIGIT
:
239 return SRE_UNI_IS_DIGIT(ch
);
240 case SRE_CATEGORY_UNI_NOT_DIGIT
:
241 return !SRE_UNI_IS_DIGIT(ch
);
242 case SRE_CATEGORY_UNI_SPACE
:
243 return SRE_UNI_IS_SPACE(ch
);
244 case SRE_CATEGORY_UNI_NOT_SPACE
:
245 return !SRE_UNI_IS_SPACE(ch
);
246 case SRE_CATEGORY_UNI_WORD
:
247 return SRE_UNI_IS_WORD(ch
);
248 case SRE_CATEGORY_UNI_NOT_WORD
:
249 return !SRE_UNI_IS_WORD(ch
);
250 case SRE_CATEGORY_UNI_LINEBREAK
:
251 return SRE_UNI_IS_LINEBREAK(ch
);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
253 return !SRE_UNI_IS_LINEBREAK(ch
);
255 case SRE_CATEGORY_UNI_DIGIT
:
256 return SRE_IS_DIGIT(ch
);
257 case SRE_CATEGORY_UNI_NOT_DIGIT
:
258 return !SRE_IS_DIGIT(ch
);
259 case SRE_CATEGORY_UNI_SPACE
:
260 return SRE_IS_SPACE(ch
);
261 case SRE_CATEGORY_UNI_NOT_SPACE
:
262 return !SRE_IS_SPACE(ch
);
263 case SRE_CATEGORY_UNI_WORD
:
264 return SRE_LOC_IS_WORD(ch
);
265 case SRE_CATEGORY_UNI_NOT_WORD
:
266 return !SRE_LOC_IS_WORD(ch
);
267 case SRE_CATEGORY_UNI_LINEBREAK
:
268 return SRE_IS_LINEBREAK(ch
);
269 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
270 return !SRE_IS_LINEBREAK(ch
);
279 mark_fini(SRE_STATE
* state
)
281 if (state
->mark_stack
) {
282 free(state
->mark_stack
);
283 state
->mark_stack
= NULL
;
285 state
->mark_stack_size
= state
->mark_stack_base
= 0;
289 mark_save(SRE_STATE
* state
, int lo
, int hi
)
293 int minsize
, newsize
;
298 size
= (hi
- lo
) + 1;
300 newsize
= state
->mark_stack_size
;
301 minsize
= state
->mark_stack_base
+ size
;
303 if (newsize
< minsize
) {
304 /* create new stack */
307 if (newsize
< minsize
)
309 TRACE(("allocate stack %d\n", newsize
));
310 stack
= malloc(sizeof(void*) * newsize
);
313 while (newsize
< minsize
)
315 TRACE(("grow stack to %d\n", newsize
));
316 stack
= realloc(state
->mark_stack
, sizeof(void*) * newsize
);
320 return SRE_ERROR_MEMORY
;
322 state
->mark_stack
= stack
;
323 state
->mark_stack_size
= newsize
;
326 TRACE(("copy %d:%d to %d (%d)\n", lo
, hi
, state
->mark_stack_base
, size
));
328 memcpy(state
->mark_stack
+ state
->mark_stack_base
, state
->mark
+ lo
,
329 size
* sizeof(void*));
331 state
->mark_stack_base
+= size
;
337 mark_restore(SRE_STATE
* state
, int lo
, int hi
)
344 size
= (hi
- lo
) + 1;
346 state
->mark_stack_base
-= size
;
348 TRACE(("copy %d:%d from %d\n", lo
, hi
, state
->mark_stack_base
));
350 memcpy(state
->mark
+ lo
, state
->mark_stack
+ state
->mark_stack_base
,
351 size
* sizeof(void*));
356 /* generate 8-bit version */
358 #define SRE_CHAR unsigned char
359 #define SRE_AT sre_at
360 #define SRE_COUNT sre_count
361 #define SRE_CHARSET sre_charset
362 #define SRE_INFO sre_info
363 #define SRE_MATCH sre_match
364 #define SRE_SEARCH sre_search
365 #define SRE_LITERAL_TEMPLATE sre_literal_template
367 #if defined(HAVE_UNICODE)
369 #define SRE_RECURSIVE
373 #undef SRE_LITERAL_TEMPLATE
382 /* generate 16-bit unicode version */
384 #define SRE_CHAR Py_UNICODE
385 #define SRE_AT sre_uat
386 #define SRE_COUNT sre_ucount
387 #define SRE_CHARSET sre_ucharset
388 #define SRE_INFO sre_uinfo
389 #define SRE_MATCH sre_umatch
390 #define SRE_SEARCH sre_usearch
391 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
394 #endif /* SRE_RECURSIVE */
396 /* -------------------------------------------------------------------- */
397 /* String matching engine */
399 /* the following section is compiled twice, with different character
403 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
405 /* check if pointer is at given position */
411 case SRE_AT_BEGINNING
:
412 case SRE_AT_BEGINNING_STRING
:
413 return ((void*) ptr
== state
->beginning
);
415 case SRE_AT_BEGINNING_LINE
:
416 return ((void*) ptr
== state
->beginning
||
417 SRE_IS_LINEBREAK((int) ptr
[-1]));
420 return (((void*) (ptr
+1) == state
->end
&&
421 SRE_IS_LINEBREAK((int) ptr
[0])) ||
422 ((void*) ptr
== state
->end
));
424 case SRE_AT_END_LINE
:
425 return ((void*) ptr
== state
->end
||
426 SRE_IS_LINEBREAK((int) ptr
[0]));
428 case SRE_AT_END_STRING
:
429 return ((void*) ptr
== state
->end
);
431 case SRE_AT_BOUNDARY
:
432 if (state
->beginning
== state
->end
)
434 that
= ((void*) ptr
> state
->beginning
) ?
435 SRE_IS_WORD((int) ptr
[-1]) : 0;
436 this = ((void*) ptr
< state
->end
) ?
437 SRE_IS_WORD((int) ptr
[0]) : 0;
440 case SRE_AT_NON_BOUNDARY
:
441 if (state
->beginning
== state
->end
)
443 that
= ((void*) ptr
> state
->beginning
) ?
444 SRE_IS_WORD((int) ptr
[-1]) : 0;
445 this = ((void*) ptr
< state
->end
) ?
446 SRE_IS_WORD((int) ptr
[0]) : 0;
449 case SRE_AT_LOC_BOUNDARY
:
450 if (state
->beginning
== state
->end
)
452 that
= ((void*) ptr
> state
->beginning
) ?
453 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
454 this = ((void*) ptr
< state
->end
) ?
455 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
458 case SRE_AT_LOC_NON_BOUNDARY
:
459 if (state
->beginning
== state
->end
)
461 that
= ((void*) ptr
> state
->beginning
) ?
462 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
463 this = ((void*) ptr
< state
->end
) ?
464 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
467 #if defined(HAVE_UNICODE)
468 case SRE_AT_UNI_BOUNDARY
:
469 if (state
->beginning
== state
->end
)
471 that
= ((void*) ptr
> state
->beginning
) ?
472 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
473 this = ((void*) ptr
< state
->end
) ?
474 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
477 case SRE_AT_UNI_NON_BOUNDARY
:
478 if (state
->beginning
== state
->end
)
480 that
= ((void*) ptr
> state
->beginning
) ?
481 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
482 this = ((void*) ptr
< state
->end
) ?
483 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
493 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
495 /* check if character is a member of the given set */
503 /* <LITERAL> <code> */
510 /* <RANGE> <lower> <upper> */
511 if (set
[0] <= ch
&& ch
<= set
[1])
517 /* <CHARSET> <bitmap> (16 bits per code word) */
518 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
523 case SRE_OP_BIGCHARSET
:
524 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
528 block
= ((unsigned char*)set
)[ch
>> 8];
530 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
536 case SRE_OP_CATEGORY
:
537 /* <CATEGORY> <code> */
538 if (sre_category(set
[0], (int) ch
))
551 /* internal error -- there's not much we can do about it
552 here, so let's just pretend it didn't match... */
558 LOCAL(int) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
);
561 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, int maxcount
, int level
)
564 SRE_CHAR
* ptr
= state
->ptr
;
565 SRE_CHAR
* end
= state
->end
;
569 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
570 end
= ptr
+ maxcount
;
572 switch (pattern
[0]) {
575 /* repeated dot wildcard. */
576 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
577 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
582 /* repeated dot wildcard. skip to the end of the target
583 string, and backtrack from there */
584 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
589 /* repeated literal */
591 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
592 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
596 case SRE_OP_LITERAL_IGNORE
:
597 /* repeated literal */
599 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
600 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
604 case SRE_OP_NOT_LITERAL
:
605 /* repeated non-literal */
607 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
608 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
612 case SRE_OP_NOT_LITERAL_IGNORE
:
613 /* repeated non-literal */
615 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
616 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
622 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
623 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
628 /* repeated single character pattern */
629 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
630 while ((SRE_CHAR
*) state
->ptr
< end
) {
631 i
= SRE_MATCH(state
, pattern
, level
);
637 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
638 (SRE_CHAR
*) state
->ptr
- ptr
));
639 return (SRE_CHAR
*) state
->ptr
- ptr
;
642 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
643 return ptr
- (SRE_CHAR
*) state
->ptr
;
646 #if 0 /* not used in this release */
648 SRE_INFO(SRE_STATE
* state
, SRE_CODE
* pattern
)
650 /* check if an SRE_OP_INFO block matches at the current position.
651 returns the number of SRE_CODE objects to skip if successful, 0
654 SRE_CHAR
* end
= state
->end
;
655 SRE_CHAR
* ptr
= state
->ptr
;
658 /* check minimal length */
659 if (pattern
[3] && (end
- ptr
) < pattern
[3])
662 /* check known prefix */
663 if (pattern
[2] & SRE_INFO_PREFIX
&& pattern
[5] > 1) {
664 /* <length> <skip> <prefix data> <overlap data> */
665 for (i
= 0; i
< pattern
[5]; i
++)
666 if ((SRE_CODE
) ptr
[i
] != pattern
[7 + i
])
668 return pattern
[0] + 2 * pattern
[6];
675 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
)
677 /* check if string matches the given pattern. returns <0 for
678 error, 0 for failure, and 1 for success */
680 SRE_CHAR
* end
= state
->end
;
681 SRE_CHAR
* ptr
= state
->ptr
;
687 SRE_REPEAT rep
; /* FIXME: <fl> allocate in STATE instead */
689 TRACE(("|%p|%p|ENTER %d\n", pattern
, ptr
, level
));
691 #if defined(USE_STACKCHECK)
692 if (level
% 10 == 0 && PyOS_CheckStack())
693 return SRE_ERROR_RECURSION_LIMIT
;
696 #if defined(USE_RECURSION_LIMIT)
697 if (level
> USE_RECURSION_LIMIT
)
698 return SRE_ERROR_RECURSION_LIMIT
;
701 if (pattern
[0] == SRE_OP_INFO
) {
702 /* optimization info block */
703 /* <INFO> <1=skip> <2=flags> <3=min> ... */
704 if (pattern
[3] && (end
- ptr
) < pattern
[3]) {
705 TRACE(("reject (got %d chars, need %d)\n",
706 (end
- ptr
), pattern
[3]));
709 pattern
+= pattern
[1] + 1;
714 switch (*pattern
++) {
717 /* immediate failure */
718 TRACE(("|%p|%p|FAILURE\n", pattern
, ptr
));
723 TRACE(("|%p|%p|SUCCESS\n", pattern
, ptr
));
728 /* match at given position */
730 TRACE(("|%p|%p|AT %d\n", pattern
, ptr
, *pattern
));
731 if (!SRE_AT(state
, ptr
, *pattern
))
736 case SRE_OP_CATEGORY
:
737 /* match at given category */
738 /* <CATEGORY> <code> */
739 TRACE(("|%p|%p|CATEGORY %d\n", pattern
, ptr
, *pattern
));
740 if (ptr
>= end
|| !sre_category(pattern
[0], ptr
[0]))
747 /* match literal string */
748 /* <LITERAL> <code> */
749 TRACE(("|%p|%p|LITERAL %d\n", pattern
, ptr
, *pattern
));
750 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] != pattern
[0])
756 case SRE_OP_NOT_LITERAL
:
757 /* match anything that is not literal character */
758 /* <NOT_LITERAL> <code> */
759 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern
, ptr
, *pattern
));
760 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] == pattern
[0])
767 /* match anything (except a newline) */
769 TRACE(("|%p|%p|ANY\n", pattern
, ptr
));
770 if (ptr
>= end
|| SRE_IS_LINEBREAK(ptr
[0]))
778 TRACE(("|%p|%p|ANY_ALL\n", pattern
, ptr
));
785 /* match set member (or non_member) */
786 /* <IN> <skip> <set> */
787 TRACE(("|%p|%p|IN\n", pattern
, ptr
));
788 if (ptr
>= end
|| !SRE_CHARSET(pattern
+ 1, *ptr
))
790 pattern
+= pattern
[0];
794 case SRE_OP_GROUPREF
:
795 /* match backreference */
796 TRACE(("|%p|%p|GROUPREF %d\n", pattern
, ptr
, pattern
[0]));
799 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
800 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
801 if (!p
|| !e
|| e
< p
)
804 if (ptr
>= end
|| *ptr
!= *p
)
812 case SRE_OP_GROUPREF_IGNORE
:
813 /* match backreference */
814 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
817 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
818 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
819 if (!p
|| !e
|| e
< p
)
823 state
->lower(*ptr
) != state
->lower(*p
))
831 case SRE_OP_LITERAL_IGNORE
:
832 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
834 state
->lower(*ptr
) != state
->lower(*pattern
))
840 case SRE_OP_NOT_LITERAL_IGNORE
:
841 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, *pattern
));
843 state
->lower(*ptr
) == state
->lower(*pattern
))
849 case SRE_OP_IN_IGNORE
:
850 TRACE(("|%p|%p|IN_IGNORE\n", pattern
, ptr
));
852 || !SRE_CHARSET(pattern
+ 1, (SRE_CODE
) state
->lower(*ptr
)))
854 pattern
+= pattern
[0];
861 TRACE(("|%p|%p|MARK %d\n", pattern
, ptr
, pattern
[0]));
864 state
->lastindex
= i
/2 + 1;
865 if (i
> state
->lastmark
)
867 state
->mark
[i
] = ptr
;
874 /* <JUMP> <offset> */
875 TRACE(("|%p|%p|JUMP %d\n", pattern
, ptr
, pattern
[0]));
876 pattern
+= pattern
[0];
880 /* assert subpattern */
881 /* <ASSERT> <skip> <back> <pattern> */
882 TRACE(("|%p|%p|ASSERT %d\n", pattern
, ptr
, pattern
[1]));
883 state
->ptr
= ptr
- pattern
[1];
884 if (state
->ptr
< state
->beginning
)
886 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
889 pattern
+= pattern
[0];
892 case SRE_OP_ASSERT_NOT
:
893 /* assert not subpattern */
894 /* <ASSERT_NOT> <skip> <back> <pattern> */
895 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern
, ptr
, pattern
[1]));
896 state
->ptr
= ptr
- pattern
[1];
897 if (state
->ptr
>= state
->beginning
) {
898 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
904 pattern
+= pattern
[0];
909 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
910 TRACE(("|%p|%p|BRANCH\n", pattern
, ptr
));
911 lastmark
= state
->lastmark
;
912 for (; pattern
[0]; pattern
+= pattern
[0]) {
913 if (pattern
[1] == SRE_OP_LITERAL
&&
914 (ptr
>= end
|| (SRE_CODE
) *ptr
!= pattern
[2]))
916 if (pattern
[1] == SRE_OP_IN
&&
917 (ptr
>= end
|| !SRE_CHARSET(pattern
+ 3, (SRE_CODE
) *ptr
)))
920 i
= SRE_MATCH(state
, pattern
+ 1, level
+ 1);
923 if (state
->lastmark
> lastmark
) {
925 state
->mark
+ lastmark
+ 1, 0,
926 (state
->lastmark
- lastmark
) * sizeof(void*)
928 state
->lastmark
= lastmark
;
933 case SRE_OP_REPEAT_ONE
:
934 /* match repeated sequence (maximizing regexp) */
936 /* this operator only works if the repeated item is
937 exactly one character wide, and we're not already
938 collecting backtracking points. for other cases,
939 use the MAX_REPEAT operator */
941 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
943 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern
, ptr
,
944 pattern
[1], pattern
[2]));
946 if (ptr
+ pattern
[1] > end
)
947 return 0; /* cannot match */
951 count
= SRE_COUNT(state
, pattern
+ 3, pattern
[2], level
+ 1);
957 /* when we arrive here, count contains the number of
958 matches, and ptr points to the tail of the target
959 string. check if the rest of the pattern matches,
960 and backtrack if not. */
962 if (count
< (int) pattern
[1])
965 if (pattern
[pattern
[0]] == SRE_OP_SUCCESS
) {
966 /* tail is empty. we're finished */
970 } else if (pattern
[pattern
[0]] == SRE_OP_LITERAL
) {
971 /* tail starts with a literal. skip positions where
972 the rest of the pattern cannot possibly match */
973 chr
= pattern
[pattern
[0]+1];
975 while (count
>= (int) pattern
[1] &&
976 (ptr
>= end
|| *ptr
!= chr
)) {
980 if (count
< (int) pattern
[1])
983 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
992 lastmark
= state
->lastmark
;
993 while (count
>= (int) pattern
[1]) {
995 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1000 if (state
->lastmark
> lastmark
) {
1002 state
->mark
+ lastmark
+ 1, 0,
1003 (state
->lastmark
- lastmark
) * sizeof(void*)
1005 state
->lastmark
= lastmark
;
1012 /* create repeat context. all the hard work is done
1013 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1014 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1015 TRACE(("|%p|%p|REPEAT %d %d\n", pattern
, ptr
,
1016 pattern
[1], pattern
[2]));
1019 rep
.pattern
= pattern
;
1021 /* install new repeat context */
1022 rep
.prev
= state
->repeat
;
1023 state
->repeat
= &rep
;
1026 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1028 state
->repeat
= rep
.prev
;
1032 case SRE_OP_MAX_UNTIL
:
1033 /* maximizing repeat */
1034 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1036 /* FIXME: we probably need to deal with zero-width
1037 matches in here... */
1041 return SRE_ERROR_STATE
;
1045 count
= rp
->count
+ 1;
1047 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern
, ptr
, count
));
1049 if (count
< rp
->pattern
[1]) {
1050 /* not enough matches */
1053 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1056 rp
->count
= count
- 1;
1061 if (count
< rp
->pattern
[2] || rp
->pattern
[2] == 65535) {
1062 /* we may have enough matches, but if we can
1063 match another item, do so */
1065 lastmark
= state
->lastmark
;
1066 i
= mark_save(state
, 0, lastmark
);
1070 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1073 i
= mark_restore(state
, 0, lastmark
);
1074 state
->lastmark
= lastmark
;
1077 rp
->count
= count
- 1;
1081 /* cannot match more repeated items here. make sure the
1083 state
->repeat
= rp
->prev
;
1084 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1091 case SRE_OP_MIN_UNTIL
:
1092 /* minimizing repeat */
1093 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1097 return SRE_ERROR_STATE
;
1099 count
= rp
->count
+ 1;
1101 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern
, ptr
, count
,
1106 if (count
< rp
->pattern
[1]) {
1107 /* not enough matches */
1110 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1113 rp
->count
= count
-1;
1118 /* see if the tail matches */
1119 state
->repeat
= rp
->prev
;
1120 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1127 if (count
>= rp
->pattern
[2] && rp
->pattern
[2] != 65535)
1132 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1135 rp
->count
= count
- 1;
1140 TRACE(("|%p|%p|UNKNOWN %d\n", pattern
, ptr
, pattern
[-1]));
1141 return SRE_ERROR_ILLEGAL
;
1145 /* can't end up here */
1146 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
1150 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1152 SRE_CHAR
* ptr
= state
->start
;
1153 SRE_CHAR
* end
= state
->end
;
1156 int prefix_skip
= 0;
1157 SRE_CODE
* prefix
= NULL
;
1158 SRE_CODE
* charset
= NULL
;
1159 SRE_CODE
* overlap
= NULL
;
1162 if (pattern
[0] == SRE_OP_INFO
) {
1163 /* optimization info block */
1164 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1168 if (pattern
[3] > 0) {
1169 /* adjust end point (but make sure we leave at least one
1170 character in there, so literal search will work) */
1171 end
-= pattern
[3]-1;
1176 if (flags
& SRE_INFO_PREFIX
) {
1177 /* pattern starts with a known prefix */
1178 /* <length> <skip> <prefix data> <overlap data> */
1179 prefix_len
= pattern
[5];
1180 prefix_skip
= pattern
[6];
1181 prefix
= pattern
+ 7;
1182 overlap
= prefix
+ prefix_len
- 1;
1183 } else if (flags
& SRE_INFO_CHARSET
)
1184 /* pattern starts with a character from a known set */
1186 charset
= pattern
+ 5;
1188 pattern
+= 1 + pattern
[1];
1191 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1192 TRACE(("charset = %p\n", charset
));
1194 #if defined(USE_FAST_SEARCH)
1195 if (prefix_len
> 1) {
1196 /* pattern starts with a known prefix. use the overlap
1197 table to skip forward as fast as we possibly can */
1202 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1208 if (++i
== prefix_len
) {
1209 /* found a potential match */
1210 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1211 state
->start
= ptr
+ 1 - prefix_len
;
1212 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1213 if (flags
& SRE_INFO_LITERAL
)
1214 return 1; /* we got all of it */
1215 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
, 1);
1218 /* close but no cigar -- try again */
1231 if (pattern
[0] == SRE_OP_LITERAL
) {
1232 /* pattern starts with a literal character. this is used
1233 for short prefixes, and if fast search is disabled */
1234 SRE_CODE chr
= pattern
[1];
1237 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1241 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1244 if (flags
& SRE_INFO_LITERAL
)
1245 return 1; /* we got all of it */
1246 status
= SRE_MATCH(state
, pattern
+ 2, 1);
1250 } else if (charset
) {
1251 /* pattern starts with a character from a known set */
1254 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1258 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1261 status
= SRE_MATCH(state
, pattern
, 1);
1268 while (ptr
<= end
) {
1269 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1270 state
->start
= state
->ptr
= ptr
++;
1271 status
= SRE_MATCH(state
, pattern
, 1);
1280 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, int len
)
1282 /* check if given string is a literal template (i.e. no escapes) */
1289 #if !defined(SRE_RECURSIVE)
1291 /* -------------------------------------------------------------------- */
1292 /* factories and destructors */
1294 /* see sre.h for object declarations */
1296 static PyTypeObject Pattern_Type
;
1297 static PyTypeObject Match_Type
;
1298 static PyTypeObject Scanner_Type
;
1301 _compile(PyObject
* self_
, PyObject
* args
)
1303 /* "compile" pattern descriptor to pattern object */
1305 PatternObject
* self
;
1312 PyObject
* groupindex
= NULL
;
1313 PyObject
* indexgroup
= NULL
;
1314 if (!PyArg_ParseTuple(args
, "OiO!|iOO", &pattern
, &flags
,
1315 &PyList_Type
, &code
, &groups
,
1316 &groupindex
, &indexgroup
))
1319 n
= PyList_GET_SIZE(code
);
1321 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
1327 for (i
= 0; i
< n
; i
++) {
1328 PyObject
*o
= PyList_GET_ITEM(code
, i
);
1329 self
->code
[i
] = (SRE_CODE
) PyInt_AsLong(o
);
1332 if (PyErr_Occurred()) {
1338 self
->pattern
= pattern
;
1340 self
->flags
= flags
;
1342 self
->groups
= groups
;
1344 Py_XINCREF(groupindex
);
1345 self
->groupindex
= groupindex
;
1347 Py_XINCREF(indexgroup
);
1348 self
->indexgroup
= indexgroup
;
1350 return (PyObject
*) self
;
1354 sre_codesize(PyObject
* self
, PyObject
* args
)
1356 return Py_BuildValue("i", sizeof(SRE_CODE
));
1360 sre_getlower(PyObject
* self
, PyObject
* args
)
1362 int character
, flags
;
1363 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1365 if (flags
& SRE_FLAG_LOCALE
)
1366 return Py_BuildValue("i", sre_lower_locale(character
));
1367 if (flags
& SRE_FLAG_UNICODE
)
1368 #if defined(HAVE_UNICODE)
1369 return Py_BuildValue("i", sre_lower_unicode(character
));
1371 return Py_BuildValue("i", sre_lower_locale(character
));
1373 return Py_BuildValue("i", sre_lower(character
));
1377 state_reset(SRE_STATE
* state
)
1379 state
->lastmark
= 0;
1381 /* FIXME: dynamic! */
1382 memset(state
->mark
, 0, sizeof(*state
->mark
) * SRE_MARK_SIZE
);
1384 state
->lastindex
= -1;
1386 state
->repeat
= NULL
;
1392 getstring(PyObject
* string
, int* p_length
, int* p_charsize
)
1394 /* given a python object, return a data pointer, a length (in
1395 characters), and a character size. return NULL if the object
1396 is not a string (or not compatible) */
1398 PyBufferProcs
*buffer
;
1399 int size
, bytes
, charsize
;
1402 #if defined(HAVE_UNICODE)
1403 if (PyUnicode_Check(string
)) {
1404 /* unicode strings doesn't always support the buffer interface */
1405 ptr
= (void*) PyUnicode_AS_DATA(string
);
1406 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1407 size
= PyUnicode_GET_SIZE(string
);
1408 charsize
= sizeof(Py_UNICODE
);
1413 /* get pointer to string buffer */
1414 buffer
= string
->ob_type
->tp_as_buffer
;
1415 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1416 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1417 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1421 /* determine buffer size */
1422 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1424 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1428 /* determine character size */
1429 #if PY_VERSION_HEX >= 0x01060000
1430 size
= PyObject_Size(string
);
1432 size
= PyObject_Length(string
);
1435 if (PyString_Check(string
) || bytes
== size
)
1437 #if defined(HAVE_UNICODE)
1438 else if (bytes
== (int) (size
* sizeof(Py_UNICODE
)))
1439 charsize
= sizeof(Py_UNICODE
);
1442 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1446 #if defined(HAVE_UNICODE)
1451 *p_charsize
= charsize
;
1457 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1460 /* prepare state object */
1466 memset(state
, 0, sizeof(SRE_STATE
));
1468 state
->lastindex
= -1;
1470 ptr
= getstring(string
, &length
, &charsize
);
1474 /* adjust boundaries */
1477 else if (start
> length
)
1482 else if (end
> length
)
1485 state
->charsize
= charsize
;
1487 state
->beginning
= ptr
;
1489 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1490 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1493 state
->string
= string
;
1495 state
->endpos
= end
;
1497 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1498 state
->lower
= sre_lower_locale
;
1499 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1500 #if defined(HAVE_UNICODE)
1501 state
->lower
= sre_lower_unicode
;
1503 state
->lower
= sre_lower_locale
;
1506 state
->lower
= sre_lower
;
1512 state_fini(SRE_STATE
* state
)
1514 Py_XDECREF(state
->string
);
1518 /* calculate offset from start of string */
1519 #define STATE_OFFSET(state, member)\
1520 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1523 state_getslice(SRE_STATE
* state
, int index
, PyObject
* string
, int empty
)
1527 index
= (index
- 1) * 2;
1529 if (string
== Py_None
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1531 /* want empty string */
1538 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1539 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1542 return PySequence_GetSlice(string
, i
, j
);
1546 pattern_error(int status
)
1549 case SRE_ERROR_RECURSION_LIMIT
:
1552 "maximum recursion limit exceeded"
1555 case SRE_ERROR_MEMORY
:
1559 /* other error codes indicate compiler/engine bugs */
1562 "internal error in regular expression engine"
1568 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
1570 /* create match object (from state object) */
1579 /* create match object (with room for extra group marks) */
1580 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
1581 2*(pattern
->groups
+1));
1586 match
->pattern
= pattern
;
1588 Py_INCREF(state
->string
);
1589 match
->string
= state
->string
;
1592 match
->groups
= pattern
->groups
+1;
1594 /* fill in group slices */
1596 base
= (char*) state
->beginning
;
1597 n
= state
->charsize
;
1599 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
1600 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
1602 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
1603 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
1604 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
1605 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
1607 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
1609 match
->pos
= state
->pos
;
1610 match
->endpos
= state
->endpos
;
1612 match
->lastindex
= state
->lastindex
;
1614 return (PyObject
*) match
;
1616 } else if (status
== 0) {
1624 /* internal error */
1625 pattern_error(status
);
1630 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
1632 /* create search state object */
1634 ScannerObject
* self
;
1639 if (!PyArg_ParseTuple(args
, "O|ii:scanner", &string
, &start
, &end
))
1642 /* create scanner object */
1643 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
1647 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
1654 self
->pattern
= (PyObject
*) pattern
;
1656 return (PyObject
*) self
;
1660 pattern_dealloc(PatternObject
* self
)
1662 Py_XDECREF(self
->pattern
);
1663 Py_XDECREF(self
->groupindex
);
1664 Py_XDECREF(self
->indexgroup
);
1669 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1677 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1678 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:match", kwlist
,
1679 &string
, &start
, &end
))
1682 string
= state_init(&state
, self
, string
, start
, end
);
1686 state
.ptr
= state
.start
;
1688 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1690 if (state
.charsize
== 1) {
1691 status
= sre_match(&state
, PatternObject_GetCode(self
), 1);
1693 #if defined(HAVE_UNICODE)
1694 status
= sre_umatch(&state
, PatternObject_GetCode(self
), 1);
1698 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1702 return pattern_new_match(self
, &state
, status
);
1706 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1714 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1715 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:search", kwlist
,
1716 &string
, &start
, &end
))
1719 string
= state_init(&state
, self
, string
, start
, end
);
1723 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1725 if (state
.charsize
== 1) {
1726 status
= sre_search(&state
, PatternObject_GetCode(self
));
1728 #if defined(HAVE_UNICODE)
1729 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1733 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1737 return pattern_new_match(self
, &state
, status
);
1741 call(char* module
, char* function
, PyObject
* args
)
1750 name
= PyString_FromString(module
);
1753 mod
= PyImport_Import(name
);
1757 func
= PyObject_GetAttrString(mod
, function
);
1761 result
= PyObject_CallObject(func
, args
);
1767 #ifdef USE_BUILTIN_COPY
1769 deepcopy(PyObject
** object
, PyObject
* memo
)
1775 Py_BuildValue("OO", *object
, memo
)
1783 return 1; /* success */
1788 join_list(PyObject
* list
, PyObject
* pattern
)
1790 /* join list elements */
1793 #if PY_VERSION_HEX >= 0x01060000
1799 switch (PyList_GET_SIZE(list
)) {
1802 return PyString_FromString("");
1804 result
= PyList_GET_ITEM(list
, 0);
1810 /* two or more elements: slice out a suitable separator from the
1811 first member, and use that to join the entire list */
1813 joiner
= PySequence_GetSlice(pattern
, 0, 0);
1817 #if PY_VERSION_HEX >= 0x01060000
1818 function
= PyObject_GetAttrString(joiner
, "join");
1823 args
= PyTuple_New(1);
1825 Py_DECREF(function
);
1829 PyTuple_SET_ITEM(args
, 0, list
);
1830 result
= PyObject_CallObject(function
, args
);
1831 Py_DECREF(args
); /* also removes list */
1832 Py_DECREF(function
);
1836 Py_BuildValue("OO", list
, joiner
)
1845 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1855 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
1856 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:findall", kwlist
,
1857 &string
, &start
, &end
))
1860 string
= state_init(&state
, self
, string
, start
, end
);
1864 list
= PyList_New(0);
1870 while (state
.start
<= state
.end
) {
1874 state_reset(&state
);
1876 state
.ptr
= state
.start
;
1878 if (state
.charsize
== 1) {
1879 status
= sre_search(&state
, PatternObject_GetCode(self
));
1881 #if defined(HAVE_UNICODE)
1882 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1889 pattern_error(status
);
1893 /* don't bother to build a match object */
1894 switch (self
->groups
) {
1896 b
= STATE_OFFSET(&state
, state
.start
);
1897 e
= STATE_OFFSET(&state
, state
.ptr
);
1898 item
= PySequence_GetSlice(string
, b
, e
);
1903 item
= state_getslice(&state
, 1, string
, 1);
1908 item
= PyTuple_New(self
->groups
);
1911 for (i
= 0; i
< self
->groups
; i
++) {
1912 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
1917 PyTuple_SET_ITEM(item
, i
, o
);
1922 status
= PyList_Append(list
, item
);
1927 if (state
.ptr
== state
.start
)
1928 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
1930 state
.start
= state
.ptr
;
1944 #if PY_VERSION_HEX >= 0x02020000
1946 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
1952 scanner
= pattern_scanner(pattern
, args
);
1956 search
= PyObject_GetAttrString(scanner
, "search");
1961 iterator
= PyCallIter_New(search
, Py_None
);
1969 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1981 static char* kwlist
[] = { "source", "maxsplit", NULL
};
1982 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|i:split", kwlist
,
1983 &string
, &maxsplit
))
1986 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
1990 list
= PyList_New(0);
1999 while (!maxsplit
|| n
< maxsplit
) {
2001 state_reset(&state
);
2003 state
.ptr
= state
.start
;
2005 if (state
.charsize
== 1) {
2006 status
= sre_search(&state
, PatternObject_GetCode(self
));
2008 #if defined(HAVE_UNICODE)
2009 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2016 pattern_error(status
);
2020 if (state
.start
== state
.ptr
) {
2021 if (last
== state
.end
)
2023 /* skip one character */
2024 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2028 /* get segment before this match */
2029 item
= PySequence_GetSlice(
2030 string
, STATE_OFFSET(&state
, last
),
2031 STATE_OFFSET(&state
, state
.start
)
2035 status
= PyList_Append(list
, item
);
2040 /* add groups (if any) */
2041 for (i
= 0; i
< self
->groups
; i
++) {
2042 item
= state_getslice(&state
, i
+1, string
, 0);
2045 status
= PyList_Append(list
, item
);
2053 last
= state
.start
= state
.ptr
;
2057 /* get segment following last match (even if empty) */
2058 item
= PySequence_GetSlice(
2059 string
, STATE_OFFSET(&state
, last
), state
.endpos
2063 status
= PyList_Append(list
, item
);
2079 pattern_subx(PatternObject
* self
, PyObject
* template, PyObject
* string
,
2080 int count
, int subn
)
2092 int filter_is_callable
;
2094 if (PyCallable_Check(template)) {
2095 /* sub/subn takes either a function or a template */
2098 filter_is_callable
= 1;
2100 /* if not callable, check if it's a literal string */
2102 ptr
= getstring(template, &n
, &b
);
2105 literal
= sre_literal_template(ptr
, n
);
2107 #if defined(HAVE_UNICODE)
2108 literal
= sre_uliteral_template(ptr
, n
);
2118 filter_is_callable
= 0;
2120 /* not a literal; hand it over to the template compiler */
2122 SRE_MODULE
, "_subx",
2123 Py_BuildValue("OO", self
, template)
2127 filter_is_callable
= PyCallable_Check(filter
);
2131 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
2137 list
= PyList_New(0);
2146 while (!count
|| n
< count
) {
2148 state_reset(&state
);
2150 state
.ptr
= state
.start
;
2152 if (state
.charsize
== 1) {
2153 status
= sre_search(&state
, PatternObject_GetCode(self
));
2155 #if defined(HAVE_UNICODE)
2156 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2163 pattern_error(status
);
2167 b
= STATE_OFFSET(&state
, state
.start
);
2168 e
= STATE_OFFSET(&state
, state
.ptr
);
2171 /* get segment before this match */
2172 item
= PySequence_GetSlice(string
, i
, b
);
2175 status
= PyList_Append(list
, item
);
2180 } else if (i
== b
&& i
== e
&& n
> 0)
2181 /* ignore empty match on latest position */
2184 if (filter_is_callable
) {
2185 /* pass match object through filter */
2186 match
= pattern_new_match(self
, &state
, 1);
2189 args
= Py_BuildValue("(O)", match
);
2194 item
= PyObject_CallObject(filter
, args
);
2200 /* filter is literal string */
2206 if (item
!= Py_None
) {
2207 status
= PyList_Append(list
, item
);
2218 if (state
.ptr
== state
.start
)
2219 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2221 state
.start
= state
.ptr
;
2225 /* get segment following last match */
2226 if (i
< state
.endpos
) {
2227 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2230 status
= PyList_Append(list
, item
);
2240 /* convert list to single string (also removes list) */
2241 item
= join_list(list
, self
->pattern
);
2247 return Py_BuildValue("Ni", item
, n
);
2260 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2265 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2266 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:sub", kwlist
,
2267 &template, &string
, &count
))
2270 return pattern_subx(self
, template, string
, count
, 0);
2274 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2279 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2280 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:subn", kwlist
,
2281 &template, &string
, &count
))
2284 return pattern_subx(self
, template, string
, count
, 1);
2288 pattern_copy(PatternObject
* self
, PyObject
* args
)
2290 #ifdef USE_BUILTIN_COPY
2291 PatternObject
* copy
;
2294 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2297 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2301 offset
= offsetof(PatternObject
, groups
);
2303 Py_XINCREF(self
->groupindex
);
2304 Py_XINCREF(self
->indexgroup
);
2305 Py_XINCREF(self
->pattern
);
2307 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2308 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2310 return (PyObject
*) copy
;
2312 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2318 pattern_deepcopy(PatternObject
* self
, PyObject
* args
)
2320 #ifdef USE_BUILTIN_COPY
2321 PatternObject
* copy
;
2324 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2327 copy
= (PatternObject
*) pattern_copy(self
, Py_None
);
2331 if (!deepcopy(©
->groupindex
, memo
) ||
2332 !deepcopy(©
->indexgroup
, memo
) ||
2333 !deepcopy(©
->pattern
, memo
)) {
2339 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2344 static PyMethodDef pattern_methods
[] = {
2345 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
},
2346 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
},
2347 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
},
2348 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
},
2349 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
},
2350 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
},
2351 #if PY_VERSION_HEX >= 0x02020000
2352 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
},
2354 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2355 {"__copy__", (PyCFunction
) pattern_copy
, METH_VARARGS
},
2356 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_VARARGS
},
2361 pattern_getattr(PatternObject
* self
, char* name
)
2365 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2373 if (!strcmp(name
, "pattern")) {
2374 Py_INCREF(self
->pattern
);
2375 return self
->pattern
;
2378 if (!strcmp(name
, "flags"))
2379 return Py_BuildValue("i", self
->flags
);
2381 if (!strcmp(name
, "groups"))
2382 return Py_BuildValue("i", self
->groups
);
2384 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2385 Py_INCREF(self
->groupindex
);
2386 return self
->groupindex
;
2389 PyErr_SetString(PyExc_AttributeError
, name
);
2393 statichere PyTypeObject Pattern_Type
= {
2394 PyObject_HEAD_INIT(NULL
)
2395 0, "_" SRE_MODULE
".SRE_Pattern",
2396 sizeof(PatternObject
), sizeof(SRE_CODE
),
2397 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2399 (getattrfunc
)pattern_getattr
/*tp_getattr*/
2402 /* -------------------------------------------------------------------- */
2406 match_dealloc(MatchObject
* self
)
2408 Py_XDECREF(self
->regs
);
2409 Py_XDECREF(self
->string
);
2410 Py_DECREF(self
->pattern
);
2415 match_getslice_by_index(MatchObject
* self
, int index
, PyObject
* def
)
2417 if (index
< 0 || index
>= self
->groups
) {
2418 /* raise IndexError if we were given a bad group number */
2428 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2429 /* return default value if the string or group is undefined */
2434 return PySequence_GetSlice(
2435 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2440 match_getindex(MatchObject
* self
, PyObject
* index
)
2444 if (PyInt_Check(index
))
2445 return (int) PyInt_AS_LONG(index
);
2449 if (self
->pattern
->groupindex
) {
2450 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2452 if (PyInt_Check(index
))
2453 i
= (int) PyInt_AS_LONG(index
);
2463 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2465 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2469 match_expand(MatchObject
* self
, PyObject
* args
)
2472 if (!PyArg_ParseTuple(args
, "O:expand", &template))
2475 /* delegate to Python code */
2477 SRE_MODULE
, "_expand",
2478 Py_BuildValue("OOO", self
->pattern
, self
, template)
2483 match_group(MatchObject
* self
, PyObject
* args
)
2488 size
= PyTuple_GET_SIZE(args
);
2492 result
= match_getslice(self
, Py_False
, Py_None
);
2495 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2498 /* fetch multiple items */
2499 result
= PyTuple_New(size
);
2502 for (i
= 0; i
< size
; i
++) {
2503 PyObject
* item
= match_getslice(
2504 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2510 PyTuple_SET_ITEM(result
, i
, item
);
2518 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2523 PyObject
* def
= Py_None
;
2524 static char* kwlist
[] = { "default", NULL
};
2525 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2528 result
= PyTuple_New(self
->groups
-1);
2532 for (index
= 1; index
< self
->groups
; index
++) {
2534 item
= match_getslice_by_index(self
, index
, def
);
2539 PyTuple_SET_ITEM(result
, index
-1, item
);
2546 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2552 PyObject
* def
= Py_None
;
2553 static char* kwlist
[] = { "default", NULL
};
2554 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2557 result
= PyDict_New();
2558 if (!result
|| !self
->pattern
->groupindex
)
2561 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2565 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2569 key
= PyList_GET_ITEM(keys
, index
);
2572 value
= match_getslice(self
, key
, def
);
2577 status
= PyDict_SetItem(result
, key
, value
);
2594 match_start(MatchObject
* self
, PyObject
* args
)
2598 PyObject
* index_
= Py_False
; /* zero */
2599 if (!PyArg_ParseTuple(args
, "|O:start", &index_
))
2602 index
= match_getindex(self
, index_
);
2604 if (index
< 0 || index
>= self
->groups
) {
2612 /* mark is -1 if group is undefined */
2613 return Py_BuildValue("i", self
->mark
[index
*2]);
2617 match_end(MatchObject
* self
, PyObject
* args
)
2621 PyObject
* index_
= Py_False
; /* zero */
2622 if (!PyArg_ParseTuple(args
, "|O:end", &index_
))
2625 index
= match_getindex(self
, index_
);
2627 if (index
< 0 || index
>= self
->groups
) {
2635 /* mark is -1 if group is undefined */
2636 return Py_BuildValue("i", self
->mark
[index
*2+1]);
2640 _pair(int i1
, int i2
)
2645 pair
= PyTuple_New(2);
2649 item
= PyInt_FromLong(i1
);
2652 PyTuple_SET_ITEM(pair
, 0, item
);
2654 item
= PyInt_FromLong(i2
);
2657 PyTuple_SET_ITEM(pair
, 1, item
);
2667 match_span(MatchObject
* self
, PyObject
* args
)
2671 PyObject
* index_
= Py_False
; /* zero */
2672 if (!PyArg_ParseTuple(args
, "|O:span", &index_
))
2675 index
= match_getindex(self
, index_
);
2677 if (index
< 0 || index
>= self
->groups
) {
2685 /* marks are -1 if group is undefined */
2686 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2690 match_regs(MatchObject
* self
)
2696 regs
= PyTuple_New(self
->groups
);
2700 for (index
= 0; index
< self
->groups
; index
++) {
2701 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2706 PyTuple_SET_ITEM(regs
, index
, item
);
2716 match_copy(MatchObject
* self
, PyObject
* args
)
2718 #ifdef USE_BUILTIN_COPY
2722 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2725 slots
= 2 * (self
->pattern
->groups
+1);
2727 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
2731 /* this value is a constant, but any compiler should be able to
2732 figure that out all by itself */
2733 offset
= offsetof(MatchObject
, string
);
2735 Py_XINCREF(self
->pattern
);
2736 Py_XINCREF(self
->string
);
2737 Py_XINCREF(self
->regs
);
2739 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2740 sizeof(MatchObject
) + slots
* sizeof(int) - offset
);
2742 return (PyObject
*) copy
;
2744 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
2750 match_deepcopy(MatchObject
* self
, PyObject
* args
)
2752 #ifdef USE_BUILTIN_COPY
2756 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2759 copy
= (MatchObject
*) match_copy(self
, Py_None
);
2763 if (!deepcopy((PyObject
**) ©
->pattern
, memo
) ||
2764 !deepcopy(©
->string
, memo
) ||
2765 !deepcopy(©
->regs
, memo
)) {
2771 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
2776 static PyMethodDef match_methods
[] = {
2777 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
2778 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
2779 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
2780 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
2781 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
2782 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
2783 {"expand", (PyCFunction
) match_expand
, METH_VARARGS
},
2784 {"__copy__", (PyCFunction
) match_copy
, METH_VARARGS
},
2785 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_VARARGS
},
2790 match_getattr(MatchObject
* self
, char* name
)
2794 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
2800 if (!strcmp(name
, "lastindex")) {
2801 if (self
->lastindex
>= 0)
2802 return Py_BuildValue("i", self
->lastindex
);
2807 if (!strcmp(name
, "lastgroup")) {
2808 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
2809 PyObject
* result
= PySequence_GetItem(
2810 self
->pattern
->indexgroup
, self
->lastindex
2820 if (!strcmp(name
, "string")) {
2822 Py_INCREF(self
->string
);
2823 return self
->string
;
2830 if (!strcmp(name
, "regs")) {
2832 Py_INCREF(self
->regs
);
2835 return match_regs(self
);
2838 if (!strcmp(name
, "re")) {
2839 Py_INCREF(self
->pattern
);
2840 return (PyObject
*) self
->pattern
;
2843 if (!strcmp(name
, "pos"))
2844 return Py_BuildValue("i", self
->pos
);
2846 if (!strcmp(name
, "endpos"))
2847 return Py_BuildValue("i", self
->endpos
);
2849 PyErr_SetString(PyExc_AttributeError
, name
);
2853 /* FIXME: implement setattr("string", None) as a special case (to
2854 detach the associated string, if any) */
2856 statichere PyTypeObject Match_Type
= {
2857 PyObject_HEAD_INIT(NULL
)
2858 0, "_" SRE_MODULE
".SRE_Match",
2859 sizeof(MatchObject
), sizeof(int),
2860 (destructor
)match_dealloc
, /*tp_dealloc*/
2862 (getattrfunc
)match_getattr
/*tp_getattr*/
2865 /* -------------------------------------------------------------------- */
2866 /* scanner methods (experimental) */
2869 scanner_dealloc(ScannerObject
* self
)
2871 state_fini(&self
->state
);
2872 Py_DECREF(self
->pattern
);
2877 scanner_match(ScannerObject
* self
, PyObject
* args
)
2879 SRE_STATE
* state
= &self
->state
;
2885 state
->ptr
= state
->start
;
2887 if (state
->charsize
== 1) {
2888 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
), 1);
2890 #if defined(HAVE_UNICODE)
2891 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
), 1);
2895 match
= pattern_new_match((PatternObject
*) self
->pattern
,
2898 if (status
== 0 || state
->ptr
== state
->start
)
2899 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
2901 state
->start
= state
->ptr
;
2908 scanner_search(ScannerObject
* self
, PyObject
* args
)
2910 SRE_STATE
* state
= &self
->state
;
2916 state
->ptr
= state
->start
;
2918 if (state
->charsize
== 1) {
2919 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
2921 #if defined(HAVE_UNICODE)
2922 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
2926 match
= pattern_new_match((PatternObject
*) self
->pattern
,
2929 if (status
== 0 || state
->ptr
== state
->start
)
2930 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
2932 state
->start
= state
->ptr
;
2937 static PyMethodDef scanner_methods
[] = {
2938 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2939 /* METH_OLDARGS is not in Python 1.5.2 */
2940 {"match", (PyCFunction
) scanner_match
, 0},
2941 {"search", (PyCFunction
) scanner_search
, 0},
2946 scanner_getattr(ScannerObject
* self
, char* name
)
2950 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
2957 if (!strcmp(name
, "pattern")) {
2958 Py_INCREF(self
->pattern
);
2959 return self
->pattern
;
2962 PyErr_SetString(PyExc_AttributeError
, name
);
2966 statichere PyTypeObject Scanner_Type
= {
2967 PyObject_HEAD_INIT(NULL
)
2968 0, "_" SRE_MODULE
".SRE_Scanner",
2969 sizeof(ScannerObject
), 0,
2970 (destructor
)scanner_dealloc
, /*tp_dealloc*/
2972 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
2975 static PyMethodDef _functions
[] = {
2976 {"compile", _compile
, METH_VARARGS
},
2977 {"getcodesize", sre_codesize
, METH_VARARGS
},
2978 {"getlower", sre_getlower
, METH_VARARGS
},
2982 PyMODINIT_FUNC
init_sre(void)
2988 /* Patch object types */
2989 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
2990 Scanner_Type
.ob_type
= &PyType_Type
;
2992 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
2993 d
= PyModule_GetDict(m
);
2995 x
= PyInt_FromLong(SRE_MAGIC
);
2997 PyDict_SetItemString(d
, "MAGIC", x
);
3001 x
= PyString_FromString(copyright
);
3003 PyDict_SetItemString(d
, "copyright", x
);
3008 #endif /* !defined(SRE_RECURSIVE) */