2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-06-30 fl added fast search optimization
10 * 2000-06-30 fl added assert (lookahead) primitives, etc
11 * 2000-07-02 fl added charset optimizations, etc
12 * 2000-07-03 fl store code in pattern object, lookbehind, etc
13 * 2000-07-08 fl added regs attribute
14 * 2000-07-21 fl reset lastindex in scanner methods
15 * 2000-08-01 fl fixes for 1.6b1
16 * 2000-08-03 fl added recursion limit
17 * 2000-08-07 fl use PyOS_CheckStack() if available
18 * 2000-08-08 fl changed findall to return empty strings instead of None
19 * 2000-08-27 fl properly propagate memory errors
20 * 2000-09-02 fl return -1 instead of None for start/end/span
21 * 2000-09-20 fl added expand method
22 * 2000-09-21 fl don't use the buffer interface for unicode strings
23 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
24 * 2000-10-24 fl really fixed assert_not; reset groups in findall
25 * 2000-12-21 fl fixed memory leak in groupdict
26 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
27 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
28 * 2001-01-16 fl fixed memory leak in pattern destructor
29 * 2001-03-20 fl lots of fixes for 2.1b2
30 * 2001-04-15 fl export copyright as Python attribute, not global
31 * 2001-04-28 fl added __copy__ methods (work in progress)
32 * 2001-05-14 fl fixes for 1.5.2
33 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
34 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
35 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
36 * 2001-10-21 fl added sub/subn primitive
37 * 2001-10-22 fl check for literal sub/subn templates
38 * 2001-10-24 fl added finditer primitive (for 2.2 only)
39 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
41 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
43 * This version of the SRE library can be redistributed under CNRI's
44 * Python 1.6 license. For any other use, please contact Secret Labs
45 * AB (info@pythonware.com).
47 * Portions of this engine have been developed in cooperation with
48 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
49 * other compatibility work.
54 static char copyright
[] =
55 " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
58 #include "structmember.h" /* offsetof */
64 /* name of this module, minus the leading underscore */
65 #if !defined(SRE_MODULE)
66 #define SRE_MODULE "sre"
69 /* defining this one enables tracing */
72 #if PY_VERSION_HEX >= 0x01060000
73 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
74 /* defining this enables unicode support (default under 1.6a1 and later) */
79 /* -------------------------------------------------------------------- */
80 /* optional features */
82 /* prevent run-away recursion (bad patterns on long strings) */
84 #if !defined(USE_STACKCHECK)
85 #if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
86 /* require smaller recursion limit for a number of 64-bit platforms:
87 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
88 /* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
89 #define USE_RECURSION_LIMIT 7500
91 #define USE_RECURSION_LIMIT 10000
95 /* enables fast searching */
96 #define USE_FAST_SEARCH
98 /* enables aggressive inlining (always on for Visual C) */
101 /* enables copy/deepcopy handling (work in progress) */
102 #undef USE_BUILTIN_COPY
104 #if PY_VERSION_HEX < 0x01060000
105 #define PyObject_DEL(op) PyMem_DEL((op))
108 /* -------------------------------------------------------------------- */
110 #if defined(_MSC_VER)
111 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
112 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
113 /* fastest possible local call under MSVC */
114 #define LOCAL(type) static __inline type __fastcall
115 #elif defined(USE_INLINE)
116 #define LOCAL(type) static inline type
118 #define LOCAL(type) static type
122 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
123 #define SRE_ERROR_STATE -2 /* illegal state */
124 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
125 #define SRE_ERROR_MEMORY -9 /* out of memory */
128 #define TRACE(v) printf v
133 /* -------------------------------------------------------------------- */
134 /* search engine state */
136 /* default character predicates (run sre_chars.py to regenerate tables) */
138 #define SRE_DIGIT_MASK 1
139 #define SRE_SPACE_MASK 2
140 #define SRE_LINEBREAK_MASK 4
141 #define SRE_ALNUM_MASK 8
142 #define SRE_WORD_MASK 16
144 /* FIXME: this assumes ASCII. create tables in init_sre() instead */
146 static char sre_char_info
[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
147 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
148 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
149 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
150 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
151 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
152 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
154 static char sre_char_lower
[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
155 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
156 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
157 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
158 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
159 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
160 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
161 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
162 120, 121, 122, 123, 124, 125, 126, 127 };
164 #define SRE_IS_DIGIT(ch)\
165 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
166 #define SRE_IS_SPACE(ch)\
167 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
168 #define SRE_IS_LINEBREAK(ch)\
169 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
170 #define SRE_IS_ALNUM(ch)\
171 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
172 #define SRE_IS_WORD(ch)\
173 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
175 static unsigned int sre_lower(unsigned int ch
)
177 return ((ch
) < 128 ? sre_char_lower
[ch
] : ch
);
180 /* locale-specific character predicates */
182 #define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
183 #define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
184 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
185 #define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
186 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
/* Lower-case a character code using the C library's tolower().
   Only codes below 256 are handed to tolower(); anything larger is
   returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
193 /* unicode-specific character predicates */
195 #if defined(HAVE_UNICODE)
197 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
198 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
199 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
200 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
201 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
203 static unsigned int sre_lower_unicode(unsigned int ch
)
205 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE
)(ch
));
211 sre_category(SRE_CODE category
, unsigned int ch
)
215 case SRE_CATEGORY_DIGIT
:
216 return SRE_IS_DIGIT(ch
);
217 case SRE_CATEGORY_NOT_DIGIT
:
218 return !SRE_IS_DIGIT(ch
);
219 case SRE_CATEGORY_SPACE
:
220 return SRE_IS_SPACE(ch
);
221 case SRE_CATEGORY_NOT_SPACE
:
222 return !SRE_IS_SPACE(ch
);
223 case SRE_CATEGORY_WORD
:
224 return SRE_IS_WORD(ch
);
225 case SRE_CATEGORY_NOT_WORD
:
226 return !SRE_IS_WORD(ch
);
227 case SRE_CATEGORY_LINEBREAK
:
228 return SRE_IS_LINEBREAK(ch
);
229 case SRE_CATEGORY_NOT_LINEBREAK
:
230 return !SRE_IS_LINEBREAK(ch
);
232 case SRE_CATEGORY_LOC_WORD
:
233 return SRE_LOC_IS_WORD(ch
);
234 case SRE_CATEGORY_LOC_NOT_WORD
:
235 return !SRE_LOC_IS_WORD(ch
);
237 #if defined(HAVE_UNICODE)
238 case SRE_CATEGORY_UNI_DIGIT
:
239 return SRE_UNI_IS_DIGIT(ch
);
240 case SRE_CATEGORY_UNI_NOT_DIGIT
:
241 return !SRE_UNI_IS_DIGIT(ch
);
242 case SRE_CATEGORY_UNI_SPACE
:
243 return SRE_UNI_IS_SPACE(ch
);
244 case SRE_CATEGORY_UNI_NOT_SPACE
:
245 return !SRE_UNI_IS_SPACE(ch
);
246 case SRE_CATEGORY_UNI_WORD
:
247 return SRE_UNI_IS_WORD(ch
);
248 case SRE_CATEGORY_UNI_NOT_WORD
:
249 return !SRE_UNI_IS_WORD(ch
);
250 case SRE_CATEGORY_UNI_LINEBREAK
:
251 return SRE_UNI_IS_LINEBREAK(ch
);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
253 return !SRE_UNI_IS_LINEBREAK(ch
);
255 case SRE_CATEGORY_UNI_DIGIT
:
256 return SRE_IS_DIGIT(ch
);
257 case SRE_CATEGORY_UNI_NOT_DIGIT
:
258 return !SRE_IS_DIGIT(ch
);
259 case SRE_CATEGORY_UNI_SPACE
:
260 return SRE_IS_SPACE(ch
);
261 case SRE_CATEGORY_UNI_NOT_SPACE
:
262 return !SRE_IS_SPACE(ch
);
263 case SRE_CATEGORY_UNI_WORD
:
264 return SRE_LOC_IS_WORD(ch
);
265 case SRE_CATEGORY_UNI_NOT_WORD
:
266 return !SRE_LOC_IS_WORD(ch
);
267 case SRE_CATEGORY_UNI_LINEBREAK
:
268 return SRE_IS_LINEBREAK(ch
);
269 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
270 return !SRE_IS_LINEBREAK(ch
);
279 mark_fini(SRE_STATE
* state
)
281 if (state
->mark_stack
) {
282 free(state
->mark_stack
);
283 state
->mark_stack
= NULL
;
285 state
->mark_stack_size
= state
->mark_stack_base
= 0;
289 mark_save(SRE_STATE
* state
, int lo
, int hi
)
293 int minsize
, newsize
;
298 size
= (hi
- lo
) + 1;
300 newsize
= state
->mark_stack_size
;
301 minsize
= state
->mark_stack_base
+ size
;
303 if (newsize
< minsize
) {
304 /* create new stack */
307 if (newsize
< minsize
)
309 TRACE(("allocate stack %d\n", newsize
));
310 stack
= malloc(sizeof(void*) * newsize
);
313 while (newsize
< minsize
)
315 TRACE(("grow stack to %d\n", newsize
));
316 stack
= realloc(state
->mark_stack
, sizeof(void*) * newsize
);
320 return SRE_ERROR_MEMORY
;
322 state
->mark_stack
= stack
;
323 state
->mark_stack_size
= newsize
;
326 TRACE(("copy %d:%d to %d (%d)\n", lo
, hi
, state
->mark_stack_base
, size
));
328 memcpy(state
->mark_stack
+ state
->mark_stack_base
, state
->mark
+ lo
,
329 size
* sizeof(void*));
331 state
->mark_stack_base
+= size
;
337 mark_restore(SRE_STATE
* state
, int lo
, int hi
)
344 size
= (hi
- lo
) + 1;
346 state
->mark_stack_base
-= size
;
348 TRACE(("copy %d:%d from %d\n", lo
, hi
, state
->mark_stack_base
));
350 memcpy(state
->mark
+ lo
, state
->mark_stack
+ state
->mark_stack_base
,
351 size
* sizeof(void*));
356 /* generate 8-bit version */
358 #define SRE_CHAR unsigned char
359 #define SRE_AT sre_at
360 #define SRE_COUNT sre_count
361 #define SRE_CHARSET sre_charset
362 #define SRE_INFO sre_info
363 #define SRE_MATCH sre_match
364 #define SRE_SEARCH sre_search
365 #define SRE_LITERAL_TEMPLATE sre_literal_template
367 #if defined(HAVE_UNICODE)
369 #define SRE_RECURSIVE
373 #undef SRE_LITERAL_TEMPLATE
382 /* generate 16-bit unicode version */
384 #define SRE_CHAR Py_UNICODE
385 #define SRE_AT sre_uat
386 #define SRE_COUNT sre_ucount
387 #define SRE_CHARSET sre_ucharset
388 #define SRE_INFO sre_uinfo
389 #define SRE_MATCH sre_umatch
390 #define SRE_SEARCH sre_usearch
391 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
394 #endif /* SRE_RECURSIVE */
396 /* -------------------------------------------------------------------- */
397 /* String matching engine */
399 /* the following section is compiled twice, with different character
403 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
405 /* check if pointer is at given position */
411 case SRE_AT_BEGINNING
:
412 case SRE_AT_BEGINNING_STRING
:
413 return ((void*) ptr
== state
->beginning
);
415 case SRE_AT_BEGINNING_LINE
:
416 return ((void*) ptr
== state
->beginning
||
417 SRE_IS_LINEBREAK((int) ptr
[-1]));
420 return (((void*) (ptr
+1) == state
->end
&&
421 SRE_IS_LINEBREAK((int) ptr
[0])) ||
422 ((void*) ptr
== state
->end
));
424 case SRE_AT_END_LINE
:
425 return ((void*) ptr
== state
->end
||
426 SRE_IS_LINEBREAK((int) ptr
[0]));
428 case SRE_AT_END_STRING
:
429 return ((void*) ptr
== state
->end
);
431 case SRE_AT_BOUNDARY
:
432 if (state
->beginning
== state
->end
)
434 that
= ((void*) ptr
> state
->beginning
) ?
435 SRE_IS_WORD((int) ptr
[-1]) : 0;
436 this = ((void*) ptr
< state
->end
) ?
437 SRE_IS_WORD((int) ptr
[0]) : 0;
440 case SRE_AT_NON_BOUNDARY
:
441 if (state
->beginning
== state
->end
)
443 that
= ((void*) ptr
> state
->beginning
) ?
444 SRE_IS_WORD((int) ptr
[-1]) : 0;
445 this = ((void*) ptr
< state
->end
) ?
446 SRE_IS_WORD((int) ptr
[0]) : 0;
449 case SRE_AT_LOC_BOUNDARY
:
450 if (state
->beginning
== state
->end
)
452 that
= ((void*) ptr
> state
->beginning
) ?
453 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
454 this = ((void*) ptr
< state
->end
) ?
455 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
458 case SRE_AT_LOC_NON_BOUNDARY
:
459 if (state
->beginning
== state
->end
)
461 that
= ((void*) ptr
> state
->beginning
) ?
462 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
463 this = ((void*) ptr
< state
->end
) ?
464 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
467 #if defined(HAVE_UNICODE)
468 case SRE_AT_UNI_BOUNDARY
:
469 if (state
->beginning
== state
->end
)
471 that
= ((void*) ptr
> state
->beginning
) ?
472 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
473 this = ((void*) ptr
< state
->end
) ?
474 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
477 case SRE_AT_UNI_NON_BOUNDARY
:
478 if (state
->beginning
== state
->end
)
480 that
= ((void*) ptr
> state
->beginning
) ?
481 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
482 this = ((void*) ptr
< state
->end
) ?
483 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
493 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
495 /* check if character is a member of the given set */
503 /* <LITERAL> <code> */
510 /* <RANGE> <lower> <upper> */
511 if (set
[0] <= ch
&& ch
<= set
[1])
517 /* <CHARSET> <bitmap> (16 bits per code word) */
518 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
523 case SRE_OP_BIGCHARSET
:
524 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
528 block
= ((unsigned char*)set
)[ch
>> 8];
530 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
536 case SRE_OP_CATEGORY
:
537 /* <CATEGORY> <code> */
538 if (sre_category(set
[0], (int) ch
))
551 /* internal error -- there's not much we can do about it
552 here, so let's just pretend it didn't match... */
558 LOCAL(int) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
);
561 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, int maxcount
, int level
)
564 SRE_CHAR
* ptr
= state
->ptr
;
565 SRE_CHAR
* end
= state
->end
;
569 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
570 end
= ptr
+ maxcount
;
572 switch (pattern
[0]) {
575 /* repeated dot wildcard. */
576 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
577 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
582 /* repeated dot wildcard. skip to the end of the target
583 string, and backtrack from there */
584 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
589 /* repeated literal */
591 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
592 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
596 case SRE_OP_LITERAL_IGNORE
:
597 /* repeated literal */
599 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
600 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
604 case SRE_OP_NOT_LITERAL
:
605 /* repeated non-literal */
607 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
608 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
612 case SRE_OP_NOT_LITERAL_IGNORE
:
613 /* repeated non-literal */
615 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
616 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
622 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
623 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
628 /* repeated single character pattern */
629 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
630 while ((SRE_CHAR
*) state
->ptr
< end
) {
631 i
= SRE_MATCH(state
, pattern
, level
);
637 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
638 (SRE_CHAR
*) state
->ptr
- ptr
));
639 return (SRE_CHAR
*) state
->ptr
- ptr
;
642 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
643 return ptr
- (SRE_CHAR
*) state
->ptr
;
646 #if 0 /* not used in this release */
648 SRE_INFO(SRE_STATE
* state
, SRE_CODE
* pattern
)
650 /* check if an SRE_OP_INFO block matches at the current position.
651 returns the number of SRE_CODE objects to skip if successful, 0
654 SRE_CHAR
* end
= state
->end
;
655 SRE_CHAR
* ptr
= state
->ptr
;
658 /* check minimal length */
659 if (pattern
[3] && (end
- ptr
) < pattern
[3])
662 /* check known prefix */
663 if (pattern
[2] & SRE_INFO_PREFIX
&& pattern
[5] > 1) {
664 /* <length> <skip> <prefix data> <overlap data> */
665 for (i
= 0; i
< pattern
[5]; i
++)
666 if ((SRE_CODE
) ptr
[i
] != pattern
[7 + i
])
668 return pattern
[0] + 2 * pattern
[6];
675 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
, int level
)
677 /* check if string matches the given pattern. returns <0 for
678 error, 0 for failure, and 1 for success */
680 SRE_CHAR
* end
= state
->end
;
681 SRE_CHAR
* ptr
= state
->ptr
;
687 SRE_REPEAT rep
; /* FIXME: <fl> allocate in STATE instead */
689 TRACE(("|%p|%p|ENTER %d\n", pattern
, ptr
, level
));
691 #if defined(USE_STACKCHECK)
692 if (level
% 10 == 0 && PyOS_CheckStack())
693 return SRE_ERROR_RECURSION_LIMIT
;
696 #if defined(USE_RECURSION_LIMIT)
697 if (level
> USE_RECURSION_LIMIT
)
698 return SRE_ERROR_RECURSION_LIMIT
;
701 if (pattern
[0] == SRE_OP_INFO
) {
702 /* optimization info block */
703 /* <INFO> <1=skip> <2=flags> <3=min> ... */
704 if (pattern
[3] && (end
- ptr
) < pattern
[3]) {
705 TRACE(("reject (got %d chars, need %d)\n",
706 (end
- ptr
), pattern
[3]));
709 pattern
+= pattern
[1] + 1;
714 switch (*pattern
++) {
717 /* immediate failure */
718 TRACE(("|%p|%p|FAILURE\n", pattern
, ptr
));
723 TRACE(("|%p|%p|SUCCESS\n", pattern
, ptr
));
728 /* match at given position */
730 TRACE(("|%p|%p|AT %d\n", pattern
, ptr
, *pattern
));
731 if (!SRE_AT(state
, ptr
, *pattern
))
736 case SRE_OP_CATEGORY
:
737 /* match at given category */
738 /* <CATEGORY> <code> */
739 TRACE(("|%p|%p|CATEGORY %d\n", pattern
, ptr
, *pattern
));
740 if (ptr
>= end
|| !sre_category(pattern
[0], ptr
[0]))
747 /* match literal string */
748 /* <LITERAL> <code> */
749 TRACE(("|%p|%p|LITERAL %d\n", pattern
, ptr
, *pattern
));
750 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] != pattern
[0])
756 case SRE_OP_NOT_LITERAL
:
757 /* match anything that is not literal character */
758 /* <NOT_LITERAL> <code> */
759 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern
, ptr
, *pattern
));
760 if (ptr
>= end
|| (SRE_CODE
) ptr
[0] == pattern
[0])
767 /* match anything (except a newline) */
769 TRACE(("|%p|%p|ANY\n", pattern
, ptr
));
770 if (ptr
>= end
|| SRE_IS_LINEBREAK(ptr
[0]))
778 TRACE(("|%p|%p|ANY_ALL\n", pattern
, ptr
));
785 /* match set member (or non_member) */
786 /* <IN> <skip> <set> */
787 TRACE(("|%p|%p|IN\n", pattern
, ptr
));
788 if (ptr
>= end
|| !SRE_CHARSET(pattern
+ 1, *ptr
))
790 pattern
+= pattern
[0];
794 case SRE_OP_GROUPREF
:
795 /* match backreference */
796 TRACE(("|%p|%p|GROUPREF %d\n", pattern
, ptr
, pattern
[0]));
799 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
800 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
801 if (!p
|| !e
|| e
< p
)
804 if (ptr
>= end
|| *ptr
!= *p
)
812 case SRE_OP_GROUPREF_IGNORE
:
813 /* match backreference */
814 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
817 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[i
+i
];
818 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[i
+i
+1];
819 if (!p
|| !e
|| e
< p
)
823 state
->lower(*ptr
) != state
->lower(*p
))
831 case SRE_OP_LITERAL_IGNORE
:
832 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern
, ptr
, pattern
[0]));
834 state
->lower(*ptr
) != state
->lower(*pattern
))
840 case SRE_OP_NOT_LITERAL_IGNORE
:
841 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, *pattern
));
843 state
->lower(*ptr
) == state
->lower(*pattern
))
849 case SRE_OP_IN_IGNORE
:
850 TRACE(("|%p|%p|IN_IGNORE\n", pattern
, ptr
));
852 || !SRE_CHARSET(pattern
+ 1, (SRE_CODE
) state
->lower(*ptr
)))
854 pattern
+= pattern
[0];
861 TRACE(("|%p|%p|MARK %d\n", pattern
, ptr
, pattern
[0]));
864 state
->lastindex
= i
/2 + 1;
865 if (i
> state
->lastmark
)
867 state
->mark
[i
] = ptr
;
874 /* <JUMP> <offset> */
875 TRACE(("|%p|%p|JUMP %d\n", pattern
, ptr
, pattern
[0]));
876 pattern
+= pattern
[0];
880 /* assert subpattern */
881 /* <ASSERT> <skip> <back> <pattern> */
882 TRACE(("|%p|%p|ASSERT %d\n", pattern
, ptr
, pattern
[1]));
883 state
->ptr
= ptr
- pattern
[1];
884 if (state
->ptr
< state
->beginning
)
886 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
889 pattern
+= pattern
[0];
892 case SRE_OP_ASSERT_NOT
:
893 /* assert not subpattern */
894 /* <ASSERT_NOT> <skip> <back> <pattern> */
895 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern
, ptr
, pattern
[1]));
896 state
->ptr
= ptr
- pattern
[1];
897 if (state
->ptr
>= state
->beginning
) {
898 i
= SRE_MATCH(state
, pattern
+ 2, level
+ 1);
904 pattern
+= pattern
[0];
909 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
910 TRACE(("|%p|%p|BRANCH\n", pattern
, ptr
));
911 lastmark
= state
->lastmark
;
912 for (; pattern
[0]; pattern
+= pattern
[0]) {
913 if (pattern
[1] == SRE_OP_LITERAL
&&
914 (ptr
>= end
|| (SRE_CODE
) *ptr
!= pattern
[2]))
916 if (pattern
[1] == SRE_OP_IN
&&
917 (ptr
>= end
|| !SRE_CHARSET(pattern
+ 3, (SRE_CODE
) *ptr
)))
920 i
= SRE_MATCH(state
, pattern
+ 1, level
+ 1);
923 if (state
->lastmark
> lastmark
) {
925 state
->mark
+ lastmark
+ 1, 0,
926 (state
->lastmark
- lastmark
) * sizeof(void*)
928 state
->lastmark
= lastmark
;
933 case SRE_OP_REPEAT_ONE
:
934 /* match repeated sequence (maximizing regexp) */
936 /* this operator only works if the repeated item is
937 exactly one character wide, and we're not already
938 collecting backtracking points. for other cases,
939 use the MAX_REPEAT operator */
941 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
943 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern
, ptr
,
944 pattern
[1], pattern
[2]));
946 if (ptr
+ pattern
[1] > end
)
947 return 0; /* cannot match */
951 count
= SRE_COUNT(state
, pattern
+ 3, pattern
[2], level
+ 1);
957 /* when we arrive here, count contains the number of
958 matches, and ptr points to the tail of the target
959 string. check if the rest of the pattern matches,
960 and backtrack if not. */
962 if (count
< (int) pattern
[1])
965 if (pattern
[pattern
[0]] == SRE_OP_SUCCESS
) {
966 /* tail is empty. we're finished */
970 } else if (pattern
[pattern
[0]] == SRE_OP_LITERAL
) {
971 /* tail starts with a literal. skip positions where
972 the rest of the pattern cannot possibly match */
973 chr
= pattern
[pattern
[0]+1];
975 while (count
>= (int) pattern
[1] &&
976 (ptr
>= end
|| *ptr
!= chr
)) {
980 if (count
< (int) pattern
[1])
983 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
992 lastmark
= state
->lastmark
;
993 while (count
>= (int) pattern
[1]) {
995 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1000 if (state
->lastmark
> lastmark
) {
1002 state
->mark
+ lastmark
+ 1, 0,
1003 (state
->lastmark
- lastmark
) * sizeof(void*)
1005 state
->lastmark
= lastmark
;
1012 /* create repeat context. all the hard work is done
1013 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1014 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1015 TRACE(("|%p|%p|REPEAT %d %d\n", pattern
, ptr
,
1016 pattern
[1], pattern
[2]));
1019 rep
.pattern
= pattern
;
1021 /* install new repeat context */
1022 rep
.prev
= state
->repeat
;
1023 state
->repeat
= &rep
;
1026 i
= SRE_MATCH(state
, pattern
+ pattern
[0], level
+ 1);
1028 state
->repeat
= rep
.prev
;
1032 case SRE_OP_MAX_UNTIL
:
1033 /* maximizing repeat */
1034 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1036 /* FIXME: we probably need to deal with zero-width
1037 matches in here... */
1041 return SRE_ERROR_STATE
;
1045 count
= rp
->count
+ 1;
1047 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern
, ptr
, count
));
1049 if (count
< rp
->pattern
[1]) {
1050 /* not enough matches */
1053 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1056 rp
->count
= count
- 1;
1061 if (count
< rp
->pattern
[2] || rp
->pattern
[2] == 65535) {
1062 /* we may have enough matches, but if we can
1063 match another item, do so */
1065 lastmark
= state
->lastmark
;
1066 i
= mark_save(state
, 0, lastmark
);
1070 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1073 i
= mark_restore(state
, 0, lastmark
);
1074 state
->lastmark
= lastmark
;
1077 rp
->count
= count
- 1;
1081 /* cannot match more repeated items here. make sure the
1083 state
->repeat
= rp
->prev
;
1084 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1091 case SRE_OP_MIN_UNTIL
:
1092 /* minimizing repeat */
1093 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1097 return SRE_ERROR_STATE
;
1099 count
= rp
->count
+ 1;
1101 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern
, ptr
, count
,
1106 if (count
< rp
->pattern
[1]) {
1107 /* not enough matches */
1110 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1113 rp
->count
= count
-1;
1118 /* see if the tail matches */
1119 state
->repeat
= rp
->prev
;
1120 i
= SRE_MATCH(state
, pattern
, level
+ 1);
1127 if (count
>= rp
->pattern
[2] && rp
->pattern
[2] != 65535)
1132 i
= SRE_MATCH(state
, rp
->pattern
+ 3, level
+ 1);
1135 rp
->count
= count
- 1;
1140 TRACE(("|%p|%p|UNKNOWN %d\n", pattern
, ptr
, pattern
[-1]));
1141 return SRE_ERROR_ILLEGAL
;
1145 /* can't end up here */
1146 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
1150 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1152 SRE_CHAR
* ptr
= state
->start
;
1153 SRE_CHAR
* end
= state
->end
;
1156 int prefix_skip
= 0;
1157 SRE_CODE
* prefix
= NULL
;
1158 SRE_CODE
* charset
= NULL
;
1159 SRE_CODE
* overlap
= NULL
;
1162 if (pattern
[0] == SRE_OP_INFO
) {
1163 /* optimization info block */
1164 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1168 if (pattern
[3] > 0) {
1169 /* adjust end point (but make sure we leave at least one
1170 character in there, so literal search will work) */
1171 end
-= pattern
[3]-1;
1176 if (flags
& SRE_INFO_PREFIX
) {
1177 /* pattern starts with a known prefix */
1178 /* <length> <skip> <prefix data> <overlap data> */
1179 prefix_len
= pattern
[5];
1180 prefix_skip
= pattern
[6];
1181 prefix
= pattern
+ 7;
1182 overlap
= prefix
+ prefix_len
- 1;
1183 } else if (flags
& SRE_INFO_CHARSET
)
1184 /* pattern starts with a character from a known set */
1186 charset
= pattern
+ 5;
1188 pattern
+= 1 + pattern
[1];
1191 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1192 TRACE(("charset = %p\n", charset
));
1194 #if defined(USE_FAST_SEARCH)
1195 if (prefix_len
> 1) {
1196 /* pattern starts with a known prefix. use the overlap
1197 table to skip forward as fast as we possibly can */
1202 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1208 if (++i
== prefix_len
) {
1209 /* found a potential match */
1210 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1211 state
->start
= ptr
+ 1 - prefix_len
;
1212 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1213 if (flags
& SRE_INFO_LITERAL
)
1214 return 1; /* we got all of it */
1215 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
, 1);
1218 /* close but no cigar -- try again */
1231 if (pattern
[0] == SRE_OP_LITERAL
) {
1232 /* pattern starts with a literal character. this is used
1233 for short prefixes, and if fast search is disabled */
1234 SRE_CODE chr
= pattern
[1];
1237 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1241 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1244 if (flags
& SRE_INFO_LITERAL
)
1245 return 1; /* we got all of it */
1246 status
= SRE_MATCH(state
, pattern
+ 2, 1);
1250 } else if (charset
) {
1251 /* pattern starts with a character from a known set */
1254 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1258 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1261 status
= SRE_MATCH(state
, pattern
, 1);
1268 while (ptr
<= end
) {
1269 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1270 state
->start
= state
->ptr
= ptr
++;
1271 status
= SRE_MATCH(state
, pattern
, 1);
1280 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, int len
)
1282 /* check if given string is a literal template (i.e. no escapes) */
1289 #if !defined(SRE_RECURSIVE)
1291 /* -------------------------------------------------------------------- */
1292 /* factories and destructors */
1294 /* see sre.h for object declarations */
1296 staticforward PyTypeObject Pattern_Type
;
1297 staticforward PyTypeObject Match_Type
;
1298 staticforward PyTypeObject Scanner_Type
;
1301 _compile(PyObject
* self_
, PyObject
* args
)
1303 /* "compile" pattern descriptor to pattern object */
1305 PatternObject
* self
;
1312 PyObject
* groupindex
= NULL
;
1313 PyObject
* indexgroup
= NULL
;
1314 if (!PyArg_ParseTuple(args
, "OiO!|iOO", &pattern
, &flags
,
1315 &PyList_Type
, &code
, &groups
,
1316 &groupindex
, &indexgroup
))
1319 n
= PyList_GET_SIZE(code
);
1321 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
1327 for (i
= 0; i
< n
; i
++) {
1328 PyObject
*o
= PyList_GET_ITEM(code
, i
);
1329 self
->code
[i
] = (SRE_CODE
) PyInt_AsLong(o
);
1332 if (PyErr_Occurred()) {
1338 self
->pattern
= pattern
;
1340 self
->flags
= flags
;
1342 self
->groups
= groups
;
1344 Py_XINCREF(groupindex
);
1345 self
->groupindex
= groupindex
;
1347 Py_XINCREF(indexgroup
);
1348 self
->indexgroup
= indexgroup
;
1350 return (PyObject
*) self
;
1354 sre_codesize(PyObject
* self
, PyObject
* args
)
1356 return Py_BuildValue("i", sizeof(SRE_CODE
));
1360 sre_getlower(PyObject
* self
, PyObject
* args
)
1362 int character
, flags
;
1363 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1365 if (flags
& SRE_FLAG_LOCALE
)
1366 return Py_BuildValue("i", sre_lower_locale(character
));
1367 if (flags
& SRE_FLAG_UNICODE
)
1368 #if defined(HAVE_UNICODE)
1369 return Py_BuildValue("i", sre_lower_unicode(character
));
1371 return Py_BuildValue("i", sre_lower_locale(character
));
1373 return Py_BuildValue("i", sre_lower(character
));
1377 state_reset(SRE_STATE
* state
)
1381 state
->lastmark
= 0;
1383 /* FIXME: dynamic! */
1384 for (i
= 0; i
< SRE_MARK_SIZE
; i
++)
1385 state
->mark
[i
] = NULL
;
1387 state
->lastindex
= -1;
1389 state
->repeat
= NULL
;
1395 getstring(PyObject
* string
, int* p_length
, int* p_charsize
)
1397 /* given a python object, return a data pointer, a length (in
1398 characters), and a character size. return NULL if the object
1399 is not a string (or not compatible) */
1401 PyBufferProcs
*buffer
;
1402 int size
, bytes
, charsize
;
1405 #if defined(HAVE_UNICODE)
1406 if (PyUnicode_Check(string
)) {
1407 /* unicode strings don't always support the buffer interface */
1408 ptr
= (void*) PyUnicode_AS_DATA(string
);
1409 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1410 size
= PyUnicode_GET_SIZE(string
);
1411 charsize
= sizeof(Py_UNICODE
);
1416 /* get pointer to string buffer */
1417 buffer
= string
->ob_type
->tp_as_buffer
;
1418 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1419 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1420 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1424 /* determine buffer size */
1425 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1427 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1431 /* determine character size */
1432 #if PY_VERSION_HEX >= 0x01060000
1433 size
= PyObject_Size(string
);
1435 size
= PyObject_Length(string
);
1438 if (PyString_Check(string
) || bytes
== size
)
1440 #if defined(HAVE_UNICODE)
1441 else if (bytes
== (int) (size
* sizeof(Py_UNICODE
)))
1442 charsize
= sizeof(Py_UNICODE
);
1445 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1449 #if defined(HAVE_UNICODE)
1454 *p_charsize
= charsize
;
/* initialise `state` for matching `pattern` against `string`: the
   structure is zeroed, the character data is fetched with getstring(),
   the start/end positions are checked against the string length and
   turned into pointers (position * charsize bytes past the data
   pointer), and the case-folding function is selected from the
   pattern flags */
1460 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1463 /* prepare state object */
1469 memset(state
, 0, sizeof(SRE_STATE
));
1471 state
->lastindex
= -1;
1473 ptr
= getstring(string
, &length
, &charsize
);
1477 /* adjust boundaries */
1480 else if (start
> length
)
1485 else if (end
> length
)
1488 state
->charsize
= charsize
;
1490 state
->beginning
= ptr
;
/* positions are in characters; scale by charsize to get byte offsets */
1492 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1493 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1496 state
->string
= string
;
1498 state
->endpos
= end
;
/* pick the lowercase function: locale flag first, then unicode (which
   uses sre_lower_locale when HAVE_UNICODE is not defined), else the
   plain sre_lower */
1500 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1501 state
->lower
= sre_lower_locale
;
1502 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1503 #if defined(HAVE_UNICODE)
1504 state
->lower
= sre_lower_unicode
;
1506 state
->lower
= sre_lower_locale
;
1509 state
->lower
= sre_lower
;
/* release the string reference held by `state` (state->string may be
   NULL, hence Py_XDECREF) */
1515 state_fini(SRE_STATE
* state
)
1517 Py_XDECREF(state
->string
);
1521 /* calculate offset from start of string, in characters: the byte distance between `member` and state->beginning divided by state->charsize */
1522 #define STATE_OFFSET(state, member)\
1523 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
/* return the slice of `string` captured by group number `index`
   (1-based; it is converted to an index into the state->mark array,
   two slots per group).  if `string` is None or either mark of the
   group is unset, the group counts as undefined.
   NOTE(review): `empty` presumably selects between an empty string and
   None for undefined groups -- confirm against the full source */
1526 state_getslice(SRE_STATE
* state
, int index
, PyObject
* string
, int empty
)
/* group n uses mark slots 2*(n-1) and 2*(n-1)+1 */
1530 index
= (index
- 1) * 2;
1532 if (string
== Py_None
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1534 /* want empty string */
1541 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1542 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1545 return PySequence_GetSlice(string
, i
, j
);
/* translate an engine status code into a Python exception.  the
   recursion-limit and memory codes are handled separately; anything
   else is reported as an internal engine error.
   NOTE(review): the exception types are not visible in this listing */
1549 pattern_error(int status
)
1552 case SRE_ERROR_RECURSION_LIMIT
:
1555 "maximum recursion limit exceeded"
1558 case SRE_ERROR_MEMORY
:
1562 /* other error codes indicate compiler/engine bugs */
1565 "internal error in regular expression engine"
/* build the result of a match attempt from `state`.  on the success
   path a MatchObject with 2*(groups+1) mark slots is allocated and
   filled with character offsets; a status of 0 takes a separate branch
   and any remaining status is passed to pattern_error().
   NOTE(review): the condition guarding the success branch is not
   visible in this listing */
1571 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
1573 /* create match object (from state object) */
1582 /* create match object (with room for extra group marks) */
1583 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
1584 2*(pattern
->groups
+1));
1589 match
->pattern
= pattern
;
/* the match object keeps its own reference to the subject string */
1591 Py_INCREF(state
->string
);
1592 match
->string
= state
->string
;
/* match->groups counts the whole match as well, hence the +1 */
1595 match
->groups
= pattern
->groups
+1;
1597 /* fill in group slices */
1599 base
= (char*) state
->beginning
;
1600 n
= state
->charsize
;
/* slots 0 and 1 hold the span of the whole match, in characters */
1602 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
1603 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
/* state marks start at slot 0, match marks for groups start at slot 2 */
1605 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
1606 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
1607 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
1608 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
1610 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
1612 match
->pos
= state
->pos
;
1613 match
->endpos
= state
->endpos
;
1615 match
->lastindex
= state
->lastindex
;
1617 return (PyObject
*) match
;
1619 } else if (status
== 0) {
1627 /* internal error */
1628 pattern_error(status
);
/* scanner(string [, start [, end]]): create a ScannerObject whose
   embedded state is initialised for `pattern` and the given string and
   range, and which refers back to the pattern */
1633 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
1635 /* create search state object */
1637 ScannerObject
* self
;
1642 if (!PyArg_ParseTuple(args
, "O|ii:scanner", &string
, &start
, &end
))
1645 /* create scanner object */
1646 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
1650 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
1657 self
->pattern
= (PyObject
*) pattern
;
1659 return (PyObject
*) self
;
/* destructor for pattern objects: drop the references to the pattern
   source, the groupindex mapping and the indexgroup sequence (any of
   them may be NULL, hence Py_XDECREF) */
1663 pattern_dealloc(PatternObject
* self
)
1665 Py_XDECREF(self
->pattern
);
1666 Py_XDECREF(self
->groupindex
);
1667 Py_XDECREF(self
->indexgroup
);
/* match(pattern [, pos [, endpos]]): run the matcher once, anchored at
   the start position, and turn the engine status into a result via
   pattern_new_match().  note that the keyword name of the subject
   string is "pattern" here. */
1672 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1680 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1681 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:match", kwlist
,
1682 &string
, &start
, &end
))
1685 string
= state_init(&state
, self
, string
, start
, end
);
1689 state
.ptr
= state
.start
;
1691 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
/* 8-bit data uses sre_match; the wide variant sre_umatch is only
   compiled in when HAVE_UNICODE is defined */
1693 if (state
.charsize
== 1) {
1694 status
= sre_match(&state
, PatternObject_GetCode(self
), 1);
1696 #if defined(HAVE_UNICODE)
1697 status
= sre_umatch(&state
, PatternObject_GetCode(self
), 1);
1701 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1705 return pattern_new_match(self
, &state
, status
);
/* search(pattern [, pos [, endpos]]): run the search engine once over
   the given range and turn the engine status into a result via
   pattern_new_match().  the keyword name of the subject string is
   "pattern", as in pattern_match. */
1709 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1717 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1718 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:search", kwlist
,
1719 &string
, &start
, &end
))
1722 string
= state_init(&state
, self
, string
, start
, end
);
1726 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
/* 8-bit data uses sre_search; sre_usearch needs HAVE_UNICODE */
1728 if (state
.charsize
== 1) {
1729 status
= sre_search(&state
, PatternObject_GetCode(self
));
1731 #if defined(HAVE_UNICODE)
1732 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1736 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1740 return pattern_new_match(self
, &state
, status
);
/* import the module named `module`, look up its attribute `function`
   and call it with the argument tuple `args`; `result` receives the
   value of the call.
   NOTE(review): reference handling for `args` and the intermediate
   objects is not visible in this listing */
1744 call(char* module
, char* function
, PyObject
* args
)
1753 name
= PyString_FromString(module
);
1756 mod
= PyImport_Import(name
);
1760 func
= PyObject_GetAttrString(mod
, function
);
1764 result
= PyObject_CallObject(func
, args
);
/* helper for the __deepcopy__ methods, only compiled when
   USE_BUILTIN_COPY is defined.  it builds an (object, memo) argument
   tuple from `*object` and `memo` and returns 1 on success.
   NOTE(review): presumably replaces *object with its deep copy via a
   Python-level helper -- the call itself is not visible here */
1770 #ifdef USE_BUILTIN_COPY
1772 deepcopy(PyObject
** object
, PyObject
* memo
)
1778 Py_BuildValue("OO", *object
, memo
)
1786 return 1; /* success */
/* concatenate the items of `list` into one string.  an empty list gives
   "" and a one-item list uses that item; for longer lists an empty
   slice of `pattern` serves as the separator, so that its "join"
   method has the same string type as the pattern.  the list reference
   is consumed: it is stored in the argument tuple with
   PyTuple_SET_ITEM and goes away together with the tuple. */
1791 join(PyObject
* list
, PyObject
* pattern
)
1793 /* join list elements */
1796 #if PY_VERSION_HEX >= 0x01060000
1802 switch (PyList_GET_SIZE(list
)) {
1805 return PyString_FromString("");
1807 result
= PyList_GET_ITEM(list
, 0);
1813 /* two or more elements: slice out a suitable separator from the
1814 first member, and use that to join the entire list */
1816 joiner
= PySequence_GetSlice(pattern
, 0, 0);
1820 #if PY_VERSION_HEX >= 0x01060000
1821 function
= PyObject_GetAttrString(joiner
, "join");
1826 args
= PyTuple_New(1);
1828 Py_DECREF(function
);
1832 PyTuple_SET_ITEM(args
, 0, list
);
1833 result
= PyObject_CallObject(function
, args
);
1834 Py_DECREF(args
); /* also removes list */
1835 Py_DECREF(function
);
1839 Py_BuildValue("OO", list
, joiner
)
/* findall(source [, pos [, endpos]]): search repeatedly from
   state.start while it has not passed state.end and collect one list
   item per match.  the item depends on the number of groups in the
   pattern: a slice of the whole match, the slice of group 1, or a
   tuple holding one slice per group. */
1848 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1858 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
1859 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|ii:findall", kwlist
,
1860 &string
, &start
, &end
))
1863 string
= state_init(&state
, self
, string
, start
, end
);
1867 list
= PyList_New(0);
1873 while (state
.start
<= state
.end
) {
/* marks from the previous match must not leak into this one */
1877 state_reset(&state
);
1879 state
.ptr
= state
.start
;
1881 if (state
.charsize
== 1) {
1882 status
= sre_search(&state
, PatternObject_GetCode(self
));
1884 #if defined(HAVE_UNICODE)
1885 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1892 pattern_error(status
);
1896 /* don't bother to build a match object */
1897 switch (self
->groups
) {
1899 b
= STATE_OFFSET(&state
, state
.start
);
1900 e
= STATE_OFFSET(&state
, state
.ptr
);
1901 item
= PySequence_GetSlice(string
, b
, e
);
1906 item
= state_getslice(&state
, 1, string
, 1);
1911 item
= PyTuple_New(self
->groups
);
1914 for (i
= 0; i
< self
->groups
; i
++) {
1915 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
1920 PyTuple_SET_ITEM(item
, i
, o
);
1925 status
= PyList_Append(list
, item
);
/* an empty match would be found again at the same place, so step one
   character forward; otherwise continue where the match ended */
1930 if (state
.ptr
== state
.start
)
1931 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
1933 state
.start
= state
.ptr
;
/* finditer(...): only compiled for PY_VERSION_HEX >= 0x02020000.
   creates a scanner for the arguments, fetches its bound "search"
   method and wraps it in a callable-iterator that uses None as the
   sentinel value */
1947 #if PY_VERSION_HEX >= 0x02020000
1949 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
1955 scanner
= pattern_scanner(pattern
, args
);
1959 search
= PyObject_GetAttrString(scanner
, "search");
1964 iterator
= PyCallIter_New(search
, Py_None
);
/* split(source [, maxsplit]): split `source` at the matches of the
   pattern.  the whole string is searched (0..INT_MAX is passed to
   state_init); a maxsplit of 0 means no limit.  for every match the
   text since the previous match is appended to the list, followed by
   one slice per group; the text after the last match is appended at
   the end, even if it is empty. */
1972 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1984 static char* kwlist
[] = { "source", "maxsplit", NULL
};
1985 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|i:split", kwlist
,
1986 &string
, &maxsplit
))
1989 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
1993 list
= PyList_New(0);
2002 while (!maxsplit
|| n
< maxsplit
) {
2004 state_reset(&state
);
2006 state
.ptr
= state
.start
;
2008 if (state
.charsize
== 1) {
2009 status
= sre_search(&state
, PatternObject_GetCode(self
));
2011 #if defined(HAVE_UNICODE)
2012 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2019 pattern_error(status
);
/* empty match: handled separately (see the `last == state.end` test),
   otherwise the search restarts one character further on */
2023 if (state
.start
== state
.ptr
) {
2024 if (last
== state
.end
)
2026 /* skip one character */
2027 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2031 /* get segment before this match */
2032 item
= PySequence_GetSlice(
2033 string
, STATE_OFFSET(&state
, last
),
2034 STATE_OFFSET(&state
, state
.start
)
2038 status
= PyList_Append(list
, item
);
2043 /* add groups (if any) */
2044 for (i
= 0; i
< self
->groups
; i
++) {
2045 item
= state_getslice(&state
, i
+1, string
, 0);
2048 status
= PyList_Append(list
, item
);
/* both the segment start and the next search position move to the end
   of this match */
2056 last
= state
.start
= state
.ptr
;
2060 /* get segment following last match (even if empty) */
2061 item
= PySequence_GetSlice(
2062 string
, STATE_OFFSET(&state
, last
), state
.endpos
2066 status
= PyList_Append(list
, item
);
/* common implementation of sub and subn.  `template` is either a
   callable or a template string: a string is first checked with
   sre_literal_template / sre_uliteral_template and, if it is not a
   plain literal, handed to the Python-level "_subx" helper in
   SRE_MODULE.  the whole string is then searched repeatedly (a `count`
   of 0 means no limit); text between matches and the replacement items
   are collected in a list which is finally joined into one string.
   NOTE(review): `subn` presumably selects the (string, count) tuple
   return visible at the end -- the test itself is not in this listing */
2082 pattern_subx(PatternObject
* self
, PyObject
* template, PyObject
* string
,
2083 int count
, int subn
)
2095 int filter_is_callable
;
2097 if (PyCallable_Check(template)) {
2098 /* sub/subn takes either a function or a template */
2101 filter_is_callable
= 1;
2103 /* if not callable, check if it's a literal string */
2105 ptr
= getstring(template, &n
, &b
);
2108 literal
= sre_literal_template(ptr
, n
);
2110 #if defined(HAVE_UNICODE)
2111 literal
= sre_uliteral_template(ptr
, n
);
2121 filter_is_callable
= 0;
2123 /* not a literal; hand it over to the template compiler */
2125 SRE_MODULE
, "_subx",
2126 Py_BuildValue("OO", self
, template)
/* whatever the template compiler produced may itself be callable */
2130 filter_is_callable
= PyCallable_Check(filter
);
2134 string
= state_init(&state
, self
, string
, 0, INT_MAX
);
2140 list
= PyList_New(0);
2149 while (!count
|| n
< count
) {
2151 state_reset(&state
);
2153 state
.ptr
= state
.start
;
2155 if (state
.charsize
== 1) {
2156 status
= sre_search(&state
, PatternObject_GetCode(self
));
2158 #if defined(HAVE_UNICODE)
2159 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2166 pattern_error(status
);
/* b and e are the character offsets of the start and end of the match */
2170 b
= STATE_OFFSET(&state
, state
.start
);
2171 e
= STATE_OFFSET(&state
, state
.ptr
);
2174 /* get segment before this match */
2175 item
= PySequence_GetSlice(string
, i
, b
);
2178 status
= PyList_Append(list
, item
);
2183 } else if (i
== b
&& i
== e
&& n
> 0)
2184 /* ignore empty match on latest position */
2187 if (filter_is_callable
) {
2188 /* pass match object through filter */
2189 match
= pattern_new_match(self
, &state
, 1);
2192 args
= Py_BuildValue("(O)", match
);
2197 item
= PyObject_CallObject(filter
, args
);
2203 /* filter is literal string */
/* a None item is not appended to the list */
2209 if (item
!= Py_None
) {
2210 status
= PyList_Append(list
, item
);
/* step over an empty match, otherwise continue after the match */
2221 if (state
.ptr
== state
.start
)
2222 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2224 state
.start
= state
.ptr
;
2228 /* get segment following last match */
2229 if (i
< state
.endpos
) {
2230 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2233 status
= PyList_Append(list
, item
);
2243 /* convert list to single string (also removes list) */
2244 item
= join(list
, self
->pattern
);
/* "N" hands the reference to `item` over to the new tuple */
2250 return Py_BuildValue("Ni", item
, n
);
/* sub(repl, string [, count]): parse the arguments and delegate to
   pattern_subx() with the subn flag set to 0 */
2263 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2268 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2269 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:sub", kwlist
,
2270 &template, &string
, &count
))
2273 return pattern_subx(self
, template, string
, count
, 0);
/* subn(repl, string [, count]): parse the arguments and delegate to
   pattern_subx() with the subn flag set to 1 */
2277 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2282 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2283 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|i:subn", kwlist
,
2284 &template, &string
, &count
))
2287 return pattern_subx(self
, template, string
, count
, 1);
/* __copy__(): shallow copy of a pattern object.  with USE_BUILTIN_COPY
   a new PatternObject with room for self->codesize code words is
   allocated and everything from the `groups` member onwards
   (including the code) is copied with memcpy; the three object
   references get an extra reference because both objects now point at
   them.  the other visible path raises TypeError.  `args` may be
   Py_None, in which case no argument parsing is done. */
2291 pattern_copy(PatternObject
* self
, PyObject
* args
)
2293 #ifdef USE_BUILTIN_COPY
2294 PatternObject
* copy
;
2297 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2300 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
/* members before `groups` are left as set up by the allocation */
2304 offset
= offsetof(PatternObject
, groups
);
2306 Py_XINCREF(self
->groupindex
);
2307 Py_XINCREF(self
->indexgroup
);
2308 Py_XINCREF(self
->pattern
);
2310 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2311 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2313 return (PyObject
*) copy
;
2315 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
/* __deepcopy__(memo): deep copy of a pattern object.  with
   USE_BUILTIN_COPY a shallow copy is made first (pattern_copy is
   called with Py_None so that it skips argument parsing) and the three
   object members of the copy are then passed to deepcopy() by
   address.  the other visible path raises TypeError. */
2321 pattern_deepcopy(PatternObject
* self
, PyObject
* args
)
2323 #ifdef USE_BUILTIN_COPY
2324 PatternObject
* copy
;
2327 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2330 copy
= (PatternObject
*) pattern_copy(self
, Py_None
);
/* deepcopy() takes the address of each member of `copy` */
2334 if (!deepcopy(&copy
->groupindex
, memo
) ||
2335 !deepcopy(&copy
->indexgroup
, memo
) ||
2336 !deepcopy(&copy
->pattern
, memo
)) {
2342 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
/* method table for pattern objects.  match, search, sub, subn, split
   and findall accept keyword arguments; finditer is only present for
   PY_VERSION_HEX >= 0x02020000. */
2347 static PyMethodDef pattern_methods
[] = {
2348 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
},
2349 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
},
2350 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
},
2351 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
},
2352 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
},
2353 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
},
2354 #if PY_VERSION_HEX >= 0x02020000
2355 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
},
2357 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2358 {"__copy__", (PyCFunction
) pattern_copy
, METH_VARARGS
},
2359 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_VARARGS
},
/* attribute lookup for pattern objects: methods are looked up first,
   then the data attributes "pattern", "flags", "groups" and
   "groupindex" (the latter only when a groupindex object is set);
   the visible fall-through sets AttributeError with the attribute
   name as message */
2364 pattern_getattr(PatternObject
* self
, char* name
)
2368 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2376 if (!strcmp(name
, "pattern")) {
2377 Py_INCREF(self
->pattern
);
2378 return self
->pattern
;
2381 if (!strcmp(name
, "flags"))
2382 return Py_BuildValue("i", self
->flags
);
2384 if (!strcmp(name
, "groups"))
2385 return Py_BuildValue("i", self
->groups
);
2387 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2388 Py_INCREF(self
->groupindex
);
2389 return self
->groupindex
;
2392 PyErr_SetString(PyExc_AttributeError
, name
);
/* type object for pattern objects: variable-sized, with
   sizeof(PatternObject) as basic size and sizeof(SRE_CODE) as item
   size; only the destructor and getattr slots are visible here */
2396 statichere PyTypeObject Pattern_Type
= {
2397 PyObject_HEAD_INIT(NULL
)
2398 0, "_" SRE_MODULE
".SRE_Pattern",
2399 sizeof(PatternObject
), sizeof(SRE_CODE
),
2400 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2402 (getattrfunc
)pattern_getattr
/*tp_getattr*/
2405 /* -------------------------------------------------------------------- */
/* destructor for match objects: drop the references to the cached
   regs tuple and the string (both via Py_XDECREF, so NULL is
   tolerated) and to the pattern */
2409 match_dealloc(MatchObject
* self
)
2411 Py_XDECREF(self
->regs
);
2412 Py_XDECREF(self
->string
);
2413 Py_DECREF(self
->pattern
);
/* return the text of group `index` as a slice of self->string.  the
   index is first checked against the valid range 0 .. groups-1; an
   undefined group (negative mark) or a None string takes the default
   branch, which per the comment below yields `def`.
   NOTE(review): the step that scales `index` to a mark slot is not
   visible in this listing */
2418 match_getslice_by_index(MatchObject
* self
, int index
, PyObject
* def
)
2420 if (index
< 0 || index
>= self
->groups
) {
2421 /* raise IndexError if we were given a bad group number */
2431 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2432 /* return default value if the string or group is undefined */
2437 return PySequence_GetSlice(
2438 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
/* convert a group reference into a group number: integer objects are
   used as they are; anything else is looked up in the pattern's
   groupindex mapping (if there is one) and the result is used when it
   is an integer */
2443 match_getindex(MatchObject
* self
, PyObject
* index
)
2447 if (PyInt_Check(index
))
2448 return (int) PyInt_AS_LONG(index
);
2452 if (self
->pattern
->groupindex
) {
2453 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2455 if (PyInt_Check(index
))
2456 i
= (int) PyInt_AS_LONG(index
);
/* resolve `index` (number or name) with match_getindex() and return
   the corresponding group via match_getslice_by_index() */
2466 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2468 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
/* expand(template): implemented in Python; the "_expand" helper of
   SRE_MODULE receives the pattern, this match object and the template */
2472 match_expand(MatchObject
* self
, PyObject
* args
)
2475 if (!PyArg_ParseTuple(args
, "O:expand", &template))
2478 /* delegate to Python code */
2480 SRE_MODULE
, "_expand",
2481 Py_BuildValue("OOO", self
->pattern
, self
, template)
/* group([group1, ...]): the behaviour depends on the number of
   arguments.  three paths are visible: one looks up Py_False (i.e.
   group 0), one looks up the first argument, and one builds a tuple
   with one entry per argument.  None is the default for undefined
   groups in all of them. */
2486 match_group(MatchObject
* self
, PyObject
* args
)
2491 size
= PyTuple_GET_SIZE(args
);
2495 result
= match_getslice(self
, Py_False
, Py_None
);
2498 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2501 /* fetch multiple items */
2502 result
= PyTuple_New(size
);
2505 for (i
= 0; i
< size
; i
++) {
2506 PyObject
* item
= match_getslice(
2507 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2513 PyTuple_SET_ITEM(result
, i
, item
);
/* groups([default]): build a tuple with the text of every group
   except group 0 (self->groups includes the whole match, so the tuple
   has groups-1 entries); `default` (None unless given) is passed on
   for undefined groups */
2521 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2526 PyObject
* def
= Py_None
;
2527 static char* kwlist
[] = { "default", NULL
};
2528 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2531 result
= PyTuple_New(self
->groups
-1);
2535 for (index
= 1; index
< self
->groups
; index
++) {
2537 item
= match_getslice_by_index(self
, index
, def
);
2542 PyTuple_SET_ITEM(result
, index
-1, item
);
/* groupdict([default]): build a dictionary that maps every key of the
   pattern's groupindex mapping to the text of that group; `default`
   (None unless given) is passed on for undefined groups.  patterns
   without a groupindex are handled before the loop. */
2549 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2555 PyObject
* def
= Py_None
;
2556 static char* kwlist
[] = { "default", NULL
};
2557 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2560 result
= PyDict_New();
2561 if (!result
|| !self
->pattern
->groupindex
)
2564 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2568 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2572 key
= PyList_GET_ITEM(keys
, index
);
2575 value
= match_getslice(self
, key
, def
);
2580 status
= PyDict_SetItem(result
, key
, value
);
/* start([group]): return the start offset of the given group (group 0
   when omitted) as an integer; the group reference is resolved with
   match_getindex() and range-checked before the mark is read */
2597 match_start(MatchObject
* self
, PyObject
* args
)
2601 PyObject
* index_
= Py_False
; /* zero */
2602 if (!PyArg_ParseTuple(args
, "|O:start", &index_
))
2605 index
= match_getindex(self
, index_
);
2607 if (index
< 0 || index
>= self
->groups
) {
2615 /* mark is -1 if group is undefined */
2616 return Py_BuildValue("i", self
->mark
[index
*2]);
/* end([group]): return the end offset of the given group (group 0
   when omitted) as an integer; the group reference is resolved with
   match_getindex() and range-checked before the mark is read */
2620 match_end(MatchObject
* self
, PyObject
* args
)
2624 PyObject
* index_
= Py_False
; /* zero */
2625 if (!PyArg_ParseTuple(args
, "|O:end", &index_
))
2628 index
= match_getindex(self
, index_
);
2630 if (index
< 0 || index
>= self
->groups
) {
2638 /* mark is -1 if group is undefined */
2639 return Py_BuildValue("i", self
->mark
[index
*2+1]);
/* build a 2-tuple holding the integers i1 and i2 */
2643 _pair(int i1
, int i2
)
2648 pair
= PyTuple_New(2);
2652 item
= PyInt_FromLong(i1
);
2655 PyTuple_SET_ITEM(pair
, 0, item
);
2657 item
= PyInt_FromLong(i2
);
2660 PyTuple_SET_ITEM(pair
, 1, item
);
/* span([group]): return the (start, end) offsets of the given group
   (group 0 when omitted) as a 2-tuple built by _pair() */
2670 match_span(MatchObject
* self
, PyObject
* args
)
2674 PyObject
* index_
= Py_False
; /* zero */
2675 if (!PyArg_ParseTuple(args
, "|O:span", &index_
))
2678 index
= match_getindex(self
, index_
);
2680 if (index
< 0 || index
>= self
->groups
) {
2688 /* marks are -1 if group is undefined */
2689 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
/* build the "regs" tuple: one (start, end) pair per group, with the
   whole match at index 0 */
2693 match_regs(MatchObject
* self
)
2699 regs
= PyTuple_New(self
->groups
);
2703 for (index
= 0; index
< self
->groups
; index
++) {
2704 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2709 PyTuple_SET_ITEM(regs
, index
, item
);
/* __copy__(): shallow copy of a match object.  with USE_BUILTIN_COPY a
   new MatchObject with 2*(groups+1) mark slots is allocated and
   everything from the `string` member onwards (including the marks)
   is copied with memcpy; the three object references get an extra
   reference because both objects now point at them.  the other
   visible path raises TypeError.  `args` may be Py_None, in which
   case no argument parsing is done. */
2719 match_copy(MatchObject
* self
, PyObject
* args
)
2721 #ifdef USE_BUILTIN_COPY
2725 if (args
!= Py_None
&& !PyArg_ParseTuple(args
, ":__copy__"))
2728 slots
= 2 * (self
->pattern
->groups
+1);
2730 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
2734 /* this value is a constant, but any compiler should be able to
2735 figure that out all by itself */
2736 offset
= offsetof(MatchObject
, string
);
2738 Py_XINCREF(self
->pattern
);
2739 Py_XINCREF(self
->string
);
2740 Py_XINCREF(self
->regs
);
2742 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2743 sizeof(MatchObject
) + slots
* sizeof(int) - offset
);
2745 return (PyObject
*) copy
;
2747 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
/* __deepcopy__(memo): deep copy of a match object.  with
   USE_BUILTIN_COPY a shallow copy is made first (match_copy is called
   with Py_None so that it skips argument parsing) and the three object
   members of the copy are then passed to deepcopy() by address.  the
   other visible path raises TypeError. */
2753 match_deepcopy(MatchObject
* self
, PyObject
* args
)
2755 #ifdef USE_BUILTIN_COPY
2759 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
2762 copy
= (MatchObject
*) match_copy(self
, Py_None
);
/* deepcopy() takes the address of each member of `copy`; the pattern
   member needs a cast to PyObject** */
2766 if (!deepcopy((PyObject
**) &copy
->pattern
, memo
) ||
2767 !deepcopy(&copy
->string
, memo
) ||
2768 !deepcopy(&copy
->regs
, memo
)) {
2774 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
/* method table for match objects; only groups and groupdict accept
   keyword arguments */
2779 static PyMethodDef match_methods
[] = {
2780 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
2781 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
2782 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
2783 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
2784 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
2785 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
2786 {"expand", (PyCFunction
) match_expand
, METH_VARARGS
},
2787 {"__copy__", (PyCFunction
) match_copy
, METH_VARARGS
},
2788 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_VARARGS
},
/* attribute lookup for match objects: methods are looked up first,
   then the data attributes "lastindex", "lastgroup", "string",
   "regs", "re", "pos" and "endpos"; the visible fall-through sets
   AttributeError with the attribute name as message */
2793 match_getattr(MatchObject
* self
, char* name
)
2797 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
/* a negative lastindex means that no group index was recorded */
2803 if (!strcmp(name
, "lastindex")) {
2804 if (self
->lastindex
>= 0)
2805 return Py_BuildValue("i", self
->lastindex
);
/* the group name is the indexgroup entry at position lastindex */
2810 if (!strcmp(name
, "lastgroup")) {
2811 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
2812 PyObject
* result
= PySequence_GetItem(
2813 self
->pattern
->indexgroup
, self
->lastindex
2823 if (!strcmp(name
, "string")) {
2825 Py_INCREF(self
->string
);
2826 return self
->string
;
/* two paths are visible: one takes a reference to self->regs, the
   other builds the tuple with match_regs() */
2833 if (!strcmp(name
, "regs")) {
2835 Py_INCREF(self
->regs
);
2838 return match_regs(self
);
2841 if (!strcmp(name
, "re")) {
2842 Py_INCREF(self
->pattern
);
2843 return (PyObject
*) self
->pattern
;
2846 if (!strcmp(name
, "pos"))
2847 return Py_BuildValue("i", self
->pos
);
2849 if (!strcmp(name
, "endpos"))
2850 return Py_BuildValue("i", self
->endpos
);
2852 PyErr_SetString(PyExc_AttributeError
, name
);
2856 /* FIXME: implement setattr("string", None) as a special case (to
2857 detach the associated string, if any) */
/* type object for match objects: variable-sized, with
   sizeof(MatchObject) as basic size and sizeof(int) as item size (one
   int per mark); only the destructor and getattr slots are visible
   here */
2859 statichere PyTypeObject Match_Type
= {
2860 PyObject_HEAD_INIT(NULL
)
2861 0, "_" SRE_MODULE
".SRE_Match",
2862 sizeof(MatchObject
), sizeof(int),
2863 (destructor
)match_dealloc
, /*tp_dealloc*/
2865 (getattrfunc
)match_getattr
/*tp_getattr*/
2868 /* -------------------------------------------------------------------- */
2869 /* scanner methods (experimental) */
/* destructor for scanner objects: finalise the embedded state (which
   releases its string reference) and drop the pattern reference */
2872 scanner_dealloc(ScannerObject
* self
)
2874 state_fini(&self
->state
);
2875 Py_DECREF(self
->pattern
);
/* scanner.match(): try to match at the scanner's current position,
   build the result with pattern_new_match() and then advance the
   position for the next call */
2880 scanner_match(ScannerObject
* self
, PyObject
* args
)
2882 SRE_STATE
* state
= &self
->state
;
2888 state
->ptr
= state
->start
;
2890 if (state
->charsize
== 1) {
2891 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
), 1);
2893 #if defined(HAVE_UNICODE)
2894 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
), 1);
2898 match
= pattern_new_match((PatternObject
*) self
->pattern
,
/* after a failure (status 0) or an empty match, move one character
   forward; otherwise continue where the match ended */
2901 if (status
== 0 || state
->ptr
== state
->start
)
2902 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
2904 state
->start
= state
->ptr
;
/* scanner.search(): search from the scanner's current position, build
   the result with pattern_new_match() and then advance the position
   for the next call */
2911 scanner_search(ScannerObject
* self
, PyObject
* args
)
2913 SRE_STATE
* state
= &self
->state
;
2919 state
->ptr
= state
->start
;
2921 if (state
->charsize
== 1) {
2922 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
2924 #if defined(HAVE_UNICODE)
2925 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
2929 match
= pattern_new_match((PatternObject
*) self
->pattern
,
/* after a failure (status 0) or an empty match, move one character
   forward; otherwise continue where the match ended */
2932 if (status
== 0 || state
->ptr
== state
->start
)
2933 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
2935 state
->start
= state
->ptr
;
/* method table for scanner objects; both entries are registered with
   a flags value of 0 */
2940 static PyMethodDef scanner_methods
[] = {
2941 {"match", (PyCFunction
) scanner_match
, 0},
2942 {"search", (PyCFunction
) scanner_search
, 0},
/* attribute lookup for scanner objects: methods are looked up first,
   then the "pattern" attribute; the visible fall-through sets
   AttributeError with the attribute name as message */
2947 scanner_getattr(ScannerObject
* self
, char* name
)
2951 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
2958 if (!strcmp(name
, "pattern")) {
2959 Py_INCREF(self
->pattern
);
2960 return self
->pattern
;
2963 PyErr_SetString(PyExc_AttributeError
, name
);
/* type object for scanner objects: fixed size (item size 0); only the
   destructor and getattr slots are visible here */
2967 statichere PyTypeObject Scanner_Type
= {
2968 PyObject_HEAD_INIT(NULL
)
2969 0, "_" SRE_MODULE
".SRE_Scanner",
2970 sizeof(ScannerObject
), 0,
2971 (destructor
)scanner_dealloc
, /*tp_dealloc*/
2973 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
/* module-level function table: compile, getcodesize and getlower, all
   registered with a flags value of 1 */
2976 static PyMethodDef _functions
[] = {
2977 {"compile", _compile
, 1},
2978 {"getcodesize", sre_codesize
, 1},
2979 {"getlower", sre_getlower
, 1},
/* module initialisation statements (the enclosing function header is
   not part of this listing): the three type objects get their
   ob_type set at run time, the "_" SRE_MODULE module is created from
   the _functions table, and the MAGIC and copyright values are stored
   in the module dictionary */
2990 /* Patch object types */
2991 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
2992 Scanner_Type
.ob_type
= &PyType_Type
;
2994 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
2995 d
= PyModule_GetDict(m
);
2997 x
= PyInt_FromLong(SRE_MAGIC
);
2999 PyDict_SetItemString(d
, "MAGIC", x
);
3003 x
= PyString_FromString(copyright
);
3005 PyDict_SetItemString(d
, "copyright", x
);