Apparently the code to forestall Tk eating events was too aggressive (Tk user input...
[python/dscho.git] / Modules / _sre.c
blob308b7260b57f96fa7c4e3fd93e02f03849cc397d
1 /*
2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-06-30 fl added fast search optimization
10 * 2000-06-30 fl added assert (lookahead) primitives, etc
11 * 2000-07-02 fl added charset optimizations, etc
12 * 2000-07-03 fl store code in pattern object, lookbehind, etc
13 * 2000-07-08 fl added regs attribute
14 * 2000-07-21 fl reset lastindex in scanner methods
15 * 2000-08-01 fl fixes for 1.6b1
16 * 2000-08-03 fl added recursion limit
17 * 2000-08-07 fl use PyOS_CheckStack() if available
18 * 2000-08-08 fl changed findall to return empty strings instead of None
19 * 2000-08-27 fl properly propagate memory errors
20 * 2000-09-02 fl return -1 instead of None for start/end/span
21 * 2000-09-20 fl added expand method
22 * 2000-09-21 fl don't use the buffer interface for unicode strings
23 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
24 * 2000-10-24 fl really fixed assert_not; reset groups in findall
25 * 2000-12-21 fl fixed memory leak in groupdict
26 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
27 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
28 * 2001-01-16 fl fixed memory leak in pattern destructor
29 * 2001-03-20 fl lots of fixes for 2.1b2
30 * 2001-04-15 fl export copyright as Python attribute, not global
32 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
34 * This version of the SRE library can be redistributed under CNRI's
35 * Python 1.6 license. For any other use, please contact Secret Labs
36 * AB (info@pythonware.com).
38 * Portions of this engine have been developed in cooperation with
39 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
40 * other compatibility work.
43 #ifndef SRE_RECURSIVE
/* copyright banner for this SRE release, kept as a file-scope string */
static char copyright[] =
    " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";
48 #include "Python.h"
50 #include "sre.h"
52 #include <ctype.h>
/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* defining this one enables tracing */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local helper returning `type`, using the
   cheapest calling convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (parenthesised so they are safe inside any expression) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a parenthesised printf argument list: TRACE(("x=%d\n", x));
   it expands to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
118 /* -------------------------------------------------------------------- */
119 /* search engine state */
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* classification flags for character codes 0..127 */
static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* lower-case mapping for character codes 0..127 */
static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* each predicate yields the (non-zero) mask bit when it holds, and 0 for
   any code >= 128; note that the argument is evaluated more than once */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* map a character code to lower case using the built-in table; codes
   outside 0..127 are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
/* locale-specific character predicates */

/* these defer to <ctype.h> for codes below 256 and report 0 for anything
   larger; the argument is evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* map a character code to lower case via tolower(); codes >= 256 are
   returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* thin wrappers over the Py_UNICODE_* classification macros; the argument
   is cast to Py_UNICODE before classification */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* map a character code to lower case via Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
193 LOCAL(int)
194 sre_category(SRE_CODE category, unsigned int ch)
196 switch (category) {
198 case SRE_CATEGORY_DIGIT:
199 return SRE_IS_DIGIT(ch);
200 case SRE_CATEGORY_NOT_DIGIT:
201 return !SRE_IS_DIGIT(ch);
202 case SRE_CATEGORY_SPACE:
203 return SRE_IS_SPACE(ch);
204 case SRE_CATEGORY_NOT_SPACE:
205 return !SRE_IS_SPACE(ch);
206 case SRE_CATEGORY_WORD:
207 return SRE_IS_WORD(ch);
208 case SRE_CATEGORY_NOT_WORD:
209 return !SRE_IS_WORD(ch);
210 case SRE_CATEGORY_LINEBREAK:
211 return SRE_IS_LINEBREAK(ch);
212 case SRE_CATEGORY_NOT_LINEBREAK:
213 return !SRE_IS_LINEBREAK(ch);
215 case SRE_CATEGORY_LOC_WORD:
216 return SRE_LOC_IS_WORD(ch);
217 case SRE_CATEGORY_LOC_NOT_WORD:
218 return !SRE_LOC_IS_WORD(ch);
220 #if defined(HAVE_UNICODE)
221 case SRE_CATEGORY_UNI_DIGIT:
222 return SRE_UNI_IS_DIGIT(ch);
223 case SRE_CATEGORY_UNI_NOT_DIGIT:
224 return !SRE_UNI_IS_DIGIT(ch);
225 case SRE_CATEGORY_UNI_SPACE:
226 return SRE_UNI_IS_SPACE(ch);
227 case SRE_CATEGORY_UNI_NOT_SPACE:
228 return !SRE_UNI_IS_SPACE(ch);
229 case SRE_CATEGORY_UNI_WORD:
230 return SRE_UNI_IS_WORD(ch);
231 case SRE_CATEGORY_UNI_NOT_WORD:
232 return !SRE_UNI_IS_WORD(ch);
233 case SRE_CATEGORY_UNI_LINEBREAK:
234 return SRE_UNI_IS_LINEBREAK(ch);
235 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
236 return !SRE_UNI_IS_LINEBREAK(ch);
237 #else
238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_LOC_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_LOC_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_IS_LINEBREAK(ch);
254 #endif
256 return 0;
259 /* helpers */
261 static void
262 mark_fini(SRE_STATE* state)
264 if (state->mark_stack) {
265 free(state->mark_stack);
266 state->mark_stack = NULL;
268 state->mark_stack_size = state->mark_stack_base = 0;
271 static int
272 mark_save(SRE_STATE* state, int lo, int hi)
274 void* stack;
275 int size;
276 int minsize, newsize;
278 if (hi <= lo)
279 return 0;
281 size = (hi - lo) + 1;
283 newsize = state->mark_stack_size;
284 minsize = state->mark_stack_base + size;
286 if (newsize < minsize) {
287 /* create new stack */
288 if (!newsize) {
289 newsize = 512;
290 if (newsize < minsize)
291 newsize = minsize;
292 TRACE(("allocate stack %d\n", newsize));
293 stack = malloc(sizeof(void*) * newsize);
294 } else {
295 /* grow the stack */
296 while (newsize < minsize)
297 newsize += newsize;
298 TRACE(("grow stack to %d\n", newsize));
299 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
301 if (!stack) {
302 mark_fini(state);
303 return SRE_ERROR_MEMORY;
305 state->mark_stack = stack;
306 state->mark_stack_size = newsize;
309 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
311 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
312 size * sizeof(void*));
314 state->mark_stack_base += size;
316 return 0;
319 static int
320 mark_restore(SRE_STATE* state, int lo, int hi)
322 int size;
324 if (hi <= lo)
325 return 0;
327 size = (hi - lo) + 1;
329 state->mark_stack_base -= size;
331 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
333 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
334 size * sizeof(void*));
336 return 0;
/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* with unicode support, this file includes itself once with the 8-bit
   names above, then rebinds the names for the unicode pass below */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
374 #endif /* SRE_RECURSIVE */
376 /* -------------------------------------------------------------------- */
377 /* String matching engine */
379 /* the following section is compiled twice, with different character
380 settings */
382 LOCAL(int)
383 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
385 /* check if pointer is at given position */
387 int this, that;
389 switch (at) {
391 case SRE_AT_BEGINNING:
392 case SRE_AT_BEGINNING_STRING:
393 return ((void*) ptr == state->beginning);
395 case SRE_AT_BEGINNING_LINE:
396 return ((void*) ptr == state->beginning ||
397 SRE_IS_LINEBREAK((int) ptr[-1]));
399 case SRE_AT_END:
400 return (((void*) (ptr+1) == state->end &&
401 SRE_IS_LINEBREAK((int) ptr[0])) ||
402 ((void*) ptr == state->end));
404 case SRE_AT_END_LINE:
405 return ((void*) ptr == state->end ||
406 SRE_IS_LINEBREAK((int) ptr[0]));
408 case SRE_AT_END_STRING:
409 return ((void*) ptr == state->end);
411 case SRE_AT_BOUNDARY:
412 if (state->beginning == state->end)
413 return 0;
414 that = ((void*) ptr > state->beginning) ?
415 SRE_IS_WORD((int) ptr[-1]) : 0;
416 this = ((void*) ptr < state->end) ?
417 SRE_IS_WORD((int) ptr[0]) : 0;
418 return this != that;
420 case SRE_AT_NON_BOUNDARY:
421 if (state->beginning == state->end)
422 return 0;
423 that = ((void*) ptr > state->beginning) ?
424 SRE_IS_WORD((int) ptr[-1]) : 0;
425 this = ((void*) ptr < state->end) ?
426 SRE_IS_WORD((int) ptr[0]) : 0;
427 return this == that;
429 case SRE_AT_LOC_BOUNDARY:
430 if (state->beginning == state->end)
431 return 0;
432 that = ((void*) ptr > state->beginning) ?
433 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
434 this = ((void*) ptr < state->end) ?
435 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
436 return this != that;
438 case SRE_AT_LOC_NON_BOUNDARY:
439 if (state->beginning == state->end)
440 return 0;
441 that = ((void*) ptr > state->beginning) ?
442 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
443 this = ((void*) ptr < state->end) ?
444 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
445 return this == that;
447 case SRE_AT_UNI_BOUNDARY:
448 if (state->beginning == state->end)
449 return 0;
450 that = ((void*) ptr > state->beginning) ?
451 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
452 this = ((void*) ptr < state->end) ?
453 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
454 return this != that;
456 case SRE_AT_UNI_NON_BOUNDARY:
457 if (state->beginning == state->end)
458 return 0;
459 that = ((void*) ptr > state->beginning) ?
460 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
461 this = ((void*) ptr < state->end) ?
462 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
463 return this == that;
466 return 0;
469 LOCAL(int)
470 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
472 /* check if character is a member of the given set */
474 int ok = 1;
476 for (;;) {
477 switch (*set++) {
479 case SRE_OP_LITERAL:
480 /* <LITERAL> <code> */
481 if (ch == set[0])
482 return ok;
483 set++;
484 break;
486 case SRE_OP_RANGE:
487 /* <RANGE> <lower> <upper> */
488 if (set[0] <= ch && ch <= set[1])
489 return ok;
490 set += 2;
491 break;
493 case SRE_OP_CHARSET:
494 /* <CHARSET> <bitmap> (16 bits per code word) */
495 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
496 return ok;
497 set += 16;
498 break;
500 case SRE_OP_CATEGORY:
501 /* <CATEGORY> <code> */
502 if (sre_category(set[0], (int) ch))
503 return ok;
504 set += 1;
505 break;
507 case SRE_OP_NEGATE:
508 ok = !ok;
509 break;
511 case SRE_OP_FAILURE:
512 return !ok;
514 default:
515 /* internal error -- there's not much we can do about it
516 here, so let's just pretend it didn't match... */
517 return 0;
522 LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
524 LOCAL(int)
525 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
527 SRE_CODE chr;
528 SRE_CHAR* ptr = state->ptr;
529 SRE_CHAR* end = state->end;
530 int i;
532 /* adjust end */
533 if (maxcount < end - ptr && maxcount != 65535)
534 end = ptr + maxcount;
536 switch (pattern[0]) {
538 case SRE_OP_ANY:
539 /* repeated dot wildcard. */
540 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
541 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
542 ptr++;
543 break;
545 case SRE_OP_ANY_ALL:
546 /* repeated dot wildcare. skip to the end of the target
547 string, and backtrack from there */
548 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
549 ptr = end;
550 break;
552 case SRE_OP_LITERAL:
553 /* repeated literal */
554 chr = pattern[1];
555 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
556 while (ptr < end && (SRE_CODE) *ptr == chr)
557 ptr++;
558 break;
560 case SRE_OP_LITERAL_IGNORE:
561 /* repeated literal */
562 chr = pattern[1];
563 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
564 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
565 ptr++;
566 break;
568 case SRE_OP_NOT_LITERAL:
569 /* repeated non-literal */
570 chr = pattern[1];
571 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
572 while (ptr < end && (SRE_CODE) *ptr != chr)
573 ptr++;
574 break;
576 case SRE_OP_NOT_LITERAL_IGNORE:
577 /* repeated non-literal */
578 chr = pattern[1];
579 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
580 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
581 ptr++;
582 break;
584 case SRE_OP_IN:
585 /* repeated set */
586 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
587 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
588 ptr++;
589 break;
591 default:
592 /* repeated single character pattern */
593 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
594 while ((SRE_CHAR*) state->ptr < end) {
595 i = SRE_MATCH(state, pattern, level);
596 if (i < 0)
597 return i;
598 if (!i)
599 break;
601 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
602 (SRE_CHAR*) state->ptr - ptr));
603 return (SRE_CHAR*) state->ptr - ptr;
606 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
607 return ptr - (SRE_CHAR*) state->ptr;
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
638 LOCAL(int)
639 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
641 /* check if string matches the given pattern. returns <0 for
642 error, 0 for failure, and 1 for success */
644 SRE_CHAR* end = state->end;
645 SRE_CHAR* ptr = state->ptr;
646 int i, count;
647 SRE_REPEAT* rp;
648 int lastmark;
649 SRE_CODE chr;
651 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
653 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
655 #if defined(USE_STACKCHECK)
656 if (level % 10 == 0 && PyOS_CheckStack())
657 return SRE_ERROR_RECURSION_LIMIT;
658 #endif
660 #if defined(USE_RECURSION_LIMIT)
661 if (level > USE_RECURSION_LIMIT)
662 return SRE_ERROR_RECURSION_LIMIT;
663 #endif
665 if (pattern[0] == SRE_OP_INFO) {
666 /* optimization info block */
667 /* <INFO> <1=skip> <2=flags> <3=min> ... */
668 if (pattern[3] && (end - ptr) < pattern[3]) {
669 TRACE(("reject (got %d chars, need %d)\n",
670 (end - ptr), pattern[3]));
671 return 0;
673 pattern += pattern[1] + 1;
676 for (;;) {
678 switch (*pattern++) {
680 case SRE_OP_FAILURE:
681 /* immediate failure */
682 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
683 return 0;
685 case SRE_OP_SUCCESS:
686 /* end of pattern */
687 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
688 state->ptr = ptr;
689 return 1;
691 case SRE_OP_AT:
692 /* match at given position */
693 /* <AT> <code> */
694 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
695 if (!SRE_AT(state, ptr, *pattern))
696 return 0;
697 pattern++;
698 break;
700 case SRE_OP_CATEGORY:
701 /* match at given category */
702 /* <CATEGORY> <code> */
703 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
704 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
705 return 0;
706 pattern++;
707 ptr++;
708 break;
710 case SRE_OP_LITERAL:
711 /* match literal string */
712 /* <LITERAL> <code> */
713 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
714 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
715 return 0;
716 pattern++;
717 ptr++;
718 break;
720 case SRE_OP_NOT_LITERAL:
721 /* match anything that is not literal character */
722 /* <NOT_LITERAL> <code> */
723 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
724 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
725 return 0;
726 pattern++;
727 ptr++;
728 break;
730 case SRE_OP_ANY:
731 /* match anything (except a newline) */
732 /* <ANY> */
733 TRACE(("|%p|%p|ANY\n", pattern, ptr));
734 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
735 return 0;
736 ptr++;
737 break;
739 case SRE_OP_ANY_ALL:
740 /* match anything */
741 /* <ANY_ALL> */
742 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
743 if (ptr >= end)
744 return 0;
745 ptr++;
746 break;
748 case SRE_OP_IN:
749 /* match set member (or non_member) */
750 /* <IN> <skip> <set> */
751 TRACE(("|%p|%p|IN\n", pattern, ptr));
752 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
753 return 0;
754 pattern += pattern[0];
755 ptr++;
756 break;
758 case SRE_OP_GROUPREF:
759 /* match backreference */
760 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
761 i = pattern[0];
763 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
764 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
765 if (!p || !e || e < p)
766 return 0;
767 while (p < e) {
768 if (ptr >= end || *ptr != *p)
769 return 0;
770 p++; ptr++;
773 pattern++;
774 break;
776 case SRE_OP_GROUPREF_IGNORE:
777 /* match backreference */
778 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
779 i = pattern[0];
781 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
782 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
783 if (!p || !e || e < p)
784 return 0;
785 while (p < e) {
786 if (ptr >= end ||
787 state->lower(*ptr) != state->lower(*p))
788 return 0;
789 p++; ptr++;
792 pattern++;
793 break;
795 case SRE_OP_LITERAL_IGNORE:
796 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
797 if (ptr >= end ||
798 state->lower(*ptr) != state->lower(*pattern))
799 return 0;
800 pattern++;
801 ptr++;
802 break;
804 case SRE_OP_NOT_LITERAL_IGNORE:
805 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
806 if (ptr >= end ||
807 state->lower(*ptr) == state->lower(*pattern))
808 return 0;
809 pattern++;
810 ptr++;
811 break;
813 case SRE_OP_IN_IGNORE:
814 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
815 if (ptr >= end
816 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
817 return 0;
818 pattern += pattern[0];
819 ptr++;
820 break;
822 case SRE_OP_MARK:
823 /* set mark */
824 /* <MARK> <gid> */
825 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
826 i = pattern[0];
827 if (i & 1)
828 state->lastindex = i/2 + 1;
829 if (i > state->lastmark)
830 state->lastmark = i;
831 state->mark[i] = ptr;
832 pattern++;
833 break;
835 case SRE_OP_JUMP:
836 case SRE_OP_INFO:
837 /* jump forward */
838 /* <JUMP> <offset> */
839 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
840 pattern += pattern[0];
841 break;
843 case SRE_OP_ASSERT:
844 /* assert subpattern */
845 /* <ASSERT> <skip> <back> <pattern> */
846 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
847 state->ptr = ptr - pattern[1];
848 if (state->ptr < state->beginning)
849 return 0;
850 i = SRE_MATCH(state, pattern + 2, level + 1);
851 if (i <= 0)
852 return i;
853 pattern += pattern[0];
854 break;
856 case SRE_OP_ASSERT_NOT:
857 /* assert not subpattern */
858 /* <ASSERT_NOT> <skip> <back> <pattern> */
859 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
860 state->ptr = ptr - pattern[1];
861 if (state->ptr >= state->beginning) {
862 i = SRE_MATCH(state, pattern + 2, level + 1);
863 if (i < 0)
864 return i;
865 if (i)
866 return 0;
868 pattern += pattern[0];
869 break;
871 case SRE_OP_BRANCH:
872 /* alternation */
873 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
874 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
875 lastmark = state->lastmark;
876 for (; pattern[0]; pattern += pattern[0]) {
877 if (pattern[1] == SRE_OP_LITERAL &&
878 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
879 continue;
880 if (pattern[1] == SRE_OP_IN &&
881 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
882 continue;
883 state->ptr = ptr;
884 i = SRE_MATCH(state, pattern + 1, level + 1);
885 if (i)
886 return i;
887 if (state->lastmark > lastmark) {
888 memset(
889 state->mark + lastmark + 1, 0,
890 (state->lastmark - lastmark) * sizeof(void*)
892 state->lastmark = lastmark;
895 return 0;
897 case SRE_OP_REPEAT_ONE:
898 /* match repeated sequence (maximizing regexp) */
900 /* this operator only works if the repeated item is
901 exactly one character wide, and we're not already
902 collecting backtracking points. for other cases,
903 use the MAX_REPEAT operator */
905 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
907 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
908 pattern[1], pattern[2]));
910 if (ptr + pattern[1] > end)
911 return 0; /* cannot match */
913 state->ptr = ptr;
915 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
916 if (count < 0)
917 return count;
919 ptr += count;
921 /* when we arrive here, count contains the number of
922 matches, and ptr points to the tail of the target
923 string. check if the rest of the pattern matches,
924 and backtrack if not. */
926 if (count < (int) pattern[1])
927 return 0;
929 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
930 /* tail is empty. we're finished */
931 state->ptr = ptr;
932 return 1;
934 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
935 /* tail starts with a literal. skip positions where
936 the rest of the pattern cannot possibly match */
937 chr = pattern[pattern[0]+1];
938 for (;;) {
939 while (count >= (int) pattern[1] &&
940 (ptr >= end || *ptr != chr)) {
941 ptr--;
942 count--;
944 if (count < (int) pattern[1])
945 break;
946 state->ptr = ptr;
947 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
948 if (i)
949 return i;
950 ptr--;
951 count--;
954 } else {
955 /* general case */
956 lastmark = state->lastmark;
957 while (count >= (int) pattern[1]) {
958 state->ptr = ptr;
959 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
960 if (i)
961 return i;
962 ptr--;
963 count--;
964 if (state->lastmark > lastmark) {
965 memset(
966 state->mark + lastmark + 1, 0,
967 (state->lastmark - lastmark) * sizeof(void*)
969 state->lastmark = lastmark;
973 return 0;
975 case SRE_OP_REPEAT:
976 /* create repeat context. all the hard work is done
977 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
978 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
979 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
980 pattern[1], pattern[2]));
982 rep.count = -1;
983 rep.pattern = pattern;
985 /* install new repeat context */
986 rep.prev = state->repeat;
987 state->repeat = &rep;
989 state->ptr = ptr;
990 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
992 state->repeat = rep.prev;
994 return i;
996 case SRE_OP_MAX_UNTIL:
997 /* maximizing repeat */
998 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1000 /* FIXME: we probably need to deal with zero-width
1001 matches in here... */
1003 rp = state->repeat;
1004 if (!rp)
1005 return SRE_ERROR_STATE;
1007 state->ptr = ptr;
1009 count = rp->count + 1;
1011 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
1013 if (count < rp->pattern[1]) {
1014 /* not enough matches */
1015 rp->count = count;
1016 /* RECURSIVE */
1017 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1018 if (i)
1019 return i;
1020 rp->count = count - 1;
1021 state->ptr = ptr;
1022 return 0;
1025 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
1026 /* we may have enough matches, but if we can
1027 match another item, do so */
1028 rp->count = count;
1029 lastmark = state->lastmark;
1030 i = mark_save(state, 0, lastmark);
1031 if (i < 0)
1032 return i;
1033 /* RECURSIVE */
1034 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1035 if (i)
1036 return i;
1037 i = mark_restore(state, 0, lastmark);
1038 if (i < 0)
1039 return i;
1040 rp->count = count - 1;
1041 state->ptr = ptr;
1044 /* cannot match more repeated items here. make sure the
1045 tail matches */
1046 state->repeat = rp->prev;
1047 i = SRE_MATCH(state, pattern, level + 1);
1048 if (i)
1049 return i;
1050 state->repeat = rp;
1051 state->ptr = ptr;
1052 return 0;
1054 case SRE_OP_MIN_UNTIL:
1055 /* minimizing repeat */
1056 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1058 rp = state->repeat;
1059 if (!rp)
1060 return SRE_ERROR_STATE;
1062 count = rp->count + 1;
1064 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1065 rp->pattern));
1067 state->ptr = ptr;
1069 if (count < rp->pattern[1]) {
1070 /* not enough matches */
1071 rp->count = count;
1072 /* RECURSIVE */
1073 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1074 if (i)
1075 return i;
1076 rp->count = count-1;
1077 state->ptr = ptr;
1078 return 0;
1081 /* see if the tail matches */
1082 state->repeat = rp->prev;
1083 /* FIXME: the following fix doesn't always work (#133283) */
1084 if (0 && rp->pattern[2] == 65535) {
1085 /* unbounded repeat */
1086 for (;;) {
1087 i = SRE_MATCH(state, pattern, level + 1);
1088 if (i || ptr >= end)
1089 break;
1090 state->ptr = ++ptr;
1092 } else
1093 i = SRE_MATCH(state, pattern, level + 1);
1094 if (i) {
1095 /* free(rp); */
1096 return i;
1099 state->ptr = ptr;
1100 state->repeat = rp;
1102 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1103 return 0;
1105 rp->count = count;
1106 /* RECURSIVE */
1107 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1108 if (i)
1109 return i;
1110 rp->count = count - 1;
1111 state->ptr = ptr;
1112 return 0;
1114 default:
1115 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
1116 return SRE_ERROR_ILLEGAL;
1120 /* shouldn't end up here */
1121 return SRE_ERROR_ILLEGAL;
1124 LOCAL(int)
1125 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1127 SRE_CHAR* ptr = state->start;
1128 SRE_CHAR* end = state->end;
1129 int status = 0;
1130 int prefix_len = 0;
1131 int prefix_skip = 0;
1132 SRE_CODE* prefix = NULL;
1133 SRE_CODE* charset = NULL;
1134 SRE_CODE* overlap = NULL;
1135 int flags = 0;
1137 if (pattern[0] == SRE_OP_INFO) {
1138 /* optimization info block */
1139 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1141 flags = pattern[2];
1143 if (pattern[3] > 0) {
1144 /* adjust end point (but make sure we leave at least one
1145 character in there, so literal search will work) */
1146 end -= pattern[3]-1;
1147 if (end <= ptr)
1148 end = ptr+1;
1151 if (flags & SRE_INFO_PREFIX) {
1152 /* pattern starts with a known prefix */
1153 /* <length> <skip> <prefix data> <overlap data> */
1154 prefix_len = pattern[5];
1155 prefix_skip = pattern[6];
1156 prefix = pattern + 7;
1157 overlap = prefix + prefix_len - 1;
1158 } else if (flags & SRE_INFO_CHARSET)
1159 /* pattern starts with a character from a known set */
1160 /* <charset> */
1161 charset = pattern + 5;
1163 pattern += 1 + pattern[1];
1166 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1167 TRACE(("charset = %p\n", charset));
1169 #if defined(USE_FAST_SEARCH)
1170 if (prefix_len > 1) {
1171 /* pattern starts with a known prefix. use the overlap
1172 table to skip forward as fast as we possibly can */
1173 int i = 0;
1174 end = state->end;
1175 while (ptr < end) {
1176 for (;;) {
1177 if ((SRE_CODE) ptr[0] != prefix[i]) {
1178 if (!i)
1179 break;
1180 else
1181 i = overlap[i];
1182 } else {
1183 if (++i == prefix_len) {
1184 /* found a potential match */
1185 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1186 state->start = ptr + 1 - prefix_len;
1187 state->ptr = ptr + 1 - prefix_len + prefix_skip;
1188 if (flags & SRE_INFO_LITERAL)
1189 return 1; /* we got all of it */
1190 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
1191 if (status != 0)
1192 return status;
1193 /* close but no cigar -- try again */
1194 i = overlap[i];
1196 break;
1200 ptr++;
1202 return 0;
1204 #endif
1206 if (pattern[0] == SRE_OP_LITERAL) {
1207 /* pattern starts with a literal character. this is used
1208 for short prefixes, and if fast search is disabled */
1209 SRE_CODE chr = pattern[1];
1210 end = state->end;
1211 for (;;) {
1212 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1213 ptr++;
1214 if (ptr == end)
1215 return 0;
1216 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1217 state->start = ptr;
1218 state->ptr = ++ptr;
1219 status = SRE_MATCH(state, pattern + 2, 1);
1220 if (status != 0)
1221 break;
1223 } else if (charset) {
1224 /* pattern starts with a character from a known set */
1225 end = state->end;
1226 for (;;) {
1227 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1228 ptr++;
1229 if (ptr == end)
1230 return 0;
1231 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1232 state->start = ptr;
1233 state->ptr = ptr;
1234 status = SRE_MATCH(state, pattern, 1);
1235 if (status != 0)
1236 break;
1237 ptr++;
1239 } else
1240 /* general case */
1241 while (ptr <= end) {
1242 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1243 state->start = state->ptr = ptr++;
1244 status = SRE_MATCH(state, pattern, 1);
1245 if (status != 0)
1246 break;
1249 return status;
1253 #if !defined(SRE_RECURSIVE)
1255 /* -------------------------------------------------------------------- */
1256 /* factories and destructors */
1258 /* see sre.h for object declarations */
1260 staticforward PyTypeObject Pattern_Type;
1261 staticforward PyTypeObject Match_Type;
1262 staticforward PyTypeObject Scanner_Type;
1264 static PyObject *
1265 _compile(PyObject* self_, PyObject* args)
1267 /* "compile" pattern descriptor to pattern object */
1269 PatternObject* self;
1270 int i, n;
1272 PyObject* pattern;
1273 int flags = 0;
1274 PyObject* code;
1275 int groups = 0;
1276 PyObject* groupindex = NULL;
1277 PyObject* indexgroup = NULL;
1278 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1279 &PyList_Type, &code, &groups,
1280 &groupindex, &indexgroup))
1281 return NULL;
1283 n = PyList_GET_SIZE(code);
1285 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1286 if (!self)
1287 return NULL;
1289 for (i = 0; i < n; i++) {
1290 PyObject *o = PyList_GET_ITEM(code, i);
1291 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
1294 if (PyErr_Occurred()) {
1295 PyObject_DEL(self);
1296 return NULL;
1299 Py_INCREF(pattern);
1300 self->pattern = pattern;
1302 self->flags = flags;
1304 self->groups = groups;
1306 Py_XINCREF(groupindex);
1307 self->groupindex = groupindex;
1309 Py_XINCREF(indexgroup);
1310 self->indexgroup = indexgroup;
1312 return (PyObject*) self;
1315 static PyObject *
1316 sre_codesize(PyObject* self, PyObject* args)
1318 return Py_BuildValue("i", sizeof(SRE_CODE));
1321 static PyObject *
1322 sre_getlower(PyObject* self, PyObject* args)
1324 int character, flags;
1325 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1326 return NULL;
1327 if (flags & SRE_FLAG_LOCALE)
1328 return Py_BuildValue("i", sre_lower_locale(character));
1329 if (flags & SRE_FLAG_UNICODE)
1330 #if defined(HAVE_UNICODE)
1331 return Py_BuildValue("i", sre_lower_unicode(character));
1332 #else
1333 return Py_BuildValue("i", sre_lower_locale(character));
1334 #endif
1335 return Py_BuildValue("i", sre_lower(character));
1338 LOCAL(void)
1339 state_reset(SRE_STATE* state)
1341 int i;
1343 state->lastmark = 0;
1345 /* FIXME: dynamic! */
1346 for (i = 0; i < SRE_MARK_SIZE; i++)
1347 state->mark[i] = NULL;
1349 state->lastindex = -1;
1351 state->repeat = NULL;
1353 mark_fini(state);
1356 LOCAL(PyObject*)
1357 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1358 int start, int end)
1360 /* prepare state object */
1362 PyBufferProcs *buffer;
1363 int size, bytes;
1364 void* ptr;
1366 memset(state, 0, sizeof(SRE_STATE));
1368 state->lastindex = -1;
1370 #if defined(HAVE_UNICODE)
1371 if (PyUnicode_Check(string)) {
1372 /* unicode strings doesn't always support the buffer interface */
1373 ptr = (void*) PyUnicode_AS_DATA(string);
1374 bytes = PyUnicode_GET_DATA_SIZE(string);
1375 size = PyUnicode_GET_SIZE(string);
1376 state->charsize = sizeof(Py_UNICODE);
1378 } else {
1379 #endif
1381 /* get pointer to string buffer */
1382 buffer = string->ob_type->tp_as_buffer;
1383 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1384 buffer->bf_getsegcount(string, NULL) != 1) {
1385 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1386 return NULL;
1389 /* determine buffer size */
1390 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1391 if (bytes < 0) {
1392 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1393 return NULL;
1396 /* determine character size */
1397 #if PY_VERSION_HEX >= 0x01060000
1398 size = PyObject_Size(string);
1399 #else
1400 size = PyObject_Length(string);
1401 #endif
1403 if (PyString_Check(string) || bytes == size)
1404 state->charsize = 1;
1405 #if defined(HAVE_UNICODE)
1406 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1407 state->charsize = sizeof(Py_UNICODE);
1408 #endif
1409 else {
1410 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1411 return NULL;
1414 #if defined(HAVE_UNICODE)
1416 #endif
1418 /* adjust boundaries */
1419 if (start < 0)
1420 start = 0;
1421 else if (start > size)
1422 start = size;
1424 if (end < 0)
1425 end = 0;
1426 else if (end > size)
1427 end = size;
1429 state->beginning = ptr;
1431 state->start = (void*) ((char*) ptr + start * state->charsize);
1432 state->end = (void*) ((char*) ptr + end * state->charsize);
1434 Py_INCREF(string);
1435 state->string = string;
1436 state->pos = start;
1437 state->endpos = end;
1439 if (pattern->flags & SRE_FLAG_LOCALE)
1440 state->lower = sre_lower_locale;
1441 else if (pattern->flags & SRE_FLAG_UNICODE)
1442 #if defined(HAVE_UNICODE)
1443 state->lower = sre_lower_unicode;
1444 #else
1445 state->lower = sre_lower_locale;
1446 #endif
1447 else
1448 state->lower = sre_lower;
1450 return string;
1453 LOCAL(void)
1454 state_fini(SRE_STATE* state)
1456 Py_XDECREF(state->string);
1457 mark_fini(state);
1460 LOCAL(PyObject*)
1461 state_getslice(SRE_STATE* state, int index, PyObject* string)
1463 int i, j;
1465 index = (index - 1) * 2;
1467 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1468 i = j = 0;
1469 } else {
1470 i = ((char*)state->mark[index] - (char*)state->beginning) /
1471 state->charsize;
1472 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1473 state->charsize;
1476 return PySequence_GetSlice(string, i, j);
1479 static void
1480 pattern_error(int status)
1482 switch (status) {
1483 case SRE_ERROR_RECURSION_LIMIT:
1484 PyErr_SetString(
1485 PyExc_RuntimeError,
1486 "maximum recursion limit exceeded"
1488 break;
1489 case SRE_ERROR_MEMORY:
1490 PyErr_NoMemory();
1491 break;
1492 default:
1493 /* other error codes indicate compiler/engine bugs */
1494 PyErr_SetString(
1495 PyExc_RuntimeError,
1496 "internal error in regular expression engine"
1501 static PyObject*
1502 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
1504 /* create match object (from state object) */
1506 MatchObject* match;
1507 int i, j;
1508 char* base;
1509 int n;
1511 if (status > 0) {
1513 /* create match object (with room for extra group marks) */
1514 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
1515 2*(pattern->groups+1));
1516 if (!match)
1517 return NULL;
1519 Py_INCREF(pattern);
1520 match->pattern = pattern;
1522 Py_INCREF(state->string);
1523 match->string = state->string;
1525 match->regs = NULL;
1526 match->groups = pattern->groups+1;
1528 /* fill in group slices */
1530 base = (char*) state->beginning;
1531 n = state->charsize;
1533 match->mark[0] = ((char*) state->start - base) / n;
1534 match->mark[1] = ((char*) state->ptr - base) / n;
1536 for (i = j = 0; i < pattern->groups; i++, j+=2)
1537 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1538 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1539 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1540 } else
1541 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1543 match->pos = state->pos;
1544 match->endpos = state->endpos;
1546 match->lastindex = state->lastindex;
1548 return (PyObject*) match;
1550 } else if (status == 0) {
1552 /* no match */
1553 Py_INCREF(Py_None);
1554 return Py_None;
1558 /* internal error */
1559 pattern_error(status);
1560 return NULL;
1563 static PyObject*
1564 pattern_scanner(PatternObject* pattern, PyObject* args)
1566 /* create search state object */
1568 ScannerObject* self;
1570 PyObject* string;
1571 int start = 0;
1572 int end = INT_MAX;
1573 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1574 return NULL;
1576 /* create scanner object */
1577 self = PyObject_NEW(ScannerObject, &Scanner_Type);
1578 if (!self)
1579 return NULL;
1581 string = state_init(&self->state, pattern, string, start, end);
1582 if (!string) {
1583 PyObject_DEL(self);
1584 return NULL;
1587 Py_INCREF(pattern);
1588 self->pattern = (PyObject*) pattern;
1590 return (PyObject*) self;
1593 static void
1594 pattern_dealloc(PatternObject* self)
1596 Py_XDECREF(self->pattern);
1597 Py_XDECREF(self->groupindex);
1598 Py_XDECREF(self->indexgroup);
1599 PyObject_DEL(self);
1602 static PyObject*
1603 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1605 SRE_STATE state;
1606 int status;
1608 PyObject* string;
1609 int start = 0;
1610 int end = INT_MAX;
1611 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1612 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1613 &string, &start, &end))
1614 return NULL;
1616 string = state_init(&state, self, string, start, end);
1617 if (!string)
1618 return NULL;
1620 state.ptr = state.start;
1622 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1624 if (state.charsize == 1) {
1625 status = sre_match(&state, PatternObject_GetCode(self), 1);
1626 } else {
1627 #if defined(HAVE_UNICODE)
1628 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
1629 #endif
1632 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1634 state_fini(&state);
1636 return pattern_new_match(self, &state, status);
1639 static PyObject*
1640 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1642 SRE_STATE state;
1643 int status;
1645 PyObject* string;
1646 int start = 0;
1647 int end = INT_MAX;
1648 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1649 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1650 &string, &start, &end))
1651 return NULL;
1653 string = state_init(&state, self, string, start, end);
1654 if (!string)
1655 return NULL;
1657 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1659 if (state.charsize == 1) {
1660 status = sre_search(&state, PatternObject_GetCode(self));
1661 } else {
1662 #if defined(HAVE_UNICODE)
1663 status = sre_usearch(&state, PatternObject_GetCode(self));
1664 #endif
1667 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1669 state_fini(&state);
1671 return pattern_new_match(self, &state, status);
1674 static PyObject*
1675 call(char* function, PyObject* args)
1677 PyObject* name;
1678 PyObject* module;
1679 PyObject* func;
1680 PyObject* result;
1682 name = PyString_FromString(SRE_MODULE);
1683 if (!name)
1684 return NULL;
1685 module = PyImport_Import(name);
1686 Py_DECREF(name);
1687 if (!module)
1688 return NULL;
1689 func = PyObject_GetAttrString(module, function);
1690 Py_DECREF(module);
1691 if (!func)
1692 return NULL;
1693 result = PyObject_CallObject(func, args);
1694 Py_DECREF(func);
1695 Py_DECREF(args);
1696 return result;
1699 static PyObject*
1700 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1702 PyObject* template;
1703 PyObject* string;
1704 PyObject* count = Py_False; /* zero */
1705 static char* kwlist[] = { "repl", "string", "count", NULL };
1706 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1707 &template, &string, &count))
1708 return NULL;
1710 /* delegate to Python code */
1711 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1714 static PyObject*
1715 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1717 PyObject* template;
1718 PyObject* string;
1719 PyObject* count = Py_False; /* zero */
1720 static char* kwlist[] = { "repl", "string", "count", NULL };
1721 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1722 &template, &string, &count))
1723 return NULL;
1725 /* delegate to Python code */
1726 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1729 static PyObject*
1730 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1732 PyObject* string;
1733 PyObject* maxsplit = Py_False; /* zero */
1734 static char* kwlist[] = { "source", "maxsplit", NULL };
1735 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1736 &string, &maxsplit))
1737 return NULL;
1739 /* delegate to Python code */
1740 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1743 static PyObject*
1744 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
1746 SRE_STATE state;
1747 PyObject* list;
1748 int status;
1749 int i;
1751 PyObject* string;
1752 int start = 0;
1753 int end = INT_MAX;
1754 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1755 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1756 &string, &start, &end))
1757 return NULL;
1759 string = state_init(&state, self, string, start, end);
1760 if (!string)
1761 return NULL;
1763 list = PyList_New(0);
1765 while (state.start <= state.end) {
1767 PyObject* item;
1769 state_reset(&state);
1771 state.ptr = state.start;
1773 if (state.charsize == 1) {
1774 status = sre_search(&state, PatternObject_GetCode(self));
1775 } else {
1776 #if defined(HAVE_UNICODE)
1777 status = sre_usearch(&state, PatternObject_GetCode(self));
1778 #endif
1781 if (status > 0) {
1783 /* don't bother to build a match object */
1784 switch (self->groups) {
1785 case 0:
1786 item = PySequence_GetSlice(
1787 string,
1788 ((char*) state.start - (char*) state.beginning) /
1789 state.charsize,
1790 ((char*) state.ptr - (char*) state.beginning) /
1791 state.charsize);
1792 if (!item)
1793 goto error;
1794 break;
1795 case 1:
1796 item = state_getslice(&state, 1, string);
1797 if (!item)
1798 goto error;
1799 break;
1800 default:
1801 item = PyTuple_New(self->groups);
1802 if (!item)
1803 goto error;
1804 for (i = 0; i < self->groups; i++) {
1805 PyObject* o = state_getslice(&state, i+1, string);
1806 if (!o) {
1807 Py_DECREF(item);
1808 goto error;
1810 PyTuple_SET_ITEM(item, i, o);
1812 break;
1815 status = PyList_Append(list, item);
1816 Py_DECREF(item);
1818 if (status < 0)
1819 goto error;
1821 if (state.ptr == state.start)
1822 state.start = (void*) ((char*) state.ptr + state.charsize);
1823 else
1824 state.start = state.ptr;
1826 } else {
1828 if (status == 0)
1829 break;
1831 pattern_error(status);
1832 goto error;
1837 state_fini(&state);
1838 return list;
1840 error:
1841 Py_DECREF(list);
1842 state_fini(&state);
1843 return NULL;
1847 static PyMethodDef pattern_methods[] = {
1848 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1849 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1850 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1851 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1852 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1853 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
1854 /* experimental */
1855 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
1856 {NULL, NULL}
1859 static PyObject*
1860 pattern_getattr(PatternObject* self, char* name)
1862 PyObject* res;
1864 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
1866 if (res)
1867 return res;
1869 PyErr_Clear();
1871 /* attributes */
1872 if (!strcmp(name, "pattern")) {
1873 Py_INCREF(self->pattern);
1874 return self->pattern;
1877 if (!strcmp(name, "flags"))
1878 return Py_BuildValue("i", self->flags);
1880 if (!strcmp(name, "groups"))
1881 return Py_BuildValue("i", self->groups);
1883 if (!strcmp(name, "groupindex") && self->groupindex) {
1884 Py_INCREF(self->groupindex);
1885 return self->groupindex;
1888 PyErr_SetString(PyExc_AttributeError, name);
1889 return NULL;
1892 statichere PyTypeObject Pattern_Type = {
1893 PyObject_HEAD_INIT(NULL)
1894 0, "SRE_Pattern",
1895 sizeof(PatternObject), sizeof(SRE_CODE),
1896 (destructor)pattern_dealloc, /*tp_dealloc*/
1897 0, /*tp_print*/
1898 (getattrfunc)pattern_getattr /*tp_getattr*/
1901 /* -------------------------------------------------------------------- */
1902 /* match methods */
1904 static void
1905 match_dealloc(MatchObject* self)
1907 Py_XDECREF(self->regs);
1908 Py_XDECREF(self->string);
1909 Py_DECREF(self->pattern);
1910 PyObject_DEL(self);
1913 static PyObject*
1914 match_getslice_by_index(MatchObject* self, int index, PyObject* def)
1916 if (index < 0 || index >= self->groups) {
1917 /* raise IndexError if we were given a bad group number */
1918 PyErr_SetString(
1919 PyExc_IndexError,
1920 "no such group"
1922 return NULL;
1925 index *= 2;
1927 if (self->string == Py_None || self->mark[index] < 0) {
1928 /* return default value if the string or group is undefined */
1929 Py_INCREF(def);
1930 return def;
1933 return PySequence_GetSlice(
1934 self->string, self->mark[index], self->mark[index+1]
1938 static int
1939 match_getindex(MatchObject* self, PyObject* index)
1941 int i;
1943 if (PyInt_Check(index))
1944 return (int) PyInt_AS_LONG(index);
1946 i = -1;
1948 if (self->pattern->groupindex) {
1949 index = PyObject_GetItem(self->pattern->groupindex, index);
1950 if (index) {
1951 if (PyInt_Check(index))
1952 i = (int) PyInt_AS_LONG(index);
1953 Py_DECREF(index);
1954 } else
1955 PyErr_Clear();
1958 return i;
1961 static PyObject*
1962 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1964 return match_getslice_by_index(self, match_getindex(self, index), def);
1967 static PyObject*
1968 match_expand(MatchObject* self, PyObject* args)
1970 PyObject* template;
1971 if (!PyArg_ParseTuple(args, "O:expand", &template))
1972 return NULL;
1974 /* delegate to Python code */
1975 return call(
1976 "_expand",
1977 Py_BuildValue("OOO", self->pattern, self, template)
1981 static PyObject*
1982 match_group(MatchObject* self, PyObject* args)
1984 PyObject* result;
1985 int i, size;
1987 size = PyTuple_GET_SIZE(args);
1989 switch (size) {
1990 case 0:
1991 result = match_getslice(self, Py_False, Py_None);
1992 break;
1993 case 1:
1994 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1995 break;
1996 default:
1997 /* fetch multiple items */
1998 result = PyTuple_New(size);
1999 if (!result)
2000 return NULL;
2001 for (i = 0; i < size; i++) {
2002 PyObject* item = match_getslice(
2003 self, PyTuple_GET_ITEM(args, i), Py_None
2005 if (!item) {
2006 Py_DECREF(result);
2007 return NULL;
2009 PyTuple_SET_ITEM(result, i, item);
2011 break;
2013 return result;
2016 static PyObject*
2017 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
2019 PyObject* result;
2020 int index;
2022 PyObject* def = Py_None;
2023 static char* kwlist[] = { "default", NULL };
2024 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
2025 return NULL;
2027 result = PyTuple_New(self->groups-1);
2028 if (!result)
2029 return NULL;
2031 for (index = 1; index < self->groups; index++) {
2032 PyObject* item;
2033 item = match_getslice_by_index(self, index, def);
2034 if (!item) {
2035 Py_DECREF(result);
2036 return NULL;
2038 PyTuple_SET_ITEM(result, index-1, item);
2041 return result;
2044 static PyObject*
2045 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
2047 PyObject* result;
2048 PyObject* keys;
2049 int index;
2051 PyObject* def = Py_None;
2052 static char* kwlist[] = { "default", NULL };
2053 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
2054 return NULL;
2056 result = PyDict_New();
2057 if (!result || !self->pattern->groupindex)
2058 return result;
2060 keys = PyMapping_Keys(self->pattern->groupindex);
2061 if (!keys)
2062 goto failed;
2064 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
2065 int status;
2066 PyObject* key;
2067 PyObject* value;
2068 key = PyList_GET_ITEM(keys, index);
2069 if (!key)
2070 goto failed;
2071 value = match_getslice(self, key, def);
2072 if (!value) {
2073 Py_DECREF(key);
2074 goto failed;
2076 status = PyDict_SetItem(result, key, value);
2077 Py_DECREF(value);
2078 if (status < 0)
2079 goto failed;
2082 Py_DECREF(keys);
2084 return result;
2086 failed:
2087 Py_DECREF(keys);
2088 Py_DECREF(result);
2089 return NULL;
2092 static PyObject*
2093 match_start(MatchObject* self, PyObject* args)
2095 int index;
2097 PyObject* index_ = Py_False; /* zero */
2098 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2099 return NULL;
2101 index = match_getindex(self, index_);
2103 if (index < 0 || index >= self->groups) {
2104 PyErr_SetString(
2105 PyExc_IndexError,
2106 "no such group"
2108 return NULL;
2111 /* mark is -1 if group is undefined */
2112 return Py_BuildValue("i", self->mark[index*2]);
2115 static PyObject*
2116 match_end(MatchObject* self, PyObject* args)
2118 int index;
2120 PyObject* index_ = Py_False; /* zero */
2121 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2122 return NULL;
2124 index = match_getindex(self, index_);
2126 if (index < 0 || index >= self->groups) {
2127 PyErr_SetString(
2128 PyExc_IndexError,
2129 "no such group"
2131 return NULL;
2134 /* mark is -1 if group is undefined */
2135 return Py_BuildValue("i", self->mark[index*2+1]);
2138 LOCAL(PyObject*)
2139 _pair(int i1, int i2)
2141 PyObject* pair;
2142 PyObject* item;
2144 pair = PyTuple_New(2);
2145 if (!pair)
2146 return NULL;
2148 item = PyInt_FromLong(i1);
2149 if (!item)
2150 goto error;
2151 PyTuple_SET_ITEM(pair, 0, item);
2153 item = PyInt_FromLong(i2);
2154 if (!item)
2155 goto error;
2156 PyTuple_SET_ITEM(pair, 1, item);
2158 return pair;
2160 error:
2161 Py_DECREF(pair);
2162 return NULL;
2165 static PyObject*
2166 match_span(MatchObject* self, PyObject* args)
2168 int index;
2170 PyObject* index_ = Py_False; /* zero */
2171 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2172 return NULL;
2174 index = match_getindex(self, index_);
2176 if (index < 0 || index >= self->groups) {
2177 PyErr_SetString(
2178 PyExc_IndexError,
2179 "no such group"
2181 return NULL;
2184 /* marks are -1 if group is undefined */
2185 return _pair(self->mark[index*2], self->mark[index*2+1]);
2188 static PyObject*
2189 match_regs(MatchObject* self)
2191 PyObject* regs;
2192 PyObject* item;
2193 int index;
2195 regs = PyTuple_New(self->groups);
2196 if (!regs)
2197 return NULL;
2199 for (index = 0; index < self->groups; index++) {
2200 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2201 if (!item) {
2202 Py_DECREF(regs);
2203 return NULL;
2205 PyTuple_SET_ITEM(regs, index, item);
2208 Py_INCREF(regs);
2209 self->regs = regs;
2211 return regs;
2214 static PyMethodDef match_methods[] = {
2215 {"group", (PyCFunction) match_group, METH_VARARGS},
2216 {"start", (PyCFunction) match_start, METH_VARARGS},
2217 {"end", (PyCFunction) match_end, METH_VARARGS},
2218 {"span", (PyCFunction) match_span, METH_VARARGS},
2219 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2220 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2221 {"expand", (PyCFunction) match_expand, METH_VARARGS},
2222 {NULL, NULL}
2225 static PyObject*
2226 match_getattr(MatchObject* self, char* name)
2228 PyObject* res;
2230 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2231 if (res)
2232 return res;
2234 PyErr_Clear();
2236 if (!strcmp(name, "lastindex")) {
2237 if (self->lastindex >= 0)
2238 return Py_BuildValue("i", self->lastindex);
2239 Py_INCREF(Py_None);
2240 return Py_None;
2243 if (!strcmp(name, "lastgroup")) {
2244 if (self->pattern->indexgroup && self->lastindex >= 0) {
2245 PyObject* result = PySequence_GetItem(
2246 self->pattern->indexgroup, self->lastindex
2248 if (result)
2249 return result;
2250 PyErr_Clear();
2252 Py_INCREF(Py_None);
2253 return Py_None;
2256 if (!strcmp(name, "string")) {
2257 if (self->string) {
2258 Py_INCREF(self->string);
2259 return self->string;
2260 } else {
2261 Py_INCREF(Py_None);
2262 return Py_None;
2266 if (!strcmp(name, "regs")) {
2267 if (self->regs) {
2268 Py_INCREF(self->regs);
2269 return self->regs;
2270 } else
2271 return match_regs(self);
2274 if (!strcmp(name, "re")) {
2275 Py_INCREF(self->pattern);
2276 return (PyObject*) self->pattern;
2279 if (!strcmp(name, "pos"))
2280 return Py_BuildValue("i", self->pos);
2282 if (!strcmp(name, "endpos"))
2283 return Py_BuildValue("i", self->endpos);
2285 PyErr_SetString(PyExc_AttributeError, name);
2286 return NULL;
2289 /* FIXME: implement setattr("string", None) as a special case (to
2290 detach the associated string, if any) */
2292 statichere PyTypeObject Match_Type = {
2293 PyObject_HEAD_INIT(NULL)
2294 0, "SRE_Match",
2295 sizeof(MatchObject), sizeof(int),
2296 (destructor)match_dealloc, /*tp_dealloc*/
2297 0, /*tp_print*/
2298 (getattrfunc)match_getattr /*tp_getattr*/
2301 /* -------------------------------------------------------------------- */
2302 /* scanner methods (experimental) */
2304 static void
2305 scanner_dealloc(ScannerObject* self)
2307 state_fini(&self->state);
2308 Py_DECREF(self->pattern);
2309 PyObject_DEL(self);
2312 static PyObject*
2313 scanner_match(ScannerObject* self, PyObject* args)
2315 SRE_STATE* state = &self->state;
2316 PyObject* match;
2317 int status;
2319 state_reset(state);
2321 state->ptr = state->start;
2323 if (state->charsize == 1) {
2324 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
2325 } else {
2326 #if defined(HAVE_UNICODE)
2327 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
2328 #endif
2331 match = pattern_new_match((PatternObject*) self->pattern,
2332 state, status);
2334 if (status == 0 || state->ptr == state->start)
2335 state->start = (void*) ((char*) state->ptr + state->charsize);
2336 else
2337 state->start = state->ptr;
2339 return match;
2343 static PyObject*
2344 scanner_search(ScannerObject* self, PyObject* args)
2346 SRE_STATE* state = &self->state;
2347 PyObject* match;
2348 int status;
2350 state_reset(state);
2352 state->ptr = state->start;
2354 if (state->charsize == 1) {
2355 status = sre_search(state, PatternObject_GetCode(self->pattern));
2356 } else {
2357 #if defined(HAVE_UNICODE)
2358 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
2359 #endif
2362 match = pattern_new_match((PatternObject*) self->pattern,
2363 state, status);
2365 if (status == 0 || state->ptr == state->start)
2366 state->start = (void*) ((char*) state->ptr + state->charsize);
2367 else
2368 state->start = state->ptr;
2370 return match;
2373 static PyMethodDef scanner_methods[] = {
2374 {"match", (PyCFunction) scanner_match, 0},
2375 {"search", (PyCFunction) scanner_search, 0},
2376 {NULL, NULL}
2379 static PyObject*
2380 scanner_getattr(ScannerObject* self, char* name)
2382 PyObject* res;
2384 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2385 if (res)
2386 return res;
2388 PyErr_Clear();
2390 /* attributes */
2391 if (!strcmp(name, "pattern")) {
2392 Py_INCREF(self->pattern);
2393 return self->pattern;
2396 PyErr_SetString(PyExc_AttributeError, name);
2397 return NULL;
2400 statichere PyTypeObject Scanner_Type = {
2401 PyObject_HEAD_INIT(NULL)
2402 0, "SRE_Scanner",
2403 sizeof(ScannerObject), 0,
2404 (destructor)scanner_dealloc, /*tp_dealloc*/
2405 0, /*tp_print*/
2406 (getattrfunc)scanner_getattr, /*tp_getattr*/
2409 static PyMethodDef _functions[] = {
2410 {"compile", _compile, 1},
2411 {"getcodesize", sre_codesize, 1},
2412 {"getlower", sre_getlower, 1},
2413 {NULL, NULL}
2416 DL_EXPORT(void)
2417 init_sre(void)
2419 PyObject* m;
2420 PyObject* d;
2422 /* Patch object types */
2423 Pattern_Type.ob_type = Match_Type.ob_type =
2424 Scanner_Type.ob_type = &PyType_Type;
2426 m = Py_InitModule("_" SRE_MODULE, _functions);
2427 d = PyModule_GetDict(m);
2429 PyDict_SetItemString(
2430 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2433 PyDict_SetItemString(
2434 d, "copyright", (PyObject*) PyString_FromString(copyright)
2439 #endif /* !defined(SRE_RECURSIVE) */