Maintain backwards compatibility with python < 2.3 by dynamically
[python/dscho.git] / Modules / _sre.c
bloba8a97748b197372a18d4222f2c25cedc2ec5dca9
1 /*
2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
25 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
27 * This version of the SRE library can be redistributed under CNRI's
28 * Python 1.6 license. For any other use, please contact Secret Labs
29 * AB (info@pythonware.com).
31 * Portions of this engine have been developed in cooperation with
32 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
33 * other compatibility work.
36 #ifndef SRE_RECURSIVE
38 static char copyright[] =
39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
41 #include "Python.h"
42 #include "structmember.h" /* offsetof */
44 #include "sre.h"
46 #include <ctype.h>
48 /* name of this module, minus the leading underscore */
49 #if !defined(SRE_MODULE)
50 #define SRE_MODULE "sre"
51 #endif
53 /* defining this one enables tracing */
54 #undef VERBOSE
56 #if PY_VERSION_HEX >= 0x01060000
57 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
58 /* defining this enables unicode support (default under 1.6a1 and later) */
59 #define HAVE_UNICODE
60 #endif
61 #endif
63 /* -------------------------------------------------------------------- */
64 /* optional features */
66 /* prevent run-away recursion (bad patterns on long strings) */
68 #if !defined(USE_STACKCHECK)
69 #if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
70 /* require smaller recursion limit for a number of 64-bit platforms:
71 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
72 /* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
73 #define USE_RECURSION_LIMIT 7500
74 #else
76 #if defined(__GNUC__) && defined(WITH_THREAD) && defined(__FreeBSD__)
77 /* the pthreads library on FreeBSD has a fixed 1MB stack size for the
78 * initial (or "primary") thread, which is insufficient for the default
79 * recursion limit. gcc 3.x at the default optimisation
80 * level (-O3) uses stack space more aggressively than gcc 2.95.
82 #if (__GNUC__ > 2)
83 #define USE_RECURSION_LIMIT 6500
84 #else
85 #define USE_RECURSION_LIMIT 7500
86 #endif
88 #else
89 #define USE_RECURSION_LIMIT 10000
90 #endif
91 #endif
92 #endif
94 /* enables fast searching */
95 #define USE_FAST_SEARCH
97 /* enables aggressive inlining (always on for Visual C) */
98 #undef USE_INLINE
100 /* enables copy/deepcopy handling (work in progress) */
101 #undef USE_BUILTIN_COPY
103 #if PY_VERSION_HEX < 0x01060000
104 #define PyObject_DEL(op) PyMem_DEL((op))
105 #endif
107 /* -------------------------------------------------------------------- */
109 #if defined(_MSC_VER)
110 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
111 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
112 /* fastest possible local call under MSVC */
113 #define LOCAL(type) static __inline type __fastcall
114 #elif defined(USE_INLINE)
115 #define LOCAL(type) static inline type
116 #else
117 #define LOCAL(type) static type
118 #endif
120 /* error codes */
121 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
122 #define SRE_ERROR_STATE -2 /* illegal state */
123 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
124 #define SRE_ERROR_MEMORY -9 /* out of memory */
126 #if defined(VERBOSE)
127 #define TRACE(v) printf v
128 #else
129 #define TRACE(v)
130 #endif
132 /* -------------------------------------------------------------------- */
133 /* search engine state */
135 /* default character predicates (run sre_chars.py to regenerate tables) */
137 #define SRE_DIGIT_MASK 1
138 #define SRE_SPACE_MASK 2
139 #define SRE_LINEBREAK_MASK 4
140 #define SRE_ALNUM_MASK 8
141 #define SRE_WORD_MASK 16
143 /* FIXME: this assumes ASCII. create tables in init_sre() instead */
145 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
146 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
147 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
148 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
149 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
150 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
151 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
153 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
154 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
155 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
156 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
157 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
158 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
159 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
160 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
161 120, 121, 122, 123, 124, 125, 126, 127 };
163 #define SRE_IS_DIGIT(ch)\
164 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
165 #define SRE_IS_SPACE(ch)\
166 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
167 #define SRE_IS_LINEBREAK(ch)\
168 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
169 #define SRE_IS_ALNUM(ch)\
170 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
171 #define SRE_IS_WORD(ch)\
172 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
174 static unsigned int sre_lower(unsigned int ch)
176 return ((ch) < 128 ? sre_char_lower[ch] : ch);
179 /* locale-specific character predicates */
181 #define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
182 #define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
183 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
184 #define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
185 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
187 static unsigned int sre_lower_locale(unsigned int ch)
189 return ((ch) < 256 ? tolower((ch)) : ch);
192 /* unicode-specific character predicates */
194 #if defined(HAVE_UNICODE)
196 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
197 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
198 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
199 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
200 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
202 static unsigned int sre_lower_unicode(unsigned int ch)
204 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
207 #endif
209 LOCAL(int)
210 sre_category(SRE_CODE category, unsigned int ch)
212 switch (category) {
214 case SRE_CATEGORY_DIGIT:
215 return SRE_IS_DIGIT(ch);
216 case SRE_CATEGORY_NOT_DIGIT:
217 return !SRE_IS_DIGIT(ch);
218 case SRE_CATEGORY_SPACE:
219 return SRE_IS_SPACE(ch);
220 case SRE_CATEGORY_NOT_SPACE:
221 return !SRE_IS_SPACE(ch);
222 case SRE_CATEGORY_WORD:
223 return SRE_IS_WORD(ch);
224 case SRE_CATEGORY_NOT_WORD:
225 return !SRE_IS_WORD(ch);
226 case SRE_CATEGORY_LINEBREAK:
227 return SRE_IS_LINEBREAK(ch);
228 case SRE_CATEGORY_NOT_LINEBREAK:
229 return !SRE_IS_LINEBREAK(ch);
231 case SRE_CATEGORY_LOC_WORD:
232 return SRE_LOC_IS_WORD(ch);
233 case SRE_CATEGORY_LOC_NOT_WORD:
234 return !SRE_LOC_IS_WORD(ch);
236 #if defined(HAVE_UNICODE)
237 case SRE_CATEGORY_UNI_DIGIT:
238 return SRE_UNI_IS_DIGIT(ch);
239 case SRE_CATEGORY_UNI_NOT_DIGIT:
240 return !SRE_UNI_IS_DIGIT(ch);
241 case SRE_CATEGORY_UNI_SPACE:
242 return SRE_UNI_IS_SPACE(ch);
243 case SRE_CATEGORY_UNI_NOT_SPACE:
244 return !SRE_UNI_IS_SPACE(ch);
245 case SRE_CATEGORY_UNI_WORD:
246 return SRE_UNI_IS_WORD(ch);
247 case SRE_CATEGORY_UNI_NOT_WORD:
248 return !SRE_UNI_IS_WORD(ch);
249 case SRE_CATEGORY_UNI_LINEBREAK:
250 return SRE_UNI_IS_LINEBREAK(ch);
251 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
252 return !SRE_UNI_IS_LINEBREAK(ch);
253 #else
254 case SRE_CATEGORY_UNI_DIGIT:
255 return SRE_IS_DIGIT(ch);
256 case SRE_CATEGORY_UNI_NOT_DIGIT:
257 return !SRE_IS_DIGIT(ch);
258 case SRE_CATEGORY_UNI_SPACE:
259 return SRE_IS_SPACE(ch);
260 case SRE_CATEGORY_UNI_NOT_SPACE:
261 return !SRE_IS_SPACE(ch);
262 case SRE_CATEGORY_UNI_WORD:
263 return SRE_LOC_IS_WORD(ch);
264 case SRE_CATEGORY_UNI_NOT_WORD:
265 return !SRE_LOC_IS_WORD(ch);
266 case SRE_CATEGORY_UNI_LINEBREAK:
267 return SRE_IS_LINEBREAK(ch);
268 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
269 return !SRE_IS_LINEBREAK(ch);
270 #endif
272 return 0;
275 /* helpers */
277 static void
278 mark_fini(SRE_STATE* state)
280 if (state->mark_stack) {
281 free(state->mark_stack);
282 state->mark_stack = NULL;
284 state->mark_stack_size = state->mark_stack_base = 0;
287 static int
288 mark_save(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
290 void* stack;
291 int size;
292 int minsize, newsize;
294 if (hi <= lo)
295 return 0;
297 size = (hi - lo) + 1;
299 newsize = state->mark_stack_size;
300 minsize = state->mark_stack_base + size;
302 if (newsize < minsize) {
303 /* create new stack */
304 if (!newsize) {
305 newsize = 512;
306 if (newsize < minsize)
307 newsize = minsize;
308 TRACE(("allocate stack %d\n", newsize));
309 stack = malloc(sizeof(void*) * newsize);
310 } else {
311 /* grow the stack */
312 while (newsize < minsize)
313 newsize += newsize;
314 TRACE(("grow stack to %d\n", newsize));
315 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
317 if (!stack) {
318 mark_fini(state);
319 return SRE_ERROR_MEMORY;
321 state->mark_stack = stack;
322 state->mark_stack_size = newsize;
325 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
327 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
328 size * sizeof(void*));
330 state->mark_stack_base += size;
332 *mark_stack_base = state->mark_stack_base;
334 return 0;
337 static int
338 mark_restore(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
340 int size;
342 if (hi <= lo)
343 return 0;
345 size = (hi - lo) + 1;
347 state->mark_stack_base = *mark_stack_base - size;
349 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
351 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
352 size * sizeof(void*));
354 return 0;
357 /* generate 8-bit version */
359 #define SRE_CHAR unsigned char
360 #define SRE_AT sre_at
361 #define SRE_COUNT sre_count
362 #define SRE_CHARSET sre_charset
363 #define SRE_INFO sre_info
364 #define SRE_MATCH sre_match
365 #define SRE_SEARCH sre_search
366 #define SRE_LITERAL_TEMPLATE sre_literal_template
368 #if defined(HAVE_UNICODE)
370 #define SRE_RECURSIVE
371 #include "_sre.c"
372 #undef SRE_RECURSIVE
374 #undef SRE_LITERAL_TEMPLATE
375 #undef SRE_SEARCH
376 #undef SRE_MATCH
377 #undef SRE_INFO
378 #undef SRE_CHARSET
379 #undef SRE_COUNT
380 #undef SRE_AT
381 #undef SRE_CHAR
383 /* generate 16-bit unicode version */
385 #define SRE_CHAR Py_UNICODE
386 #define SRE_AT sre_uat
387 #define SRE_COUNT sre_ucount
388 #define SRE_CHARSET sre_ucharset
389 #define SRE_INFO sre_uinfo
390 #define SRE_MATCH sre_umatch
391 #define SRE_SEARCH sre_usearch
392 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
393 #endif
395 #endif /* SRE_RECURSIVE */
397 /* -------------------------------------------------------------------- */
398 /* String matching engine */
400 /* the following section is compiled twice, with different character
401 settings */
403 LOCAL(int)
404 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
406 /* check if pointer is at given position */
408 int this, that;
410 switch (at) {
412 case SRE_AT_BEGINNING:
413 case SRE_AT_BEGINNING_STRING:
414 return ((void*) ptr == state->beginning);
416 case SRE_AT_BEGINNING_LINE:
417 return ((void*) ptr == state->beginning ||
418 SRE_IS_LINEBREAK((int) ptr[-1]));
420 case SRE_AT_END:
421 return (((void*) (ptr+1) == state->end &&
422 SRE_IS_LINEBREAK((int) ptr[0])) ||
423 ((void*) ptr == state->end));
425 case SRE_AT_END_LINE:
426 return ((void*) ptr == state->end ||
427 SRE_IS_LINEBREAK((int) ptr[0]));
429 case SRE_AT_END_STRING:
430 return ((void*) ptr == state->end);
432 case SRE_AT_BOUNDARY:
433 if (state->beginning == state->end)
434 return 0;
435 that = ((void*) ptr > state->beginning) ?
436 SRE_IS_WORD((int) ptr[-1]) : 0;
437 this = ((void*) ptr < state->end) ?
438 SRE_IS_WORD((int) ptr[0]) : 0;
439 return this != that;
441 case SRE_AT_NON_BOUNDARY:
442 if (state->beginning == state->end)
443 return 0;
444 that = ((void*) ptr > state->beginning) ?
445 SRE_IS_WORD((int) ptr[-1]) : 0;
446 this = ((void*) ptr < state->end) ?
447 SRE_IS_WORD((int) ptr[0]) : 0;
448 return this == that;
450 case SRE_AT_LOC_BOUNDARY:
451 if (state->beginning == state->end)
452 return 0;
453 that = ((void*) ptr > state->beginning) ?
454 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
455 this = ((void*) ptr < state->end) ?
456 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
457 return this != that;
459 case SRE_AT_LOC_NON_BOUNDARY:
460 if (state->beginning == state->end)
461 return 0;
462 that = ((void*) ptr > state->beginning) ?
463 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
464 this = ((void*) ptr < state->end) ?
465 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
466 return this == that;
468 #if defined(HAVE_UNICODE)
469 case SRE_AT_UNI_BOUNDARY:
470 if (state->beginning == state->end)
471 return 0;
472 that = ((void*) ptr > state->beginning) ?
473 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
474 this = ((void*) ptr < state->end) ?
475 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
476 return this != that;
478 case SRE_AT_UNI_NON_BOUNDARY:
479 if (state->beginning == state->end)
480 return 0;
481 that = ((void*) ptr > state->beginning) ?
482 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
483 this = ((void*) ptr < state->end) ?
484 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
485 return this == that;
486 #endif
490 return 0;
493 LOCAL(int)
494 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
496 /* check if character is a member of the given set */
498 int ok = 1;
500 for (;;) {
501 switch (*set++) {
503 case SRE_OP_LITERAL:
504 /* <LITERAL> <code> */
505 if (ch == set[0])
506 return ok;
507 set++;
508 break;
510 case SRE_OP_RANGE:
511 /* <RANGE> <lower> <upper> */
512 if (set[0] <= ch && ch <= set[1])
513 return ok;
514 set += 2;
515 break;
517 case SRE_OP_CHARSET:
518 if (sizeof(SRE_CODE) == 2) {
519 /* <CHARSET> <bitmap> (16 bits per code word) */
520 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
521 return ok;
522 set += 16;
524 else {
525 /* <CHARSET> <bitmap> (32 bits per code word) */
526 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
527 return ok;
528 set += 8;
530 break;
532 case SRE_OP_BIGCHARSET:
533 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
535 int count, block;
536 count = *(set++);
538 if (sizeof(SRE_CODE) == 2) {
539 block = ((unsigned char*)set)[ch >> 8];
540 set += 128;
541 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
542 return ok;
543 set += count*16;
545 else {
546 if (ch < 65536)
547 block = ((unsigned char*)set)[ch >> 8];
548 else
549 block = -1;
550 set += 64;
551 if (block >=0 &&
552 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
553 return ok;
554 set += count*8;
556 break;
559 case SRE_OP_CATEGORY:
560 /* <CATEGORY> <code> */
561 if (sre_category(set[0], (int) ch))
562 return ok;
563 set += 1;
564 break;
566 case SRE_OP_NEGATE:
567 ok = !ok;
568 break;
570 case SRE_OP_FAILURE:
571 return !ok;
573 default:
574 /* internal error -- there's not much we can do about it
575 here, so let's just pretend it didn't match... */
576 return 0;
581 LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
583 LOCAL(int)
584 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
586 SRE_CODE chr;
587 SRE_CHAR* ptr = state->ptr;
588 SRE_CHAR* end = state->end;
589 int i;
591 /* adjust end */
592 if (maxcount < end - ptr && maxcount != 65535)
593 end = ptr + maxcount;
595 switch (pattern[0]) {
597 case SRE_OP_ANY:
598 /* repeated dot wildcard. */
599 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
600 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
601 ptr++;
602 break;
604 case SRE_OP_ANY_ALL:
605 /* repeated dot wildcare. skip to the end of the target
606 string, and backtrack from there */
607 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
608 ptr = end;
609 break;
611 case SRE_OP_LITERAL:
612 /* repeated literal */
613 chr = pattern[1];
614 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
615 while (ptr < end && (SRE_CODE) *ptr == chr)
616 ptr++;
617 break;
619 case SRE_OP_LITERAL_IGNORE:
620 /* repeated literal */
621 chr = pattern[1];
622 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
623 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
624 ptr++;
625 break;
627 case SRE_OP_NOT_LITERAL:
628 /* repeated non-literal */
629 chr = pattern[1];
630 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
631 while (ptr < end && (SRE_CODE) *ptr != chr)
632 ptr++;
633 break;
635 case SRE_OP_NOT_LITERAL_IGNORE:
636 /* repeated non-literal */
637 chr = pattern[1];
638 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
639 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
640 ptr++;
641 break;
643 case SRE_OP_IN:
644 /* repeated set */
645 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
646 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
647 ptr++;
648 break;
650 default:
651 /* repeated single character pattern */
652 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
653 while ((SRE_CHAR*) state->ptr < end) {
654 i = SRE_MATCH(state, pattern, level);
655 if (i < 0)
656 return i;
657 if (!i)
658 break;
660 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
661 (SRE_CHAR*) state->ptr - ptr));
662 return (SRE_CHAR*) state->ptr - ptr;
665 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
666 return ptr - (SRE_CHAR*) state->ptr;
669 #if 0 /* not used in this release */
670 LOCAL(int)
671 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
673 /* check if an SRE_OP_INFO block matches at the current position.
674 returns the number of SRE_CODE objects to skip if successful, 0
675 if no match */
677 SRE_CHAR* end = state->end;
678 SRE_CHAR* ptr = state->ptr;
679 int i;
681 /* check minimal length */
682 if (pattern[3] && (end - ptr) < pattern[3])
683 return 0;
685 /* check known prefix */
686 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
687 /* <length> <skip> <prefix data> <overlap data> */
688 for (i = 0; i < pattern[5]; i++)
689 if ((SRE_CODE) ptr[i] != pattern[7 + i])
690 return 0;
691 return pattern[0] + 2 * pattern[6];
693 return pattern[0];
695 #endif
697 /* The macros below should be used to protect recursive SRE_MATCH()
698 * calls that *failed* and do *not* return immediately (IOW, those
699 * that will backtrack). Explaining:
701 * - Recursive SRE_MATCH() returned true: that's usually a success
702 * (besides atypical cases like ASSERT_NOT), therefore there's no
703 * reason to restore lastmark;
705 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
706 * is returning to the caller: If the current SRE_MATCH() is the
707 * top function of the recursion, returning false will be a matching
708 * failure, and it doesn't matter where lastmark is pointing to.
709 * If it's *not* the top function, it will be a recursive SRE_MATCH()
710 * failure by itself, and the calling SRE_MATCH() will have to deal
711 * with the failure by the same rules explained here (it will restore
712 * lastmark by itself if necessary);
714 * - Recursive SRE_MATCH() returned false, and will continue the
715 * outside 'for' loop: must be protected when breaking, since the next
716 * OP could potentially depend on lastmark;
718 * - Recursive SRE_MATCH() returned false, and will be called again
719 * inside a local for/while loop: must be protected between each
720 * loop iteration, since the recursive SRE_MATCH() could do anything,
721 * and could potentially depend on lastmark.
723 * For more information, check the discussion at SF patch #712900.
725 #define LASTMARK_SAVE() \
726 do { \
727 lastmark = state->lastmark; \
728 lastindex = state->lastindex; \
729 } while (0)
730 #define LASTMARK_RESTORE() \
731 do { \
732 if (state->lastmark > lastmark) { \
733 memset(state->mark + lastmark + 1, 0, \
734 (state->lastmark - lastmark) * sizeof(void*)); \
735 state->lastmark = lastmark; \
736 state->lastindex = lastindex; \
738 } while (0)
740 LOCAL(int)
741 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
743 /* check if string matches the given pattern. returns <0 for
744 error, 0 for failure, and 1 for success */
746 SRE_CHAR* end = state->end;
747 SRE_CHAR* ptr = state->ptr;
748 int i, count;
749 SRE_REPEAT* rp;
750 int lastmark, lastindex, mark_stack_base;
751 SRE_CODE chr;
753 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
755 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
757 #if defined(USE_STACKCHECK)
758 if (level % 10 == 0 && PyOS_CheckStack())
759 return SRE_ERROR_RECURSION_LIMIT;
760 #endif
762 #if defined(USE_RECURSION_LIMIT)
763 if (level > USE_RECURSION_LIMIT)
764 return SRE_ERROR_RECURSION_LIMIT;
765 #endif
767 if (pattern[0] == SRE_OP_INFO) {
768 /* optimization info block */
769 /* <INFO> <1=skip> <2=flags> <3=min> ... */
770 if (pattern[3] && (end - ptr) < pattern[3]) {
771 TRACE(("reject (got %d chars, need %d)\n",
772 (end - ptr), pattern[3]));
773 return 0;
775 pattern += pattern[1] + 1;
778 for (;;) {
780 switch (*pattern++) {
782 case SRE_OP_FAILURE:
783 /* immediate failure */
784 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
785 return 0;
787 case SRE_OP_SUCCESS:
788 /* end of pattern */
789 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
790 state->ptr = ptr;
791 return 1;
793 case SRE_OP_AT:
794 /* match at given position */
795 /* <AT> <code> */
796 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
797 if (!SRE_AT(state, ptr, *pattern))
798 return 0;
799 pattern++;
800 break;
802 case SRE_OP_CATEGORY:
803 /* match at given category */
804 /* <CATEGORY> <code> */
805 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
806 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
807 return 0;
808 pattern++;
809 ptr++;
810 break;
812 case SRE_OP_LITERAL:
813 /* match literal string */
814 /* <LITERAL> <code> */
815 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
816 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
817 return 0;
818 pattern++;
819 ptr++;
820 break;
822 case SRE_OP_NOT_LITERAL:
823 /* match anything that is not literal character */
824 /* <NOT_LITERAL> <code> */
825 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
826 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
827 return 0;
828 pattern++;
829 ptr++;
830 break;
832 case SRE_OP_ANY:
833 /* match anything (except a newline) */
834 /* <ANY> */
835 TRACE(("|%p|%p|ANY\n", pattern, ptr));
836 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
837 return 0;
838 ptr++;
839 break;
841 case SRE_OP_ANY_ALL:
842 /* match anything */
843 /* <ANY_ALL> */
844 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
845 if (ptr >= end)
846 return 0;
847 ptr++;
848 break;
850 case SRE_OP_IN:
851 /* match set member (or non_member) */
852 /* <IN> <skip> <set> */
853 TRACE(("|%p|%p|IN\n", pattern, ptr));
854 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
855 return 0;
856 pattern += pattern[0];
857 ptr++;
858 break;
860 case SRE_OP_GROUPREF:
861 /* match backreference */
862 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
863 i = pattern[0];
865 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
866 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
867 if (!p || !e || e < p)
868 return 0;
869 while (p < e) {
870 if (ptr >= end || *ptr != *p)
871 return 0;
872 p++; ptr++;
875 pattern++;
876 break;
878 case SRE_OP_GROUPREF_IGNORE:
879 /* match backreference */
880 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
881 i = pattern[0];
883 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
884 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
885 if (!p || !e || e < p)
886 return 0;
887 while (p < e) {
888 if (ptr >= end ||
889 state->lower(*ptr) != state->lower(*p))
890 return 0;
891 p++; ptr++;
894 pattern++;
895 break;
897 case SRE_OP_LITERAL_IGNORE:
898 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
899 if (ptr >= end ||
900 state->lower(*ptr) != state->lower(*pattern))
901 return 0;
902 pattern++;
903 ptr++;
904 break;
906 case SRE_OP_NOT_LITERAL_IGNORE:
907 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
908 if (ptr >= end ||
909 state->lower(*ptr) == state->lower(*pattern))
910 return 0;
911 pattern++;
912 ptr++;
913 break;
915 case SRE_OP_IN_IGNORE:
916 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
917 if (ptr >= end
918 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
919 return 0;
920 pattern += pattern[0];
921 ptr++;
922 break;
924 case SRE_OP_MARK:
925 /* set mark */
926 /* <MARK> <gid> */
927 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
928 i = pattern[0];
929 if (i & 1)
930 state->lastindex = i/2 + 1;
931 if (i > state->lastmark)
932 state->lastmark = i;
933 state->mark[i] = ptr;
934 pattern++;
935 break;
937 case SRE_OP_JUMP:
938 case SRE_OP_INFO:
939 /* jump forward */
940 /* <JUMP> <offset> */
941 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
942 pattern += pattern[0];
943 break;
945 case SRE_OP_ASSERT:
946 /* assert subpattern */
947 /* <ASSERT> <skip> <back> <pattern> */
948 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
949 state->ptr = ptr - pattern[1];
950 if (state->ptr < state->beginning)
951 return 0;
952 i = SRE_MATCH(state, pattern + 2, level + 1);
953 if (i <= 0)
954 return i;
955 pattern += pattern[0];
956 break;
958 case SRE_OP_ASSERT_NOT:
959 /* assert not subpattern */
960 /* <ASSERT_NOT> <skip> <back> <pattern> */
961 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
962 state->ptr = ptr - pattern[1];
963 if (state->ptr >= state->beginning) {
964 i = SRE_MATCH(state, pattern + 2, level + 1);
965 if (i < 0)
966 return i;
967 if (i)
968 return 0;
970 pattern += pattern[0];
971 break;
973 case SRE_OP_BRANCH:
974 /* alternation */
975 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
976 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
977 LASTMARK_SAVE();
978 if (state->repeat) {
979 i = mark_save(state, 0, lastmark, &mark_stack_base);
980 if (i < 0)
981 return i;
983 for (; pattern[0]; pattern += pattern[0]) {
984 if (pattern[1] == SRE_OP_LITERAL &&
985 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
986 continue;
987 if (pattern[1] == SRE_OP_IN &&
988 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
989 continue;
990 state->ptr = ptr;
991 i = SRE_MATCH(state, pattern + 1, level + 1);
992 if (i)
993 return i;
994 if (state->repeat) {
995 i = mark_restore(state, 0, lastmark, &mark_stack_base);
996 if (i < 0)
997 return i;
999 LASTMARK_RESTORE();
1001 return 0;
1003 case SRE_OP_REPEAT_ONE:
1004 /* match repeated sequence (maximizing regexp) */
1006 /* this operator only works if the repeated item is
1007 exactly one character wide, and we're not already
1008 collecting backtracking points. for other cases,
1009 use the MAX_REPEAT operator */
1011 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1013 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
1014 pattern[1], pattern[2]));
1016 if (ptr + pattern[1] > end)
1017 return 0; /* cannot match */
1019 state->ptr = ptr;
1021 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
1022 if (count < 0)
1023 return count;
1025 ptr += count;
1027 /* when we arrive here, count contains the number of
1028 matches, and ptr points to the tail of the target
1029 string. check if the rest of the pattern matches,
1030 and backtrack if not. */
1032 if (count < (int) pattern[1])
1033 return 0;
1035 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
1036 /* tail is empty. we're finished */
1037 state->ptr = ptr;
1038 return 1;
1041 LASTMARK_SAVE();
1043 if (pattern[pattern[0]] == SRE_OP_LITERAL) {
1044 /* tail starts with a literal. skip positions where
1045 the rest of the pattern cannot possibly match */
1046 chr = pattern[pattern[0]+1];
1047 for (;;) {
1048 while (count >= (int) pattern[1] &&
1049 (ptr >= end || *ptr != chr)) {
1050 ptr--;
1051 count--;
1053 if (count < (int) pattern[1])
1054 break;
1055 state->ptr = ptr;
1056 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1057 if (i)
1058 return i;
1059 ptr--;
1060 count--;
1061 LASTMARK_RESTORE();
1064 } else {
1065 /* general case */
1066 while (count >= (int) pattern[1]) {
1067 state->ptr = ptr;
1068 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1069 if (i)
1070 return i;
1071 ptr--;
1072 count--;
1073 LASTMARK_RESTORE();
1076 return 0;
1078 case SRE_OP_MIN_REPEAT_ONE:
1079 /* match repeated sequence (minimizing regexp) */
1081 /* this operator only works if the repeated item is
1082 exactly one character wide, and we're not already
1083 collecting backtracking points. for other cases,
1084 use the MIN_REPEAT operator */
1086 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1088 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
1089 pattern[1], pattern[2]));
1091 if (ptr + pattern[1] > end)
1092 return 0; /* cannot match */
1094 state->ptr = ptr;
1096 if (pattern[1] == 0)
1097 count = 0;
1098 else {
1099 /* count using pattern min as the maximum */
1100 count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
1102 if (count < 0)
1103 return count; /* exception */
1104 if (count < (int) pattern[1])
1105 return 0; /* did not match minimum number of times */
1106 ptr += count; /* advance past minimum matches of repeat */
1109 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
1110 /* tail is empty. we're finished */
1111 state->ptr = ptr;
1112 return 1;
1114 } else {
1115 /* general case */
1116 int matchmax = ((int)pattern[2] == 65535);
1117 int c;
1118 LASTMARK_SAVE();
1119 while (matchmax || count <= (int) pattern[2]) {
1120 state->ptr = ptr;
1121 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1122 if (i)
1123 return i;
1124 state->ptr = ptr;
1125 c = SRE_COUNT(state, pattern+3, 1, level+1);
1126 if (c < 0)
1127 return c;
1128 if (c == 0)
1129 break;
1130 assert(c == 1);
1131 ptr++;
1132 count++;
1133 LASTMARK_RESTORE();
1136 return 0;
1138 case SRE_OP_REPEAT:
1139 /* create repeat context. all the hard work is done
1140 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1141 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1142 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1143 pattern[1], pattern[2]));
1145 rep.count = -1;
1146 rep.pattern = pattern;
1148 /* install new repeat context */
1149 rep.prev = state->repeat;
1150 state->repeat = &rep;
1152 state->ptr = ptr;
1153 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1155 state->repeat = rep.prev;
1157 return i;
1159 case SRE_OP_MAX_UNTIL:
1160 /* maximizing repeat */
1161 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1163 /* FIXME: we probably need to deal with zero-width
1164 matches in here... */
1166 rp = state->repeat;
1167 if (!rp)
1168 return SRE_ERROR_STATE;
1170 state->ptr = ptr;
1172 count = rp->count + 1;
1174 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
1176 if (count < rp->pattern[1]) {
1177 /* not enough matches */
1178 rp->count = count;
1179 /* RECURSIVE */
1180 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1181 if (i)
1182 return i;
1183 rp->count = count - 1;
1184 state->ptr = ptr;
1185 return 0;
1188 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
1189 /* we may have enough matches, but if we can
1190 match another item, do so */
1191 rp->count = count;
1192 LASTMARK_SAVE();
1193 i = mark_save(state, 0, lastmark, &mark_stack_base);
1194 if (i < 0)
1195 return i;
1196 /* RECURSIVE */
1197 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1198 if (i)
1199 return i;
1200 i = mark_restore(state, 0, lastmark, &mark_stack_base);
1201 if (i < 0)
1202 return i;
1203 LASTMARK_RESTORE();
1204 rp->count = count - 1;
1205 state->ptr = ptr;
1208 /* cannot match more repeated items here. make sure the
1209 tail matches */
1210 state->repeat = rp->prev;
1211 i = SRE_MATCH(state, pattern, level + 1);
1212 if (i)
1213 return i;
1214 state->repeat = rp;
1215 state->ptr = ptr;
1216 return 0;
1218 case SRE_OP_MIN_UNTIL:
1219 /* minimizing repeat */
1220 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1222 rp = state->repeat;
1223 if (!rp)
1224 return SRE_ERROR_STATE;
1226 state->ptr = ptr;
1228 count = rp->count + 1;
1230 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1231 rp->pattern));
1233 if (count < rp->pattern[1]) {
1234 /* not enough matches */
1235 rp->count = count;
1236 /* RECURSIVE */
1237 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1238 if (i)
1239 return i;
1240 rp->count = count-1;
1241 state->ptr = ptr;
1242 return 0;
1245 LASTMARK_SAVE();
1247 /* see if the tail matches */
1248 state->repeat = rp->prev;
1249 i = SRE_MATCH(state, pattern, level + 1);
1250 if (i)
1251 return i;
1253 state->ptr = ptr;
1254 state->repeat = rp;
1256 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1257 return 0;
1259 LASTMARK_RESTORE();
1261 rp->count = count;
1262 /* RECURSIVE */
1263 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
1264 if (i)
1265 return i;
1266 rp->count = count - 1;
1267 state->ptr = ptr;
1269 return 0;
1271 default:
1272 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
1273 return SRE_ERROR_ILLEGAL;
1277 /* can't end up here */
1278 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
1281 LOCAL(int)
1282 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1284 SRE_CHAR* ptr = state->start;
1285 SRE_CHAR* end = state->end;
1286 int status = 0;
1287 int prefix_len = 0;
1288 int prefix_skip = 0;
1289 SRE_CODE* prefix = NULL;
1290 SRE_CODE* charset = NULL;
1291 SRE_CODE* overlap = NULL;
1292 int flags = 0;
1294 if (pattern[0] == SRE_OP_INFO) {
1295 /* optimization info block */
1296 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1298 flags = pattern[2];
1300 if (pattern[3] > 1) {
1301 /* adjust end point (but make sure we leave at least one
1302 character in there, so literal search will work) */
1303 end -= pattern[3]-1;
1304 if (end <= ptr)
1305 end = ptr+1;
1308 if (flags & SRE_INFO_PREFIX) {
1309 /* pattern starts with a known prefix */
1310 /* <length> <skip> <prefix data> <overlap data> */
1311 prefix_len = pattern[5];
1312 prefix_skip = pattern[6];
1313 prefix = pattern + 7;
1314 overlap = prefix + prefix_len - 1;
1315 } else if (flags & SRE_INFO_CHARSET)
1316 /* pattern starts with a character from a known set */
1317 /* <charset> */
1318 charset = pattern + 5;
1320 pattern += 1 + pattern[1];
1323 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1324 TRACE(("charset = %p\n", charset));
1326 #if defined(USE_FAST_SEARCH)
1327 if (prefix_len > 1) {
1328 /* pattern starts with a known prefix. use the overlap
1329 table to skip forward as fast as we possibly can */
1330 int i = 0;
1331 end = state->end;
1332 while (ptr < end) {
1333 for (;;) {
1334 if ((SRE_CODE) ptr[0] != prefix[i]) {
1335 if (!i)
1336 break;
1337 else
1338 i = overlap[i];
1339 } else {
1340 if (++i == prefix_len) {
1341 /* found a potential match */
1342 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1343 state->start = ptr + 1 - prefix_len;
1344 state->ptr = ptr + 1 - prefix_len + prefix_skip;
1345 if (flags & SRE_INFO_LITERAL)
1346 return 1; /* we got all of it */
1347 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
1348 if (status != 0)
1349 return status;
1350 /* close but no cigar -- try again */
1351 i = overlap[i];
1353 break;
1357 ptr++;
1359 return 0;
1361 #endif
1363 if (pattern[0] == SRE_OP_LITERAL) {
1364 /* pattern starts with a literal character. this is used
1365 for short prefixes, and if fast search is disabled */
1366 SRE_CODE chr = pattern[1];
1367 end = state->end;
1368 for (;;) {
1369 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1370 ptr++;
1371 if (ptr >= end)
1372 return 0;
1373 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1374 state->start = ptr;
1375 state->ptr = ++ptr;
1376 if (flags & SRE_INFO_LITERAL)
1377 return 1; /* we got all of it */
1378 status = SRE_MATCH(state, pattern + 2, 1);
1379 if (status != 0)
1380 break;
1382 } else if (charset) {
1383 /* pattern starts with a character from a known set */
1384 end = state->end;
1385 for (;;) {
1386 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1387 ptr++;
1388 if (ptr >= end)
1389 return 0;
1390 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1391 state->start = ptr;
1392 state->ptr = ptr;
1393 status = SRE_MATCH(state, pattern, 1);
1394 if (status != 0)
1395 break;
1396 ptr++;
1398 } else
1399 /* general case */
1400 while (ptr <= end) {
1401 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1402 state->start = state->ptr = ptr++;
1403 status = SRE_MATCH(state, pattern, 1);
1404 if (status != 0)
1405 break;
1408 return status;
1411 LOCAL(int)
1412 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1414 /* check if given string is a literal template (i.e. no escapes) */
1415 while (len-- > 0)
1416 if (*ptr++ == '\\')
1417 return 0;
1418 return 1;
1421 #if !defined(SRE_RECURSIVE)
1423 /* -------------------------------------------------------------------- */
1424 /* factories and destructors */
1426 /* see sre.h for object declarations */
1428 static PyTypeObject Pattern_Type;
1429 static PyTypeObject Match_Type;
1430 static PyTypeObject Scanner_Type;
1432 static PyObject *
1433 _compile(PyObject* self_, PyObject* args)
1435 /* "compile" pattern descriptor to pattern object */
1437 PatternObject* self;
1438 int i, n;
1440 PyObject* pattern;
1441 int flags = 0;
1442 PyObject* code;
1443 int groups = 0;
1444 PyObject* groupindex = NULL;
1445 PyObject* indexgroup = NULL;
1446 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1447 &PyList_Type, &code, &groups,
1448 &groupindex, &indexgroup))
1449 return NULL;
1451 n = PyList_GET_SIZE(code);
1453 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1454 if (!self)
1455 return NULL;
1457 self->codesize = n;
1459 for (i = 0; i < n; i++) {
1460 PyObject *o = PyList_GET_ITEM(code, i);
1461 if (PyInt_Check(o))
1462 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
1463 else
1464 self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o);
1467 if (PyErr_Occurred()) {
1468 PyObject_DEL(self);
1469 return NULL;
1472 Py_INCREF(pattern);
1473 self->pattern = pattern;
1475 self->flags = flags;
1477 self->groups = groups;
1479 Py_XINCREF(groupindex);
1480 self->groupindex = groupindex;
1482 Py_XINCREF(indexgroup);
1483 self->indexgroup = indexgroup;
1485 return (PyObject*) self;
1488 static PyObject *
1489 sre_codesize(PyObject* self, PyObject* args)
1491 return Py_BuildValue("i", sizeof(SRE_CODE));
1494 static PyObject *
1495 sre_getlower(PyObject* self, PyObject* args)
1497 int character, flags;
1498 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1499 return NULL;
1500 if (flags & SRE_FLAG_LOCALE)
1501 return Py_BuildValue("i", sre_lower_locale(character));
1502 if (flags & SRE_FLAG_UNICODE)
1503 #if defined(HAVE_UNICODE)
1504 return Py_BuildValue("i", sre_lower_unicode(character));
1505 #else
1506 return Py_BuildValue("i", sre_lower_locale(character));
1507 #endif
1508 return Py_BuildValue("i", sre_lower(character));
1511 LOCAL(void)
1512 state_reset(SRE_STATE* state)
1514 state->lastmark = 0;
1516 /* FIXME: dynamic! */
1517 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
1519 state->lastindex = -1;
1521 state->repeat = NULL;
1523 mark_fini(state);
1526 static void*
1527 getstring(PyObject* string, int* p_length, int* p_charsize)
1529 /* given a python object, return a data pointer, a length (in
1530 characters), and a character size. return NULL if the object
1531 is not a string (or not compatible) */
1533 PyBufferProcs *buffer;
1534 int size, bytes, charsize;
1535 void* ptr;
1537 #if defined(HAVE_UNICODE)
1538 if (PyUnicode_Check(string)) {
1539 /* unicode strings doesn't always support the buffer interface */
1540 ptr = (void*) PyUnicode_AS_DATA(string);
1541 bytes = PyUnicode_GET_DATA_SIZE(string);
1542 size = PyUnicode_GET_SIZE(string);
1543 charsize = sizeof(Py_UNICODE);
1545 } else {
1546 #endif
1548 /* get pointer to string buffer */
1549 buffer = string->ob_type->tp_as_buffer;
1550 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1551 buffer->bf_getsegcount(string, NULL) != 1) {
1552 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1553 return NULL;
1556 /* determine buffer size */
1557 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1558 if (bytes < 0) {
1559 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1560 return NULL;
1563 /* determine character size */
1564 #if PY_VERSION_HEX >= 0x01060000
1565 size = PyObject_Size(string);
1566 #else
1567 size = PyObject_Length(string);
1568 #endif
1570 if (PyString_Check(string) || bytes == size)
1571 charsize = 1;
1572 #if defined(HAVE_UNICODE)
1573 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1574 charsize = sizeof(Py_UNICODE);
1575 #endif
1576 else {
1577 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1578 return NULL;
1581 #if defined(HAVE_UNICODE)
1583 #endif
1585 *p_length = size;
1586 *p_charsize = charsize;
1588 return ptr;
1591 LOCAL(PyObject*)
1592 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1593 int start, int end)
1595 /* prepare state object */
1597 int length;
1598 int charsize;
1599 void* ptr;
1601 memset(state, 0, sizeof(SRE_STATE));
1603 state->lastindex = -1;
1605 ptr = getstring(string, &length, &charsize);
1606 if (!ptr)
1607 return NULL;
1609 /* adjust boundaries */
1610 if (start < 0)
1611 start = 0;
1612 else if (start > length)
1613 start = length;
1615 if (end < 0)
1616 end = 0;
1617 else if (end > length)
1618 end = length;
1620 state->charsize = charsize;
1622 state->beginning = ptr;
1624 state->start = (void*) ((char*) ptr + start * state->charsize);
1625 state->end = (void*) ((char*) ptr + end * state->charsize);
1627 Py_INCREF(string);
1628 state->string = string;
1629 state->pos = start;
1630 state->endpos = end;
1632 if (pattern->flags & SRE_FLAG_LOCALE)
1633 state->lower = sre_lower_locale;
1634 else if (pattern->flags & SRE_FLAG_UNICODE)
1635 #if defined(HAVE_UNICODE)
1636 state->lower = sre_lower_unicode;
1637 #else
1638 state->lower = sre_lower_locale;
1639 #endif
1640 else
1641 state->lower = sre_lower;
1643 return string;
1646 LOCAL(void)
1647 state_fini(SRE_STATE* state)
1649 Py_XDECREF(state->string);
1650 mark_fini(state);
1653 /* calculate offset from start of string */
1654 #define STATE_OFFSET(state, member)\
1655 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1657 LOCAL(PyObject*)
1658 state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
1660 int i, j;
1662 index = (index - 1) * 2;
1664 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1665 if (empty)
1666 /* want empty string */
1667 i = j = 0;
1668 else {
1669 Py_INCREF(Py_None);
1670 return Py_None;
1672 } else {
1673 i = STATE_OFFSET(state, state->mark[index]);
1674 j = STATE_OFFSET(state, state->mark[index+1]);
1677 return PySequence_GetSlice(string, i, j);
1680 static void
1681 pattern_error(int status)
1683 switch (status) {
1684 case SRE_ERROR_RECURSION_LIMIT:
1685 PyErr_SetString(
1686 PyExc_RuntimeError,
1687 "maximum recursion limit exceeded"
1689 break;
1690 case SRE_ERROR_MEMORY:
1691 PyErr_NoMemory();
1692 break;
1693 default:
1694 /* other error codes indicate compiler/engine bugs */
1695 PyErr_SetString(
1696 PyExc_RuntimeError,
1697 "internal error in regular expression engine"
1702 static PyObject*
1703 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
1705 /* create match object (from state object) */
1707 MatchObject* match;
1708 int i, j;
1709 char* base;
1710 int n;
1712 if (status > 0) {
1714 /* create match object (with room for extra group marks) */
1715 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
1716 2*(pattern->groups+1));
1717 if (!match)
1718 return NULL;
1720 Py_INCREF(pattern);
1721 match->pattern = pattern;
1723 Py_INCREF(state->string);
1724 match->string = state->string;
1726 match->regs = NULL;
1727 match->groups = pattern->groups+1;
1729 /* fill in group slices */
1731 base = (char*) state->beginning;
1732 n = state->charsize;
1734 match->mark[0] = ((char*) state->start - base) / n;
1735 match->mark[1] = ((char*) state->ptr - base) / n;
1737 for (i = j = 0; i < pattern->groups; i++, j+=2)
1738 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1739 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1740 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1741 } else
1742 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1744 match->pos = state->pos;
1745 match->endpos = state->endpos;
1747 match->lastindex = state->lastindex;
1749 return (PyObject*) match;
1751 } else if (status == 0) {
1753 /* no match */
1754 Py_INCREF(Py_None);
1755 return Py_None;
1759 /* internal error */
1760 pattern_error(status);
1761 return NULL;
1764 static PyObject*
1765 pattern_scanner(PatternObject* pattern, PyObject* args)
1767 /* create search state object */
1769 ScannerObject* self;
1771 PyObject* string;
1772 int start = 0;
1773 int end = INT_MAX;
1774 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1775 return NULL;
1777 /* create scanner object */
1778 self = PyObject_NEW(ScannerObject, &Scanner_Type);
1779 if (!self)
1780 return NULL;
1782 string = state_init(&self->state, pattern, string, start, end);
1783 if (!string) {
1784 PyObject_DEL(self);
1785 return NULL;
1788 Py_INCREF(pattern);
1789 self->pattern = (PyObject*) pattern;
1791 return (PyObject*) self;
1794 static void
1795 pattern_dealloc(PatternObject* self)
1797 Py_XDECREF(self->pattern);
1798 Py_XDECREF(self->groupindex);
1799 Py_XDECREF(self->indexgroup);
1800 PyObject_DEL(self);
1803 static PyObject*
1804 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1806 SRE_STATE state;
1807 int status;
1809 PyObject* string;
1810 int start = 0;
1811 int end = INT_MAX;
1812 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1813 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1814 &string, &start, &end))
1815 return NULL;
1817 string = state_init(&state, self, string, start, end);
1818 if (!string)
1819 return NULL;
1821 state.ptr = state.start;
1823 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1825 if (state.charsize == 1) {
1826 status = sre_match(&state, PatternObject_GetCode(self), 1);
1827 } else {
1828 #if defined(HAVE_UNICODE)
1829 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
1830 #endif
1833 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1835 state_fini(&state);
1837 return pattern_new_match(self, &state, status);
1840 static PyObject*
1841 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1843 SRE_STATE state;
1844 int status;
1846 PyObject* string;
1847 int start = 0;
1848 int end = INT_MAX;
1849 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1850 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1851 &string, &start, &end))
1852 return NULL;
1854 string = state_init(&state, self, string, start, end);
1855 if (!string)
1856 return NULL;
1858 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1860 if (state.charsize == 1) {
1861 status = sre_search(&state, PatternObject_GetCode(self));
1862 } else {
1863 #if defined(HAVE_UNICODE)
1864 status = sre_usearch(&state, PatternObject_GetCode(self));
1865 #endif
1868 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1870 state_fini(&state);
1872 return pattern_new_match(self, &state, status);
1875 static PyObject*
1876 call(char* module, char* function, PyObject* args)
1878 PyObject* name;
1879 PyObject* mod;
1880 PyObject* func;
1881 PyObject* result;
1883 if (!args)
1884 return NULL;
1885 name = PyString_FromString(module);
1886 if (!name)
1887 return NULL;
1888 mod = PyImport_Import(name);
1889 Py_DECREF(name);
1890 if (!mod)
1891 return NULL;
1892 func = PyObject_GetAttrString(mod, function);
1893 Py_DECREF(mod);
1894 if (!func)
1895 return NULL;
1896 result = PyObject_CallObject(func, args);
1897 Py_DECREF(func);
1898 Py_DECREF(args);
1899 return result;
1902 #ifdef USE_BUILTIN_COPY
1903 static int
1904 deepcopy(PyObject** object, PyObject* memo)
1906 PyObject* copy;
1908 copy = call(
1909 "copy", "deepcopy",
1910 Py_BuildValue("OO", *object, memo)
1912 if (!copy)
1913 return 0;
1915 Py_DECREF(*object);
1916 *object = copy;
1918 return 1; /* success */
1920 #endif
1922 static PyObject*
1923 join_list(PyObject* list, PyObject* pattern)
1925 /* join list elements */
1927 PyObject* joiner;
1928 #if PY_VERSION_HEX >= 0x01060000
1929 PyObject* function;
1930 PyObject* args;
1931 #endif
1932 PyObject* result;
1934 switch (PyList_GET_SIZE(list)) {
1935 case 0:
1936 Py_DECREF(list);
1937 return PySequence_GetSlice(pattern, 0, 0);
1938 case 1:
1939 result = PyList_GET_ITEM(list, 0);
1940 Py_INCREF(result);
1941 Py_DECREF(list);
1942 return result;
1945 /* two or more elements: slice out a suitable separator from the
1946 first member, and use that to join the entire list */
1948 joiner = PySequence_GetSlice(pattern, 0, 0);
1949 if (!joiner)
1950 return NULL;
1952 #if PY_VERSION_HEX >= 0x01060000
1953 function = PyObject_GetAttrString(joiner, "join");
1954 if (!function) {
1955 Py_DECREF(joiner);
1956 return NULL;
1958 args = PyTuple_New(1);
1959 if (!args) {
1960 Py_DECREF(function);
1961 Py_DECREF(joiner);
1962 return NULL;
1964 PyTuple_SET_ITEM(args, 0, list);
1965 result = PyObject_CallObject(function, args);
1966 Py_DECREF(args); /* also removes list */
1967 Py_DECREF(function);
1968 #else
1969 result = call(
1970 "string", "join",
1971 Py_BuildValue("OO", list, joiner)
1973 #endif
1974 Py_DECREF(joiner);
1976 return result;
1979 static PyObject*
1980 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
1982 SRE_STATE state;
1983 PyObject* list;
1984 int status;
1985 int i, b, e;
1987 PyObject* string;
1988 int start = 0;
1989 int end = INT_MAX;
1990 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1991 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1992 &string, &start, &end))
1993 return NULL;
1995 string = state_init(&state, self, string, start, end);
1996 if (!string)
1997 return NULL;
1999 list = PyList_New(0);
2000 if (!list) {
2001 state_fini(&state);
2002 return NULL;
2005 while (state.start <= state.end) {
2007 PyObject* item;
2009 state_reset(&state);
2011 state.ptr = state.start;
2013 if (state.charsize == 1) {
2014 status = sre_search(&state, PatternObject_GetCode(self));
2015 } else {
2016 #if defined(HAVE_UNICODE)
2017 status = sre_usearch(&state, PatternObject_GetCode(self));
2018 #endif
2021 if (status <= 0) {
2022 if (status == 0)
2023 break;
2024 pattern_error(status);
2025 goto error;
2028 /* don't bother to build a match object */
2029 switch (self->groups) {
2030 case 0:
2031 b = STATE_OFFSET(&state, state.start);
2032 e = STATE_OFFSET(&state, state.ptr);
2033 item = PySequence_GetSlice(string, b, e);
2034 if (!item)
2035 goto error;
2036 break;
2037 case 1:
2038 item = state_getslice(&state, 1, string, 1);
2039 if (!item)
2040 goto error;
2041 break;
2042 default:
2043 item = PyTuple_New(self->groups);
2044 if (!item)
2045 goto error;
2046 for (i = 0; i < self->groups; i++) {
2047 PyObject* o = state_getslice(&state, i+1, string, 1);
2048 if (!o) {
2049 Py_DECREF(item);
2050 goto error;
2052 PyTuple_SET_ITEM(item, i, o);
2054 break;
2057 status = PyList_Append(list, item);
2058 Py_DECREF(item);
2059 if (status < 0)
2060 goto error;
2062 if (state.ptr == state.start)
2063 state.start = (void*) ((char*) state.ptr + state.charsize);
2064 else
2065 state.start = state.ptr;
2069 state_fini(&state);
2070 return list;
2072 error:
2073 Py_DECREF(list);
2074 state_fini(&state);
2075 return NULL;
2079 #if PY_VERSION_HEX >= 0x02020000
2080 static PyObject*
2081 pattern_finditer(PatternObject* pattern, PyObject* args)
2083 PyObject* scanner;
2084 PyObject* search;
2085 PyObject* iterator;
2087 scanner = pattern_scanner(pattern, args);
2088 if (!scanner)
2089 return NULL;
2091 search = PyObject_GetAttrString(scanner, "search");
2092 Py_DECREF(scanner);
2093 if (!search)
2094 return NULL;
2096 iterator = PyCallIter_New(search, Py_None);
2097 Py_DECREF(search);
2099 return iterator;
2101 #endif
2103 static PyObject*
2104 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2106 SRE_STATE state;
2107 PyObject* list;
2108 PyObject* item;
2109 int status;
2110 int n;
2111 int i;
2112 void* last;
2114 PyObject* string;
2115 int maxsplit = 0;
2116 static char* kwlist[] = { "source", "maxsplit", NULL };
2117 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
2118 &string, &maxsplit))
2119 return NULL;
2121 string = state_init(&state, self, string, 0, INT_MAX);
2122 if (!string)
2123 return NULL;
2125 list = PyList_New(0);
2126 if (!list) {
2127 state_fini(&state);
2128 return NULL;
2131 n = 0;
2132 last = state.start;
2134 while (!maxsplit || n < maxsplit) {
2136 state_reset(&state);
2138 state.ptr = state.start;
2140 if (state.charsize == 1) {
2141 status = sre_search(&state, PatternObject_GetCode(self));
2142 } else {
2143 #if defined(HAVE_UNICODE)
2144 status = sre_usearch(&state, PatternObject_GetCode(self));
2145 #endif
2148 if (status <= 0) {
2149 if (status == 0)
2150 break;
2151 pattern_error(status);
2152 goto error;
2155 if (state.start == state.ptr) {
2156 if (last == state.end)
2157 break;
2158 /* skip one character */
2159 state.start = (void*) ((char*) state.ptr + state.charsize);
2160 continue;
2163 /* get segment before this match */
2164 item = PySequence_GetSlice(
2165 string, STATE_OFFSET(&state, last),
2166 STATE_OFFSET(&state, state.start)
2168 if (!item)
2169 goto error;
2170 status = PyList_Append(list, item);
2171 Py_DECREF(item);
2172 if (status < 0)
2173 goto error;
2175 /* add groups (if any) */
2176 for (i = 0; i < self->groups; i++) {
2177 item = state_getslice(&state, i+1, string, 0);
2178 if (!item)
2179 goto error;
2180 status = PyList_Append(list, item);
2181 Py_DECREF(item);
2182 if (status < 0)
2183 goto error;
2186 n = n + 1;
2188 last = state.start = state.ptr;
2192 /* get segment following last match (even if empty) */
2193 item = PySequence_GetSlice(
2194 string, STATE_OFFSET(&state, last), state.endpos
2196 if (!item)
2197 goto error;
2198 status = PyList_Append(list, item);
2199 Py_DECREF(item);
2200 if (status < 0)
2201 goto error;
2203 state_fini(&state);
2204 return list;
2206 error:
2207 Py_DECREF(list);
2208 state_fini(&state);
2209 return NULL;
2213 static PyObject*
2214 pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2215 int count, int subn)
2217 SRE_STATE state;
2218 PyObject* list;
2219 PyObject* item;
2220 PyObject* filter;
2221 PyObject* args;
2222 PyObject* match;
2223 void* ptr;
2224 int status;
2225 int n;
2226 int i, b, e;
2227 int filter_is_callable;
2229 if (PyCallable_Check(template)) {
2230 /* sub/subn takes either a function or a template */
2231 filter = template;
2232 Py_INCREF(filter);
2233 filter_is_callable = 1;
2234 } else {
2235 /* if not callable, check if it's a literal string */
2236 int literal;
2237 ptr = getstring(template, &n, &b);
2238 if (ptr) {
2239 if (b == 1) {
2240 literal = sre_literal_template(ptr, n);
2241 } else {
2242 #if defined(HAVE_UNICODE)
2243 literal = sre_uliteral_template(ptr, n);
2244 #endif
2246 } else {
2247 PyErr_Clear();
2248 literal = 0;
2250 if (literal) {
2251 filter = template;
2252 Py_INCREF(filter);
2253 filter_is_callable = 0;
2254 } else {
2255 /* not a literal; hand it over to the template compiler */
2256 filter = call(
2257 SRE_MODULE, "_subx",
2258 Py_BuildValue("OO", self, template)
2260 if (!filter)
2261 return NULL;
2262 filter_is_callable = PyCallable_Check(filter);
2266 string = state_init(&state, self, string, 0, INT_MAX);
2267 if (!string) {
2268 Py_DECREF(filter);
2269 return NULL;
2272 list = PyList_New(0);
2273 if (!list) {
2274 Py_DECREF(filter);
2275 state_fini(&state);
2276 return NULL;
2279 n = i = 0;
2281 while (!count || n < count) {
2283 state_reset(&state);
2285 state.ptr = state.start;
2287 if (state.charsize == 1) {
2288 status = sre_search(&state, PatternObject_GetCode(self));
2289 } else {
2290 #if defined(HAVE_UNICODE)
2291 status = sre_usearch(&state, PatternObject_GetCode(self));
2292 #endif
2295 if (status <= 0) {
2296 if (status == 0)
2297 break;
2298 pattern_error(status);
2299 goto error;
2302 b = STATE_OFFSET(&state, state.start);
2303 e = STATE_OFFSET(&state, state.ptr);
2305 if (i < b) {
2306 /* get segment before this match */
2307 item = PySequence_GetSlice(string, i, b);
2308 if (!item)
2309 goto error;
2310 status = PyList_Append(list, item);
2311 Py_DECREF(item);
2312 if (status < 0)
2313 goto error;
2315 } else if (i == b && i == e && n > 0)
2316 /* ignore empty match on latest position */
2317 goto next;
2319 if (filter_is_callable) {
2320 /* pass match object through filter */
2321 match = pattern_new_match(self, &state, 1);
2322 if (!match)
2323 goto error;
2324 args = Py_BuildValue("(O)", match);
2325 if (!args) {
2326 Py_DECREF(match);
2327 goto error;
2329 item = PyObject_CallObject(filter, args);
2330 Py_DECREF(args);
2331 Py_DECREF(match);
2332 if (!item)
2333 goto error;
2334 } else {
2335 /* filter is literal string */
2336 item = filter;
2337 Py_INCREF(item);
2340 /* add to list */
2341 if (item != Py_None) {
2342 status = PyList_Append(list, item);
2343 Py_DECREF(item);
2344 if (status < 0)
2345 goto error;
2348 i = e;
2349 n = n + 1;
2351 next:
2352 /* move on */
2353 if (state.ptr == state.start)
2354 state.start = (void*) ((char*) state.ptr + state.charsize);
2355 else
2356 state.start = state.ptr;
2360 /* get segment following last match */
2361 if (i < state.endpos) {
2362 item = PySequence_GetSlice(string, i, state.endpos);
2363 if (!item)
2364 goto error;
2365 status = PyList_Append(list, item);
2366 Py_DECREF(item);
2367 if (status < 0)
2368 goto error;
2371 state_fini(&state);
2373 Py_DECREF(filter);
2375 /* convert list to single string (also removes list) */
2376 item = join_list(list, self->pattern);
2378 if (!item)
2379 return NULL;
2381 if (subn)
2382 return Py_BuildValue("Ni", item, n);
2384 return item;
2386 error:
2387 Py_DECREF(list);
2388 state_fini(&state);
2389 Py_DECREF(filter);
2390 return NULL;
2394 static PyObject*
2395 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2397 PyObject* template;
2398 PyObject* string;
2399 int count = 0;
2400 static char* kwlist[] = { "repl", "string", "count", NULL };
2401 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2402 &template, &string, &count))
2403 return NULL;
2405 return pattern_subx(self, template, string, count, 0);
2408 static PyObject*
2409 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2411 PyObject* template;
2412 PyObject* string;
2413 int count = 0;
2414 static char* kwlist[] = { "repl", "string", "count", NULL };
2415 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2416 &template, &string, &count))
2417 return NULL;
2419 return pattern_subx(self, template, string, count, 1);
2422 static PyObject*
2423 pattern_copy(PatternObject* self, PyObject* args)
2425 #ifdef USE_BUILTIN_COPY
2426 PatternObject* copy;
2427 int offset;
2429 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2430 return NULL;
2432 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2433 if (!copy)
2434 return NULL;
2436 offset = offsetof(PatternObject, groups);
2438 Py_XINCREF(self->groupindex);
2439 Py_XINCREF(self->indexgroup);
2440 Py_XINCREF(self->pattern);
2442 memcpy((char*) copy + offset, (char*) self + offset,
2443 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2445 return (PyObject*) copy;
2446 #else
2447 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2448 return NULL;
2449 #endif
2452 static PyObject*
2453 pattern_deepcopy(PatternObject* self, PyObject* args)
2455 #ifdef USE_BUILTIN_COPY
2456 PatternObject* copy;
2458 PyObject* memo;
2459 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2460 return NULL;
2462 copy = (PatternObject*) pattern_copy(self, Py_None);
2463 if (!copy)
2464 return NULL;
2466 if (!deepcopy(&copy->groupindex, memo) ||
2467 !deepcopy(&copy->indexgroup, memo) ||
2468 !deepcopy(&copy->pattern, memo)) {
2469 Py_DECREF(copy);
2470 return NULL;
2473 #else
2474 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2475 return NULL;
2476 #endif
2479 static PyMethodDef pattern_methods[] = {
2480 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2481 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2482 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2483 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2484 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2485 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
2486 #if PY_VERSION_HEX >= 0x02020000
2487 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2488 #endif
2489 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
2490 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2491 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
2492 {NULL, NULL}
2495 static PyObject*
2496 pattern_getattr(PatternObject* self, char* name)
2498 PyObject* res;
2500 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
2502 if (res)
2503 return res;
2505 PyErr_Clear();
2507 /* attributes */
2508 if (!strcmp(name, "pattern")) {
2509 Py_INCREF(self->pattern);
2510 return self->pattern;
2513 if (!strcmp(name, "flags"))
2514 return Py_BuildValue("i", self->flags);
2516 if (!strcmp(name, "groups"))
2517 return Py_BuildValue("i", self->groups);
2519 if (!strcmp(name, "groupindex") && self->groupindex) {
2520 Py_INCREF(self->groupindex);
2521 return self->groupindex;
2524 PyErr_SetString(PyExc_AttributeError, name);
2525 return NULL;
2528 statichere PyTypeObject Pattern_Type = {
2529 PyObject_HEAD_INIT(NULL)
2530 0, "_" SRE_MODULE ".SRE_Pattern",
2531 sizeof(PatternObject), sizeof(SRE_CODE),
2532 (destructor)pattern_dealloc, /*tp_dealloc*/
2533 0, /*tp_print*/
2534 (getattrfunc)pattern_getattr /*tp_getattr*/
2537 /* -------------------------------------------------------------------- */
2538 /* match methods */
2540 static void
2541 match_dealloc(MatchObject* self)
2543 Py_XDECREF(self->regs);
2544 Py_XDECREF(self->string);
2545 Py_DECREF(self->pattern);
2546 PyObject_DEL(self);
2549 static PyObject*
2550 match_getslice_by_index(MatchObject* self, int index, PyObject* def)
2552 if (index < 0 || index >= self->groups) {
2553 /* raise IndexError if we were given a bad group number */
2554 PyErr_SetString(
2555 PyExc_IndexError,
2556 "no such group"
2558 return NULL;
2561 index *= 2;
2563 if (self->string == Py_None || self->mark[index] < 0) {
2564 /* return default value if the string or group is undefined */
2565 Py_INCREF(def);
2566 return def;
2569 return PySequence_GetSlice(
2570 self->string, self->mark[index], self->mark[index+1]
2574 static int
2575 match_getindex(MatchObject* self, PyObject* index)
2577 int i;
2579 if (PyInt_Check(index))
2580 return (int) PyInt_AS_LONG(index);
2582 i = -1;
2584 if (self->pattern->groupindex) {
2585 index = PyObject_GetItem(self->pattern->groupindex, index);
2586 if (index) {
2587 if (PyInt_Check(index))
2588 i = (int) PyInt_AS_LONG(index);
2589 Py_DECREF(index);
2590 } else
2591 PyErr_Clear();
2594 return i;
2597 static PyObject*
2598 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2600 return match_getslice_by_index(self, match_getindex(self, index), def);
2603 static PyObject*
2604 match_expand(MatchObject* self, PyObject* args)
2606 PyObject* template;
2607 if (!PyArg_ParseTuple(args, "O:expand", &template))
2608 return NULL;
2610 /* delegate to Python code */
2611 return call(
2612 SRE_MODULE, "_expand",
2613 Py_BuildValue("OOO", self->pattern, self, template)
2617 static PyObject*
2618 match_group(MatchObject* self, PyObject* args)
2620 PyObject* result;
2621 int i, size;
2623 size = PyTuple_GET_SIZE(args);
2625 switch (size) {
2626 case 0:
2627 result = match_getslice(self, Py_False, Py_None);
2628 break;
2629 case 1:
2630 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2631 break;
2632 default:
2633 /* fetch multiple items */
2634 result = PyTuple_New(size);
2635 if (!result)
2636 return NULL;
2637 for (i = 0; i < size; i++) {
2638 PyObject* item = match_getslice(
2639 self, PyTuple_GET_ITEM(args, i), Py_None
2641 if (!item) {
2642 Py_DECREF(result);
2643 return NULL;
2645 PyTuple_SET_ITEM(result, i, item);
2647 break;
2649 return result;
2652 static PyObject*
2653 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
2655 PyObject* result;
2656 int index;
2658 PyObject* def = Py_None;
2659 static char* kwlist[] = { "default", NULL };
2660 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
2661 return NULL;
2663 result = PyTuple_New(self->groups-1);
2664 if (!result)
2665 return NULL;
2667 for (index = 1; index < self->groups; index++) {
2668 PyObject* item;
2669 item = match_getslice_by_index(self, index, def);
2670 if (!item) {
2671 Py_DECREF(result);
2672 return NULL;
2674 PyTuple_SET_ITEM(result, index-1, item);
2677 return result;
2680 static PyObject*
2681 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
2683 PyObject* result;
2684 PyObject* keys;
2685 int index;
2687 PyObject* def = Py_None;
2688 static char* kwlist[] = { "default", NULL };
2689 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
2690 return NULL;
2692 result = PyDict_New();
2693 if (!result || !self->pattern->groupindex)
2694 return result;
2696 keys = PyMapping_Keys(self->pattern->groupindex);
2697 if (!keys)
2698 goto failed;
2700 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
2701 int status;
2702 PyObject* key;
2703 PyObject* value;
2704 key = PyList_GET_ITEM(keys, index);
2705 if (!key)
2706 goto failed;
2707 value = match_getslice(self, key, def);
2708 if (!value) {
2709 Py_DECREF(key);
2710 goto failed;
2712 status = PyDict_SetItem(result, key, value);
2713 Py_DECREF(value);
2714 if (status < 0)
2715 goto failed;
2718 Py_DECREF(keys);
2720 return result;
2722 failed:
2723 Py_DECREF(keys);
2724 Py_DECREF(result);
2725 return NULL;
2728 static PyObject*
2729 match_start(MatchObject* self, PyObject* args)
2731 int index;
2733 PyObject* index_ = Py_False; /* zero */
2734 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2735 return NULL;
2737 index = match_getindex(self, index_);
2739 if (index < 0 || index >= self->groups) {
2740 PyErr_SetString(
2741 PyExc_IndexError,
2742 "no such group"
2744 return NULL;
2747 /* mark is -1 if group is undefined */
2748 return Py_BuildValue("i", self->mark[index*2]);
2751 static PyObject*
2752 match_end(MatchObject* self, PyObject* args)
2754 int index;
2756 PyObject* index_ = Py_False; /* zero */
2757 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2758 return NULL;
2760 index = match_getindex(self, index_);
2762 if (index < 0 || index >= self->groups) {
2763 PyErr_SetString(
2764 PyExc_IndexError,
2765 "no such group"
2767 return NULL;
2770 /* mark is -1 if group is undefined */
2771 return Py_BuildValue("i", self->mark[index*2+1]);
2774 LOCAL(PyObject*)
2775 _pair(int i1, int i2)
2777 PyObject* pair;
2778 PyObject* item;
2780 pair = PyTuple_New(2);
2781 if (!pair)
2782 return NULL;
2784 item = PyInt_FromLong(i1);
2785 if (!item)
2786 goto error;
2787 PyTuple_SET_ITEM(pair, 0, item);
2789 item = PyInt_FromLong(i2);
2790 if (!item)
2791 goto error;
2792 PyTuple_SET_ITEM(pair, 1, item);
2794 return pair;
2796 error:
2797 Py_DECREF(pair);
2798 return NULL;
2801 static PyObject*
2802 match_span(MatchObject* self, PyObject* args)
2804 int index;
2806 PyObject* index_ = Py_False; /* zero */
2807 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2808 return NULL;
2810 index = match_getindex(self, index_);
2812 if (index < 0 || index >= self->groups) {
2813 PyErr_SetString(
2814 PyExc_IndexError,
2815 "no such group"
2817 return NULL;
2820 /* marks are -1 if group is undefined */
2821 return _pair(self->mark[index*2], self->mark[index*2+1]);
2824 static PyObject*
2825 match_regs(MatchObject* self)
2827 PyObject* regs;
2828 PyObject* item;
2829 int index;
2831 regs = PyTuple_New(self->groups);
2832 if (!regs)
2833 return NULL;
2835 for (index = 0; index < self->groups; index++) {
2836 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2837 if (!item) {
2838 Py_DECREF(regs);
2839 return NULL;
2841 PyTuple_SET_ITEM(regs, index, item);
2844 Py_INCREF(regs);
2845 self->regs = regs;
2847 return regs;
2850 static PyObject*
2851 match_copy(MatchObject* self, PyObject* args)
2853 #ifdef USE_BUILTIN_COPY
2854 MatchObject* copy;
2855 int slots, offset;
2857 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2858 return NULL;
2860 slots = 2 * (self->pattern->groups+1);
2862 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2863 if (!copy)
2864 return NULL;
2866 /* this value a constant, but any compiler should be able to
2867 figure that out all by itself */
2868 offset = offsetof(MatchObject, string);
2870 Py_XINCREF(self->pattern);
2871 Py_XINCREF(self->string);
2872 Py_XINCREF(self->regs);
2874 memcpy((char*) copy + offset, (char*) self + offset,
2875 sizeof(MatchObject) + slots * sizeof(int) - offset);
2877 return (PyObject*) copy;
2878 #else
2879 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
2880 return NULL;
2881 #endif
2884 static PyObject*
2885 match_deepcopy(MatchObject* self, PyObject* args)
2887 #ifdef USE_BUILTIN_COPY
2888 MatchObject* copy;
2890 PyObject* memo;
2891 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2892 return NULL;
2894 copy = (MatchObject*) match_copy(self, Py_None);
2895 if (!copy)
2896 return NULL;
2898 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2899 !deepcopy(&copy->string, memo) ||
2900 !deepcopy(&copy->regs, memo)) {
2901 Py_DECREF(copy);
2902 return NULL;
2905 #else
2906 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2907 return NULL;
2908 #endif
2911 static PyMethodDef match_methods[] = {
2912 {"group", (PyCFunction) match_group, METH_VARARGS},
2913 {"start", (PyCFunction) match_start, METH_VARARGS},
2914 {"end", (PyCFunction) match_end, METH_VARARGS},
2915 {"span", (PyCFunction) match_span, METH_VARARGS},
2916 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2917 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2918 {"expand", (PyCFunction) match_expand, METH_VARARGS},
2919 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2920 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
2921 {NULL, NULL}
2924 static PyObject*
2925 match_getattr(MatchObject* self, char* name)
2927 PyObject* res;
2929 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2930 if (res)
2931 return res;
2933 PyErr_Clear();
2935 if (!strcmp(name, "lastindex")) {
2936 if (self->lastindex >= 0)
2937 return Py_BuildValue("i", self->lastindex);
2938 Py_INCREF(Py_None);
2939 return Py_None;
2942 if (!strcmp(name, "lastgroup")) {
2943 if (self->pattern->indexgroup && self->lastindex >= 0) {
2944 PyObject* result = PySequence_GetItem(
2945 self->pattern->indexgroup, self->lastindex
2947 if (result)
2948 return result;
2949 PyErr_Clear();
2951 Py_INCREF(Py_None);
2952 return Py_None;
2955 if (!strcmp(name, "string")) {
2956 if (self->string) {
2957 Py_INCREF(self->string);
2958 return self->string;
2959 } else {
2960 Py_INCREF(Py_None);
2961 return Py_None;
2965 if (!strcmp(name, "regs")) {
2966 if (self->regs) {
2967 Py_INCREF(self->regs);
2968 return self->regs;
2969 } else
2970 return match_regs(self);
2973 if (!strcmp(name, "re")) {
2974 Py_INCREF(self->pattern);
2975 return (PyObject*) self->pattern;
2978 if (!strcmp(name, "pos"))
2979 return Py_BuildValue("i", self->pos);
2981 if (!strcmp(name, "endpos"))
2982 return Py_BuildValue("i", self->endpos);
2984 PyErr_SetString(PyExc_AttributeError, name);
2985 return NULL;
2988 /* FIXME: implement setattr("string", None) as a special case (to
2989 detach the associated string, if any */
2991 statichere PyTypeObject Match_Type = {
2992 PyObject_HEAD_INIT(NULL)
2993 0, "_" SRE_MODULE ".SRE_Match",
2994 sizeof(MatchObject), sizeof(int),
2995 (destructor)match_dealloc, /*tp_dealloc*/
2996 0, /*tp_print*/
2997 (getattrfunc)match_getattr /*tp_getattr*/
3000 /* -------------------------------------------------------------------- */
3001 /* scanner methods (experimental) */
3003 static void
3004 scanner_dealloc(ScannerObject* self)
3006 state_fini(&self->state);
3007 Py_DECREF(self->pattern);
3008 PyObject_DEL(self);
3011 static PyObject*
3012 scanner_match(ScannerObject* self, PyObject* args)
3014 SRE_STATE* state = &self->state;
3015 PyObject* match;
3016 int status;
3018 state_reset(state);
3020 state->ptr = state->start;
3022 if (state->charsize == 1) {
3023 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
3024 } else {
3025 #if defined(HAVE_UNICODE)
3026 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
3027 #endif
3030 match = pattern_new_match((PatternObject*) self->pattern,
3031 state, status);
3033 if ((status == 0 || state->ptr == state->start) &&
3034 state->ptr < state->end)
3035 state->start = (void*) ((char*) state->ptr + state->charsize);
3036 else
3037 state->start = state->ptr;
3039 return match;
3043 static PyObject*
3044 scanner_search(ScannerObject* self, PyObject* args)
3046 SRE_STATE* state = &self->state;
3047 PyObject* match;
3048 int status;
3050 state_reset(state);
3052 state->ptr = state->start;
3054 if (state->charsize == 1) {
3055 status = sre_search(state, PatternObject_GetCode(self->pattern));
3056 } else {
3057 #if defined(HAVE_UNICODE)
3058 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3059 #endif
3062 match = pattern_new_match((PatternObject*) self->pattern,
3063 state, status);
3065 if ((status == 0 || state->ptr == state->start) &&
3066 state->ptr < state->end)
3067 state->start = (void*) ((char*) state->ptr + state->charsize);
3068 else
3069 state->start = state->ptr;
3071 return match;
3074 static PyMethodDef scanner_methods[] = {
3075 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
3076 /* METH_OLDARGS is not in Python 1.5.2 */
3077 {"match", (PyCFunction) scanner_match, 0},
3078 {"search", (PyCFunction) scanner_search, 0},
3079 {NULL, NULL}
3082 static PyObject*
3083 scanner_getattr(ScannerObject* self, char* name)
3085 PyObject* res;
3087 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
3088 if (res)
3089 return res;
3091 PyErr_Clear();
3093 /* attributes */
3094 if (!strcmp(name, "pattern")) {
3095 Py_INCREF(self->pattern);
3096 return self->pattern;
3099 PyErr_SetString(PyExc_AttributeError, name);
3100 return NULL;
3103 statichere PyTypeObject Scanner_Type = {
3104 PyObject_HEAD_INIT(NULL)
3105 0, "_" SRE_MODULE ".SRE_Scanner",
3106 sizeof(ScannerObject), 0,
3107 (destructor)scanner_dealloc, /*tp_dealloc*/
3108 0, /*tp_print*/
3109 (getattrfunc)scanner_getattr, /*tp_getattr*/
3112 static PyMethodDef _functions[] = {
3113 {"compile", _compile, METH_VARARGS},
3114 {"getcodesize", sre_codesize, METH_VARARGS},
3115 {"getlower", sre_getlower, METH_VARARGS},
3116 {NULL, NULL}
3119 #if PY_VERSION_HEX < 0x02030000
3120 DL_EXPORT(void) init_sre(void)
3121 #else
3122 PyMODINIT_FUNC init_sre(void)
3123 #endif
3125 PyObject* m;
3126 PyObject* d;
3127 PyObject* x;
3129 /* Patch object types */
3130 Pattern_Type.ob_type = Match_Type.ob_type =
3131 Scanner_Type.ob_type = &PyType_Type;
3133 m = Py_InitModule("_" SRE_MODULE, _functions);
3134 d = PyModule_GetDict(m);
3136 x = PyInt_FromLong(SRE_MAGIC);
3137 if (x) {
3138 PyDict_SetItemString(d, "MAGIC", x);
3139 Py_DECREF(x);
3142 x = PyInt_FromLong(sizeof(SRE_CODE));
3143 if (x) {
3144 PyDict_SetItemString(d, "CODESIZE", x);
3145 Py_DECREF(x);
3148 x = PyString_FromString(copyright);
3149 if (x) {
3150 PyDict_SetItemString(d, "copyright", x);
3151 Py_DECREF(x);
3155 #endif /* !defined(SRE_RECURSIVE) */
3157 /* vim:ts=4:sw=4:et