2 # Secret Labs' Regular Expression Engine
4 # various symbols used by the regular expression engine.
5 # run this script to update the _sre include files!
7 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
9 # See the sre.py file for information on usage and redistribution.
12 """Internal support module for sre"""
14 # update when constants are added or removed
18 # max code word in this release
22 # SRE standard exception (access as sre.error)
23 # should this really be here?
25 class error(Exception):
# Symbolic operation names.  Each constant's value is simply the
# lower-case spelling of its own identifier.
ASSERT_NOT = "assert_not"
BIGCHARSET = "bigcharset"
GROUPREF_IGNORE = "groupref_ignore"
IN_IGNORE = "in_ignore"
LITERAL_IGNORE = "literal_ignore"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
# AT_* names.  The _LOC_ and _UNI_ spellings are separate constants
# alongside the plain boundary names; every value is the lower-case
# spelling of its identifier.
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
# CATEGORY_* names.  Plain, _LOC_ and _UNI_ spellings are all distinct
# constants; every value is the lower-case spelling of its identifier.
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
100 # failure=0 success=1 (just because it looks better that way :-)
110 GROUPREF
, GROUPREF_IGNORE
,
114 LITERAL
, LITERAL_IGNORE
,
118 NOT_LITERAL
, NOT_LITERAL_IGNORE
,
128 AT_BEGINNING
, AT_BEGINNING_LINE
, AT_BEGINNING_STRING
, AT_BOUNDARY
,
129 AT_NON_BOUNDARY
, AT_END
, AT_END_LINE
, AT_END_STRING
,
130 AT_LOC_BOUNDARY
, AT_LOC_NON_BOUNDARY
, AT_UNI_BOUNDARY
,
135 CATEGORY_DIGIT
, CATEGORY_NOT_DIGIT
, CATEGORY_SPACE
,
136 CATEGORY_NOT_SPACE
, CATEGORY_WORD
, CATEGORY_NOT_WORD
,
137 CATEGORY_LINEBREAK
, CATEGORY_NOT_LINEBREAK
, CATEGORY_LOC_WORD
,
138 CATEGORY_LOC_NOT_WORD
, CATEGORY_UNI_DIGIT
, CATEGORY_UNI_NOT_DIGIT
,
139 CATEGORY_UNI_SPACE
, CATEGORY_UNI_NOT_SPACE
, CATEGORY_UNI_WORD
,
140 CATEGORY_UNI_NOT_WORD
, CATEGORY_UNI_LINEBREAK
,
141 CATEGORY_UNI_NOT_LINEBREAK
# Rebind each code collection to whatever makedict() builds from it.
# NOTE(review): makedict is defined outside this extract -- presumably it
# maps each code name to a numeric value (dump() later prints the values
# as #define constants); confirm against its definition.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
156 # replacement operations for "ignore case" mode
158 GROUPREF
: GROUPREF_IGNORE
,
160 LITERAL
: LITERAL_IGNORE
,
161 NOT_LITERAL
: NOT_LITERAL_IGNORE
165 AT_BEGINNING
: AT_BEGINNING_LINE
,
170 AT_BOUNDARY
: AT_LOC_BOUNDARY
,
171 AT_NON_BOUNDARY
: AT_LOC_NON_BOUNDARY
175 AT_BOUNDARY
: AT_UNI_BOUNDARY
,
176 AT_NON_BOUNDARY
: AT_UNI_NON_BOUNDARY
180 CATEGORY_DIGIT
: CATEGORY_DIGIT
,
181 CATEGORY_NOT_DIGIT
: CATEGORY_NOT_DIGIT
,
182 CATEGORY_SPACE
: CATEGORY_SPACE
,
183 CATEGORY_NOT_SPACE
: CATEGORY_NOT_SPACE
,
184 CATEGORY_WORD
: CATEGORY_LOC_WORD
,
185 CATEGORY_NOT_WORD
: CATEGORY_LOC_NOT_WORD
,
186 CATEGORY_LINEBREAK
: CATEGORY_LINEBREAK
,
187 CATEGORY_NOT_LINEBREAK
: CATEGORY_NOT_LINEBREAK
191 CATEGORY_DIGIT
: CATEGORY_UNI_DIGIT
,
192 CATEGORY_NOT_DIGIT
: CATEGORY_UNI_NOT_DIGIT
,
193 CATEGORY_SPACE
: CATEGORY_UNI_SPACE
,
194 CATEGORY_NOT_SPACE
: CATEGORY_UNI_NOT_SPACE
,
195 CATEGORY_WORD
: CATEGORY_UNI_WORD
,
196 CATEGORY_NOT_WORD
: CATEGORY_UNI_NOT_WORD
,
197 CATEGORY_LINEBREAK
: CATEGORY_UNI_LINEBREAK
,
198 CATEGORY_NOT_LINEBREAK
: CATEGORY_UNI_NOT_LINEBREAK
# flags (each value is a distinct single bit)
SRE_FLAG_TEMPLATE = 1  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4  # honour system locale
SRE_FLAG_MULTILINE = 8  # treat target as multiline string
SRE_FLAG_DOTALL = 16  # treat target as a single string
SRE_FLAG_UNICODE = 32  # use unicode locale
SRE_FLAG_VERBOSE = 64  # ignore whitespace and comments
SRE_FLAG_DEBUG = 128  # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1  # has prefix
SRE_INFO_LITERAL = 2  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4  # pattern starts with character from given set
216 if __name__
== "__main__":
def dump(f, d, prefix):
    """Write one C ``#define`` line per entry of *d* to *f*, ordered by value.

    f      -- object with a ``write`` method accepting text
    d      -- mapping whose keys are names (strings) and whose values are
              the numbers to emit
    prefix -- text placed, with an underscore, before each upper-cased name
    """
    # NOTE(review): the statements that bind the item list and loop over
    # the (name, value) pairs are missing from this extract; they are
    # reconstructed here from how `items`, `k` and `v` are used -- confirm
    # against the complete file.
    #
    # A key function replaces the cmp-style comparator, and the str method
    # replaces string.upper(); both spellings also work on Python 3.
    for k, v in sorted(d.items(), key=lambda item: item[1]):
        f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
223 f
= open("sre_constants.h", "w")
226 * Secret Labs' Regular Expression Engine
228 * regular expression matching engine
230 * NOTE: This file is generated by sre_constants.py. If you need
231 * to change anything in here, edit sre_constants.py and run it.
233 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
235 * See the _sre.c file for information on usage and redistribution.
240 f
.write("#define SRE_MAGIC %d\n" % MAGIC
)
242 dump(f
, OPCODES
, "SRE_OP")
243 dump(f
, ATCODES
, "SRE")
244 dump(f
, CHCODES
, "SRE")
246 f
.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE
)
247 f
.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE
)
248 f
.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE
)
249 f
.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE
)
250 f
.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL
)
251 f
.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE
)
252 f
.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE
)
254 f
.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX
)
255 f
.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL
)
256 f
.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET
)