Fix user_self calling editGet with a wrong parameter
[Melange.git] / app / django / utils / regex_helper.py
blobb11fe960bb483fad134c2dba867c71a8da969cb3
1 """
2 Functions for reversing a regular expression (used in reverse URL resolving).
3 Used internally by Django and not intended for external use.
5 This is not, and is not intended to be, a complete reg-exp decompiler. It
6 should be good enough for a large class of URLS, however.
7 """
9 # Mapping of an escape character to a representative of that class. So, e.g.,
10 # "\w" is replaced by "x" in a reverse URL. A value of None means to ignore
11 # this sequence. Any missing key is mapped to itself.
12 ESCAPE_MAPPINGS = {
13 "A": None,
14 "b": None,
15 "B": None,
16 "d": u"0",
17 "D": u"x",
18 "s": u" ",
19 "S": u"x",
20 "w": u"x",
21 "W": u"!",
22 "Z": None,
25 class Choice(list):
26 """
27 Used to represent multiple possibilities at this point in a pattern string.
28 We use a distinguished type, rather than a list, so that the usage in the
29 code is clear.
30 """
32 class Group(list):
33 """
34 Used to represent a capturing group in the pattern string.
35 """
37 class NonCapture(list):
38 """
39 Used to represent a non-capturing group in the pattern string.
40 """
42 def normalize(pattern):
43 """
44 Given a reg-exp pattern, normalizes it to a list of forms that suffice for
45 reverse matching. This does the following:
47 (1) For any repeating sections, keeps the minimum number of occurrences
48 permitted (this means zero for optional groups).
49 (2) If an optional group includes parameters, include one occurrence of
50 that group (along with the zero occurrence case from step (1)).
51 (3) Select the first (essentially an arbitrary) element from any character
52 class. Select an arbitrary character for any unordered class (e.g. '.'
53 or '\w') in the pattern.
54 (5) Ignore comments and any of the reg-exp flags that won't change
55 what we construct ("iLmsu"). "(?x)" is an error, however.
56 (6) Raise an error on all other non-capturing (?...) forms (e.g.
57 look-ahead and look-behind matches) and any disjunctive ('|')
58 constructs.
60 Django's URLs for forward resolving are either all positional arguments or
61 all keyword arguments. That is assumed here, as well. Although reverse
62 resolving can be done using positional args when keyword args are
63 specified, the two cannot be mixed in the same reverse() call.
64 """
65 # Do a linear scan to work out the special features of this pattern. The
66 # idea is that we scan once here and collect all the information we need to
67 # make future decisions.
68 result = []
69 non_capturing_groups = []
70 consume_next = True
71 pattern_iter = next_char(iter(pattern))
72 num_args = 0
74 # A "while" loop is used here because later on we need to be able to peek
75 # at the next character and possibly go around without consuming another
76 # one at the top of the loop.
77 try:
78 ch, escaped = pattern_iter.next()
79 except StopIteration:
80 return zip([u''], [[]])
82 try:
83 while True:
84 if escaped:
85 result.append(ch)
86 elif ch == '.':
87 # Replace "any character" with an arbitrary representative.
88 result.append(u".")
89 elif ch == '|':
90 # FIXME: One day we'll should do this, but not in 1.0.
91 raise NotImplementedError
92 elif ch == "^":
93 pass
94 elif ch == '$':
95 break
96 elif ch == ')':
97 # This can only be the end of a non-capturing group, since all
98 # other unescaped parentheses are handled by the grouping
99 # section later (and the full group is handled there).
101 # We regroup everything inside the capturing group so that it
102 # can be quantified, if necessary.
103 start = non_capturing_groups.pop()
104 inner = NonCapture(result[start:])
105 result = result[:start] + [inner]
106 elif ch == '[':
107 # Replace ranges with the first character in the range.
108 ch, escaped = pattern_iter.next()
109 result.append(ch)
110 ch, escaped = pattern_iter.next()
111 while escaped or ch != ']':
112 ch, escaped = pattern_iter.next()
113 elif ch == '(':
114 # Some kind of group.
115 ch, escaped = pattern_iter.next()
116 if ch != '?' or escaped:
117 # A positional group
118 name = "_%d" % num_args
119 num_args += 1
120 result.append(Group(((u"%%(%s)s" % name), name)))
121 walk_to_end(ch, pattern_iter)
122 else:
123 ch, escaped = pattern_iter.next()
124 if ch in "iLmsu#":
125 # All of these are ignorable. Walk to the end of the
126 # group.
127 walk_to_end(ch, pattern_iter)
128 elif ch == ':':
129 # Non-capturing group
130 non_capturing_groups.append(len(result))
131 elif ch != 'P':
132 # Anything else, other than a named group, is something
133 # we cannot reverse.
134 raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
135 else:
136 ch, escaped = pattern_iter.next()
137 if ch != '<':
138 raise ValueError("Non-reversible reg-exp portion: '(?P%s'" % ch)
139 # We are in a named capturing group. Extra the name and
140 # then skip to the end.
141 name = []
142 ch, escaped = pattern_iter.next()
143 while ch != '>':
144 name.append(ch)
145 ch, escaped = pattern_iter.next()
146 param = ''.join(name)
147 result.append(Group(((u"%%(%s)s" % param), param)))
148 walk_to_end(ch, pattern_iter)
149 elif ch in "*?+{":
150 # Quanitifers affect the previous item in the result list.
151 count, ch = get_quantifier(ch, pattern_iter)
152 if ch:
153 # We had to look ahead, but it wasn't need to compute the
154 # quanitifer, so use this character next time around the
155 # main loop.
156 consume_next = False
158 if count == 0:
159 if contains(result[-1], Group):
160 # If we are quantifying a capturing group (or
161 # something containing such a group) and the minimum is
162 # zero, we must also handle the case of one occurrence
163 # being present. All the quantifiers (except {0,0},
164 # which we conveniently ignore) that have a 0 minimum
165 # also allow a single occurrence.
166 result[-1] = Choice([None, result[-1]])
167 else:
168 result.pop()
169 elif count > 1:
170 result.extend([result[-1]] * (count - 1))
171 else:
172 # Anything else is a literal.
173 result.append(ch)
175 if consume_next:
176 ch, escaped = pattern_iter.next()
177 else:
178 consume_next = True
179 except StopIteration:
180 pass
181 except NotImplementedError:
182 # A case of using the disjunctive form. No results for you!
183 return zip([u''], [[]])
185 return zip(*flatten_result(result))
187 def next_char(input_iter):
189 An iterator that yields the next character from "pattern_iter", respecting
190 escape sequences. An escaped character is replaced by a representative of
191 its class (e.g. \w -> "x"). If the escaped character is one that is
192 skipped, it is not returned (the next character is returned instead).
194 Yields the next character, along with a boolean indicating whether it is a
195 raw (unescaped) character or not.
197 for ch in input_iter:
198 if ch != '\\':
199 yield ch, False
200 continue
201 ch = input_iter.next()
202 representative = ESCAPE_MAPPINGS.get(ch, ch)
203 if representative is None:
204 continue
205 yield representative, True
207 def walk_to_end(ch, input_iter):
209 The iterator is currently inside a capturing group. We want to walk to the
210 close of this group, skipping over any nested groups and handling escaped
211 parentheses correctly.
213 if ch == '(':
214 nesting = 1
215 else:
216 nesting = 0
217 for ch, escaped in input_iter:
218 if escaped:
219 continue
220 elif ch == '(':
221 nesting += 1
222 elif ch == ')':
223 if not nesting:
224 return
225 nesting -= 1
227 def get_quantifier(ch, input_iter):
229 Parse a quantifier from the input, where "ch" is the first character in the
230 quantifier.
232 Returns the minimum number of occurences permitted by the quantifier and
233 either None or the next character from the input_iter if the next character
234 is not part of the quantifier.
236 if ch in '*?+':
237 try:
238 ch2, escaped = input_iter.next()
239 except StopIteration:
240 ch2 = None
241 if ch2 == '?':
242 ch2 = None
243 if ch == '+':
244 return 1, ch2
245 return 0, ch2
247 quant = []
248 while ch != '}':
249 ch, escaped = input_iter.next()
250 quant.append(ch)
251 quant = quant[:-1]
252 values = ''.join(quant).split(',')
254 # Consume the trailing '?', if necessary.
255 try:
256 ch, escaped = input_iter.next()
257 except StopIteration:
258 ch = None
259 if ch == '?':
260 ch = None
261 return int(values[0]), ch
263 def contains(source, inst):
265 Returns True if the "source" contains an instance of "inst". False,
266 otherwise.
268 if isinstance(source, inst):
269 return True
270 if isinstance(source, NonCapture):
271 for elt in source:
272 if contains(elt, inst):
273 return True
274 return False
276 def flatten_result(source):
278 Turns the given source sequence into a list of reg-exp possibilities and
279 their arguments. Returns a list of strings and a list of argument lists.
280 Each of the two lists will be of the same length.
282 if source is None:
283 return [u''], [[]]
284 if isinstance(source, Group):
285 if source[1] is None:
286 params = []
287 else:
288 params = [source[1]]
289 return [source[0]], [params]
290 result = [u'']
291 result_args = [[]]
292 pos = last = 0
293 for pos, elt in enumerate(source):
294 if isinstance(elt, basestring):
295 continue
296 piece = u''.join(source[last:pos])
297 if isinstance(elt, Group):
298 piece += elt[0]
299 param = elt[1]
300 else:
301 param = None
302 last = pos + 1
303 for i in range(len(result)):
304 result[i] += piece
305 if param:
306 result_args[i].append(param)
307 if isinstance(elt, (Choice, NonCapture)):
308 if isinstance(elt, NonCapture):
309 elt = [elt]
310 inner_result, inner_args = [], []
311 for item in elt:
312 res, args = flatten_result(item)
313 inner_result.extend(res)
314 inner_args.extend(args)
315 new_result = []
316 new_args = []
317 for item, args in zip(result, result_args):
318 for i_item, i_args in zip(inner_result, inner_args):
319 new_result.append(item + i_item)
320 new_args.append(args[:] + i_args)
321 result = new_result
322 result_args = new_args
323 if pos >= last:
324 piece = u''.join(source[last:])
325 for i in range(len(result)):
326 result[i] += piece
327 return result, result_args