2 Functions for reversing a regular expression (used in reverse URL resolving).
3 Used internally by Django and not intended for external use.
5 This is not, and is not intended to be, a complete reg-exp decompiler. It
6 should be good enough for a large class of URLS, however.
9 # Mapping of an escape character to a representative of that class. So, e.g.,
10 # "\w" is replaced by "x" in a reverse URL. A value of None means to ignore
11 # this sequence. Any missing key is mapped to itself.
27 Used to represent multiple possibilities at this point in a pattern string.
28 We use a distinguished type, rather than a list, so that the usage in the
34 Used to represent a capturing group in the pattern string.
37 class NonCapture(list):
39 Used to represent a non-capturing group in the pattern string.
42 def normalize(pattern
):
44 Given a reg-exp pattern, normalizes it to a list of forms that suffice for
45 reverse matching. This does the following:
47 (1) For any repeating sections, keeps the minimum number of occurrences
48 permitted (this means zero for optional groups).
49 (2) If an optional group includes parameters, include one occurrence of
50 that group (along with the zero occurrence case from step (1)).
51 (3) Select the first (essentially an arbitrary) element from any character
52 class. Select an arbitrary character for any unordered class (e.g. '.'
53 or '\w') in the pattern.
54 (5) Ignore comments and any of the reg-exp flags that won't change
55 what we construct ("iLmsu"). "(?x)" is an error, however.
56 (6) Raise an error on all other non-capturing (?...) forms (e.g.
57 look-ahead and look-behind matches) and any disjunctive ('|')
60 Django's URLs for forward resolving are either all positional arguments or
61 all keyword arguments. That is assumed here, as well. Although reverse
62 resolving can be done using positional args when keyword args are
63 specified, the two cannot be mixed in the same reverse() call.
65 # Do a linear scan to work out the special features of this pattern. The
66 # idea is that we scan once here and collect all the information we need to
67 # make future decisions.
69 non_capturing_groups
= []
71 pattern_iter
= next_char(iter(pattern
))
74 # A "while" loop is used here because later on we need to be able to peek
75 # at the next character and possibly go around without consuming another
76 # one at the top of the loop.
78 ch
, escaped
= pattern_iter
.next()
80 return zip([u
''], [[]])
87 # Replace "any character" with an arbitrary representative.
90 # FIXME: One day we'll should do this, but not in 1.0.
91 raise NotImplementedError
97 # This can only be the end of a non-capturing group, since all
98 # other unescaped parentheses are handled by the grouping
99 # section later (and the full group is handled there).
101 # We regroup everything inside the capturing group so that it
102 # can be quantified, if necessary.
103 start
= non_capturing_groups
.pop()
104 inner
= NonCapture(result
[start
:])
105 result
= result
[:start
] + [inner
]
107 # Replace ranges with the first character in the range.
108 ch
, escaped
= pattern_iter
.next()
110 ch
, escaped
= pattern_iter
.next()
111 while escaped
or ch
!= ']':
112 ch
, escaped
= pattern_iter
.next()
114 # Some kind of group.
115 ch
, escaped
= pattern_iter
.next()
116 if ch
!= '?' or escaped
:
118 name
= "_%d" % num_args
120 result
.append(Group(((u
"%%(%s)s" % name
), name
)))
121 walk_to_end(ch
, pattern_iter
)
123 ch
, escaped
= pattern_iter
.next()
125 # All of these are ignorable. Walk to the end of the
127 walk_to_end(ch
, pattern_iter
)
129 # Non-capturing group
130 non_capturing_groups
.append(len(result
))
132 # Anything else, other than a named group, is something
134 raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch
)
136 ch
, escaped
= pattern_iter
.next()
138 raise ValueError("Non-reversible reg-exp portion: '(?P%s'" % ch
)
139 # We are in a named capturing group. Extra the name and
140 # then skip to the end.
142 ch
, escaped
= pattern_iter
.next()
145 ch
, escaped
= pattern_iter
.next()
146 param
= ''.join(name
)
147 result
.append(Group(((u
"%%(%s)s" % param
), param
)))
148 walk_to_end(ch
, pattern_iter
)
150 # Quanitifers affect the previous item in the result list.
151 count
, ch
= get_quantifier(ch
, pattern_iter
)
153 # We had to look ahead, but it wasn't need to compute the
154 # quanitifer, so use this character next time around the
159 if contains(result
[-1], Group
):
160 # If we are quantifying a capturing group (or
161 # something containing such a group) and the minimum is
162 # zero, we must also handle the case of one occurrence
163 # being present. All the quantifiers (except {0,0},
164 # which we conveniently ignore) that have a 0 minimum
165 # also allow a single occurrence.
166 result
[-1] = Choice([None, result
[-1]])
170 result
.extend([result
[-1]] * (count
- 1))
172 # Anything else is a literal.
176 ch
, escaped
= pattern_iter
.next()
179 except StopIteration:
181 except NotImplementedError:
182 # A case of using the disjunctive form. No results for you!
183 return zip([u
''], [[]])
185 return zip(*flatten_result(result
))
187 def next_char(input_iter
):
189 An iterator that yields the next character from "pattern_iter", respecting
190 escape sequences. An escaped character is replaced by a representative of
191 its class (e.g. \w -> "x"). If the escaped character is one that is
192 skipped, it is not returned (the next character is returned instead).
194 Yields the next character, along with a boolean indicating whether it is a
195 raw (unescaped) character or not.
197 for ch
in input_iter
:
201 ch
= input_iter
.next()
202 representative
= ESCAPE_MAPPINGS
.get(ch
, ch
)
203 if representative
is None:
205 yield representative
, True
207 def walk_to_end(ch
, input_iter
):
209 The iterator is currently inside a capturing group. We want to walk to the
210 close of this group, skipping over any nested groups and handling escaped
211 parentheses correctly.
217 for ch
, escaped
in input_iter
:
227 def get_quantifier(ch
, input_iter
):
229 Parse a quantifier from the input, where "ch" is the first character in the
232 Returns the minimum number of occurences permitted by the quantifier and
233 either None or the next character from the input_iter if the next character
234 is not part of the quantifier.
238 ch2
, escaped
= input_iter
.next()
239 except StopIteration:
249 ch
, escaped
= input_iter
.next()
252 values
= ''.join(quant
).split(',')
254 # Consume the trailing '?', if necessary.
256 ch
, escaped
= input_iter
.next()
257 except StopIteration:
261 return int(values
[0]), ch
263 def contains(source
, inst
):
265 Returns True if the "source" contains an instance of "inst". False,
268 if isinstance(source
, inst
):
270 if isinstance(source
, NonCapture
):
272 if contains(elt
, inst
):
276 def flatten_result(source
):
278 Turns the given source sequence into a list of reg-exp possibilities and
279 their arguments. Returns a list of strings and a list of argument lists.
280 Each of the two lists will be of the same length.
284 if isinstance(source
, Group
):
285 if source
[1] is None:
289 return [source
[0]], [params
]
293 for pos
, elt
in enumerate(source
):
294 if isinstance(elt
, basestring
):
296 piece
= u
''.join(source
[last
:pos
])
297 if isinstance(elt
, Group
):
303 for i
in range(len(result
)):
306 result_args
[i
].append(param
)
307 if isinstance(elt
, (Choice
, NonCapture
)):
308 if isinstance(elt
, NonCapture
):
310 inner_result
, inner_args
= [], []
312 res
, args
= flatten_result(item
)
313 inner_result
.extend(res
)
314 inner_args
.extend(args
)
317 for item
, args
in zip(result
, result_args
):
318 for i_item
, i_args
in zip(inner_result
, inner_args
):
319 new_result
.append(item
+ i_item
)
320 new_args
.append(args
[:] + i_args
)
322 result_args
= new_args
324 piece
= u
''.join(source
[last
:])
325 for i
in range(len(result
)):
327 return result
, result_args