2 A simple XPath-like language for tree traversal.
4 This works by creating a filter chain of generator functions. Each
5 function selects a part of the expression, e.g. a child node, a
6 specific descendant or a node that holds an attribute.
12 path_tokenizer
= re
.compile(
19 "([^/\[\]\(\)@=\s]+)|"
23 def iterchildren(node
, attr_name
):
24 # returns an iterable of all child nodes of that name
25 child
= getattr(node
, attr_name
)
27 if type(child
) is list:
34 def _get_first_or_none(it
):
38 except AttributeError:
46 return node
.__class
__.__name
__.split('.')[-1]
48 def parse_func(next
, token
):
52 raise ValueError("Expected '(' after function name '%s'" % name
)
53 predicate
= handle_predicate(next
, token
)
54 return name
, predicate
56 def handle_func_not(next
, token
):
60 name
, predicate
= parse_func(next
, token
)
64 if _get_first_or_none(predicate([node
])) is None:
68 def handle_name(next
, token
):
76 return functions
[name
](next
, token
)
79 for attr_name
in node
.child_attrs
:
80 for child
in iterchildren(node
, attr_name
):
81 if type_name(child
) == name
:
85 def handle_star(next
, token
):
91 for name
in node
.child_attrs
:
92 for child
in iterchildren(node
, name
):
96 def handle_dot(next
, token
):
104 def handle_descendants(next
, token
):
110 def iter_recursive(node
):
111 for name
in node
.child_attrs
:
112 for child
in iterchildren(node
, name
):
114 for c
in iter_recursive(child
):
118 def iter_recursive(node
):
119 for name
in node
.child_attrs
:
120 for child
in iterchildren(node
, name
):
121 if type_name(child
) == node_name
:
123 for c
in iter_recursive(child
):
126 raise ValueError("Expected node name after '//'")
130 for child
in iter_recursive(node
):
135 def handle_attribute(next
, token
):
138 raise ValueError("Expected attribute name")
143 except StopIteration:
147 value
= parse_path_value(next
)
148 if sys
.version_info
>= (2,6) or (sys
.version_info
>= (2,4) and '.' not in name
):
150 readattr
= operator
.attrgetter(name
)
152 name_path
= name
.split('.')
155 for attr
in name_path
:
156 attr_value
= getattr(attr_value
, attr
)
162 attr_value
= readattr(node
)
163 except AttributeError:
165 if attr_value
is not None:
171 attr_value
= readattr(node
)
172 except AttributeError:
174 if attr_value
== value
:
178 def parse_path_value(next
):
182 if value
[:1] == "'" or value
[:1] == '"':
189 name
= token
[1].lower()
192 elif name
== 'false':
194 raise ValueError("Invalid attribute predicate: '%s'" % value
)
196 def handle_predicate(next
, token
):
199 while token
[0] != ']':
200 selector
.append( operations
[token
[0]](next
, token
) )
203 except StopIteration:
209 if not token
[0] and token
[1] == 'and':
210 return logical_and(selector
, handle_predicate(next
, token
))
214 subresult
= iter((node
,))
215 for select
in selector
:
216 subresult
= select(subresult
)
217 predicate_result
= _get_first_or_none(subresult
)
218 if predicate_result
is not None:
222 def logical_and(lhs_selects
, rhs_select
):
225 subresult
= iter((node
,))
226 for select
in lhs_selects
:
227 subresult
= select(subresult
)
228 predicate_result
= _get_first_or_none(subresult
)
229 subresult
= iter((node
,))
230 if predicate_result
is not None:
231 for result_node
in rhs_select(subresult
):
237 "@": handle_attribute
,
241 "//": handle_descendants
,
242 "[": handle_predicate
,
246 'not' : handle_func_not
249 def _build_path_iterator(path
):
251 stream
= iter([ (special
,text
)
252 for (special
,text
) in path_tokenizer(path
)
253 if special
or text
])
256 except AttributeError:
264 selector
.append(operations
[token
[0]](_next
, token
))
265 except StopIteration:
266 raise ValueError("invalid path")
271 except StopIteration:
277 def iterfind(node
, path
):
278 selector_chain
= _build_path_iterator(path
)
279 result
= iter((node
,))
280 for select
in selector_chain
:
281 result
= select(result
)
284 def find_first(node
, path
):
285 return _get_first_or_none(iterfind(node
, path
))
287 def find_all(node
, path
):
288 return list(iterfind(node
, path
))