8 our @EXPORT_OK = qw(iq_reply error_tag iq_error feature
9 has_subtags no_match strip_resource
10 same_name normalize_name get_attr
11 extract multi_extract save save_match save_sub
12 spec_is_optional take text_extractor
13 extract_disco_items extract_disco_info
15 our %EXPORT_TAGS = (all
=> \
@EXPORT_OK);
18 use Thrasher
::Constants
qw(:all);
19 use Thrasher
::Log
qw(:all);
23 my %namespaces = (stream
=> 'http://etherx.jabber.org/streams',
24 '' => 'jabber:client', # 'default" namespace
31 Thrasher::XML - a collection of functions for manipulating XML
35 =head1 CONVENIENCE FUNCTIONS
37 Several convenience functions for working with namespaced tag names
44 C<same_name>($l, $r): Given two names, returns true if they are
45 the same name, false otherwise. This function calls C<normalize_name>
46 on both arguments for you.
50 # Compare two names, as represented in this code, to see if they are
51 # the same. If the first element exists in %namespaces, the full
52 # namespace will be substituted. If the value is a string, it will
53 # be in the default namespace, which for streams is 'jabber:client'.
58 $left = normalize_name
($left);
59 $right = normalize_name
($right);
61 return $left->[0] eq $right->[0] && $left->[1] eq $right->[1];
68 C<normalize_name>($name): Normalizes the given name according to the
75 If the name is a "Clark-style" URI+name {uri}name, breaks out
76 the uri and the name separately.
80 If the name is a simple string, the name is returned as being in
81 the default namespace ("jabber:client").
85 If the name is a two-element array, and the namespace has an entry
86 in the internal %namespaces hash, the entry in the hash will
87 replace the namespaces. This is why you can so ofter refer to
88 ['stream', 'stream'] instead of ['http://etherx.jabber.org/streams',
93 If the name is a two-element array that didn't meet the prior
94 condition, the array is returned.
98 If the name makes it this far, the function dies with a descriptive
99 error message showing what you passed in.
103 You shouldn't really need to call this function, everything in this
104 module tends to call it at every opportunity.
106 Also note that this is only legit because the shortcuts are
107 hard-coded, not pulled from the XML itself. Using actual
108 prefixes in the XML would be wrong.
116 if ($name =~ /^\{([^}]+)\}(.*)$/) {
119 return [$namespaces{''}, $name];
122 if (ref($name) eq 'ARRAY') {
123 my ($namespace, $tag) = @
$name;
124 if (exists($namespaces{$namespace})) {
125 $namespace = $namespaces{$namespace};
127 return [$namespace, $tag];
130 confess
"Invalid parameter passed to normalize_name:\n"
138 C<get_attr>($attr_name, $attrs_hash): Get the value of an attribute
139 from an attribute list, using the correct attribute name. $attr_name
140 is passed through normalize name.
148 $name = normalize_name
($name);
149 $name = '{' . $name->[0] . '}' . $name->[1];
150 return $atts->{$name};
155 C<extract>($xml_pattern, $xml): Extracts values from processed XML
156 into a hash for your consumption, or dies if the xml_pattern doesn't
157 match, using a pattern matching syntax.
159 The XML pattern is a structure that looks like the structure
160 we use for XML, and matches in the following way:
164 =item * If a field is undef, the field must exist, but may
167 =item * If the field is a placeholder, the field must exist,
170 =item * If the field is a constant value, it must exactly match
171 the given constant value.
173 =item * If the field is a variable specification, as described below,
174 the field must match the match specification passed in as a parameter
177 =item * Fields not defined in the match clause at all are ignored.
181 As a special case, for children specifications, the match will
182 succeed if I<any> of the children match, and for placeholders
183 with conditions, any matching children will be extracted.
185 If the match is successful, a hashref will be returned with the
186 result of all the placeholders, according to the names given.
187 If you had no placeholders (pure matching), then the hashref
188 will be empty. Otherwise, the placeholders will receive whatever
190 This is a powerful method, but I'm going to wave at the other code
191 and say to look at the examples for usage; if you need more docs,
194 As a special exeption, if the $xml_pattern is undef, the function
195 simply returns $xml, implementing a "match everything" clause
196 that is useful for error cases.
198 The variables and placeholders that you can use:
202 =item C<save>($name): The basic placeholder that will
203 simply return the contents with no further processing.
205 =item C<save_match>([$name,] $match): Recursively calls
206 match. If this fails (returns undef), the entire match
207 fails; if it succeeds, the values in the matched hash
208 are merged into the final returned hash.
210 If used in a children match, this will return only the
211 first such matching result, useful for extracting subelements.
213 If no name is given, it will only be used to match, it
214 won't save the match specifically. (This can be used to
215 recursively match and save inner elements.)
217 =item C<save_sub>($name, $sub): Calls the sub with
218 the result, returns the result of the I<sub> as the result
221 If used in the children match spec, where this will collect
222 all sub runs that don't return "undef" into the resulting
223 arrayref, and fail if no children match the spec.
225 =item C<take>($name): This requires that you pass in a
226 value for taking to the "extract" hash, which will be
227 treated exactly as if it were in the original specification,
228 right down to allowing you to return values. This allows
229 us to factor out constant or repeating patterns and avoid
230 constructing and garbage collecting them over and over.
237 my $xml_pattern = shift;
239 my $provided_vars = shift || {};
241 # undefined $xml_pattern matches everything
242 if (!defined($xml_pattern)) {
250 if (!defined($xml_pattern)) {
256 my ($pat_element, $pat_atts, $pats_children) = @
$xml_pattern;
257 my ($element, $atts, $children) = @
$xml;
259 my $string_equal = sub { $_[0] eq $_[1] };
261 my $extract_fail_reason = '';
266 my $match_spec = shift;
267 my $match_target = shift;
268 my $comparator = shift || $string_equal;
270 if (!defined($match_spec)) {
271 return $match_target;
272 } elsif (ref($match_spec) eq
273 'Thrasher::XMPPStreamIn::Placeholder') {
274 $final_hash->{$match_spec->{name
}} = $match_target;
275 return $match_target || $match_spec->{optional
};
276 } elsif (ref($match_spec) eq
277 'Thrasher::XMPPStreamIn::Variable') {
278 return $compare->($provided_vars->{$$match_spec},
281 } elsif(ref($match_spec) eq
282 'Thrasher::XMPPStreamIn::Placeholder::Match') {
283 my $match = $match_spec->{match
};
284 my $result = extract
($match, $match_target, $provided_vars);
286 if (!defined($result) && !$result->{optional
}) {
287 $extract_fail_reason = "Matching XML against a string.";
290 # failure automatically propogates as a die.
291 while (my ($key, $value) = each %$result) {
292 $final_hash->{$key} = $value;
294 return $match_target;
296 } elsif (ref($match_spec) eq
297 'Thrasher::XMPPStreamIn::Placeholder::Sub') {
298 my $name = $match_spec->{name
};
299 my $subref = $match_spec->{subref
};
300 my $result = $subref->($match_target);
301 if (!defined($result)) {
302 $extract_fail_reason = 'Subroutine returned an undef.';
305 if (defined($name)) {
306 $final_hash->{$name} = $result;
310 return $comparator->($match_spec, $match_target) ?
311 $match_target : undef;
315 # Clean up after the closure, which tends to confuse perl
318 $cleanup = sub { undef $compare; undef $cleanup; undef $string_equal; };
321 my $element_compare = sub {
322 my ($pat_namespace, $pat_element_name) = @
{shift()};
323 my ($namespace, $element_name) = @
{shift()};
325 if (defined($pat_namespace) &&
326 $pat_namespace ne $namespace) {
327 $extract_fail_reason =
328 "Namespace mismatch: $pat_namespace ne $namespace";
332 if (defined($pat_element_name) &&
333 $pat_element_name ne $element_name) {
334 $extract_fail_reason =
335 "Element name mismatch: $pat_element_name ne $element_name";
342 $element = normalize_name
($element);
343 my $result = $compare->($pat_element, $element,
345 undef $element_compare;
347 # Upgrade undefs to a die
350 confess
"In element name: $extract_fail_reason";
355 if (!defined($pat_atts)) {
356 # deliberately blank; no action
358 while (my ($key, $value) = each %$pat_atts) {
359 # verify existence in the target atts
360 # before passing off to $compare
361 if (substr($key, 0, 1) ne '{') {
365 if (!exists($atts->{$key})) {
366 if (spec_is_optional
($value)) {
370 confess
"Attribute $key unexpectedly didn't exist.";
372 if (!$compare->($value, $atts->{$key})) {
374 confess
"In attribute: $extract_fail_reason";
380 my $match_children1 = sub {
381 my ($pat_children) = @_;
383 my $final_children = [];
384 for my $child (@
$children) {
385 my $match_result = eval {
386 $compare->($pat_children,
389 if (!$@
&& defined($match_result)) {
390 if (ref($pat_children) eq
391 'Thrasher::XMPPStreamIn::Placeholder::Match' &&
392 $pat_children->{name
}) {
393 return $match_result;
395 push @
$final_children, $match_result;
399 if (!@
$final_children && !spec_is_optional
($pat_children)) {
401 confess
"In attempting to match children, none matched.";
403 # Transfer the matching results into the name for the sub if needed
404 if (ref($pat_children) eq 'Thrasher::XMPPStreamIn::Placeholder::Sub') {
405 return $final_children;
407 elsif (ref($pat_children) eq 'Thrasher::XMPPStreamIn::Placeholder') {
408 return $final_children;
416 if (defined($pats_children)) {
417 if (ref($pats_children) ne 'ARRAY') {
418 $pats_children = [ $pats_children ];
420 elsif (scalar(@
{$pats_children}) == 0) {
421 # Special case: [] means "no children"
422 if (scalar(@
{$children}) != 0) {
424 confess
('Match wanted no children.');
428 for my $pat_children (@
{$pats_children}) {
429 my $to_save = $match_children1->($pat_children);
431 $final_hash->{$pat_children->{name
}} = $to_save;
441 # Taking in pairs of extract_specifications and sub actions,
442 # try multiple matches at a time, dying only if they all fail.
443 # Put the most likely success stanza up front.
445 # assumes none of the matches will change $xml_message
446 my $xml_message = shift;
449 my $match = shift @_;
450 my $action = shift @_;
451 if (!defined($action)) {
452 confess
"Match rule given without action for multi_extract.";
458 $result = extract
($match, $xml_message);
461 if (defined($result)) {
462 return $action->($result);
466 confess
("In multi-extract, no matches found. Message: ".
467 Dumper
($xml_message));
472 my $optional = shift;
473 my $self = {name
=> $name,
474 optional
=> $optional};
475 return bless $self, 'Thrasher::XMPPStreamIn::Placeholder';
478 sub save_match
($;$$) {
490 my $self = {match
=> $match, name
=> $name, optional
=> $optional};
491 bless $self, 'Thrasher::XMPPStreamIn::Placeholder::Match';
494 sub save_sub
($$;$) {
497 my $optional = shift;
498 my $var_spec = {name
=> $name, subref
=> $subref, optional
=> $optional};
499 bless $var_spec, 'Thrasher::XMPPStreamIn::Placeholder::Sub';
502 sub spec_is_optional
{
504 return (ref($spec) eq 'Thrasher::XMPPStreamIn::Placeholder::Match'
506 ref($spec) eq 'Thrasher::XMPPStreamIn::Placeholder'
508 ref($spec) eq 'Thrasher::XMPPStreamIn::Placeholder::Sub')
509 && $spec->{optional
};
515 return bless \
$name, 'Thrasher::XMPPStreamIn::Variable';
518 # This does not descend into subtags, by design
520 if (!ref($_[0])) { return $_[0] }
526 Some shortcuts for common specification patterns:
537 # NOTE NOTE NOTE: You pass back the *original* to and from to
538 # this function, with centralizes the reversing of those
542 my $iq_params = shift;
543 my $response = shift;
545 # Reconstruct an IQ response
546 my $iq = [[$NS_COMPONENT, 'iq'],
547 {from
=> $iq_params->{to
},
548 to
=> $iq_params->{from
},
549 id
=> $iq_params->{id
},
551 ($response ?
[$response] : [])];
558 if (!exists($IQ_ERRORS{$error})) {
559 confess
"Undefined error code $error used.";
562 my ($error_code, $error_type, $error_tag) =
563 @
{$IQ_ERRORS{$error}};
565 return [[$NS_COMPONENT, 'error'],
566 {code
=> $error_code,
567 type
=> $error_type},
568 [[[$NS_ERROR, $error_tag], {}, []]]
574 my $iq_params = shift;
575 my $error_id = shift;
577 my $query = $iq_params->{query
};
578 my $children = [$query];
580 push @
$children, error_tag
($error_id);
582 my $iq = [[$NS_COMPONENT, 'iq'],
583 {from
=> $iq_params->{to
},
584 to
=> $iq_params->{from
},
585 id
=> $iq_params->{id
},
591 sub feature
{ [[$NS_DISCO_INFO, 'feature'], {var
=> $_[0]}, []] }
593 # Pass the children list;
595 my $children = shift;
596 return !!(grep { ref($_) } @
$children);
599 # Returns a multi_extract clause that matches everything, and
600 # prints off an useful error logging event. This in effect turns
601 # multi_extract into a function that dies when unexpected stuff
602 # occurs, into printing something out on STDERR.
603 # This requires the children to be
607 my $iq_params = shift;
608 my @other_params = @_;
611 log Dumper
(\
@other_params) if @other_params;
612 $self->iq_error($iq_params, 'bad_request');
618 if (!defined($jid)) {
619 confess
"stripping the resource from an undefined JID";
625 # Tear apart a disco result, returning an array of identities
626 # [type, category, name], followed by an array of features. Give this
627 # the perl data corresponding to the <query> tag.
628 sub extract_disco_info
{
631 my $disco = extract
([undef, undef,
634 (($_[0]->[0]->[1] eq 'identity') ||
635 ($_[0]->[0]->[1] eq 'feature'))) ?
637 # Don't require children; empty
638 # results may be e.g. an
639 # incompletely started service.
645 for my $feature_or_identity (@
{$disco->{disco
}}) {
646 my $atts = $feature_or_identity->[1];
647 if ($feature_or_identity->[0]->[1] eq 'identity') {
648 my $identity = $feature_or_identity;
650 ($atts->{'{}type'}, $atts->{'{}category'},
652 push @identities, \
@identity_data;
654 my $feature = $feature_or_identity;
655 push @features, $atts->{'{}var'};
659 return \
@identities, \
@features;
662 sub extract_disco_items
{
665 my $disco = extract
([undef, undef,
668 (($_[0]->[0]->[1] eq 'item'))) ?
671 my @items = map { $_->[1]->{'{}jid'} } @
{$disco->{disco
}};
676 # Given a list of &extract specifications, recurse down the
677 # list and return the whole chunk of extract results in one
678 # convenient array. Indicate what XML to recurse on by saving the
679 # target XML in the "rec" field of the extract result.
680 sub recursive_extract
{
684 my $recursion = $xml;
686 while ($recursion && @_) {
687 my $extract_spec = shift @_;
693 $extract_results = extract
($extract_spec,
697 # If there was an error, return what we got.
703 if ($extract_results &&
705 !exists($extract_results->{rec
})) {
706 warn "Further recursive extractions are specified, "
707 ."but no 'rec' element resulted. Did you forget "
708 ."to specify a 'rec'?";
711 $recursion = $extract_results->{rec
};
713 if ($extract_results) {
714 while (my ($key, $val) = each %$extract_results) {
715 $results->{$key} = $val;