1 package Thrasher
::XMPPStreamIn
;
5 Thrasher::XMPPStream - wrap XML::Parser for XMPP stream parsing
9 my $stream = new Thrasher::XMPPStream;
11 # messages as described below
12 my $messages = $stream->parse($some_text);
13 my $more_messages = $stream->parse($more_text_in_stream);
16 my $yet_more_messages = $stream->parse($invalid_xml);
20 Thrasher::XMPPStream converts from the Jabber XML stream into
21 perl objects representing the events from the XML stream.
23 This abstracts out managing the XML::Parser correctly.
27 Events are output as array refs with three elements, containing the
34 B<Tag name reference>: The full name of the tag. See "Tags" section
39 B<Attributes hash>: A hash of full attribute names to their
40 values. See "Tags" section below.
44 B<Children>: The children of this tag. The children are each one of
45 two things: A string, representing character data, or another tag
46 arrayref as described here.
52 Thrasher::XMPPStream correctly handles namespaces; therefore, all
53 tags and attribute names actually have two parts, the namespace
54 and the tag name itself.
56 Tags are typically represented as an array ref, with the first part
57 representing the full name of the namespace (NOT the prefix,
58 like "stream", the whole URI, like
59 "http://etherx.jabber.org/streams"), and the second part representing
62 Attributes are represented in "Clark-style" attribute names, as
63 XML::SAX's documentation calls it, which is the namespace in
64 {}, followed by the tag/attribute name. Note that this means
65 that an attribute with no explicit namespace will be
66 C<{}attname>, not C<attname>.
74 $XML::SAX
::ParserPackage
= "XML::SAX::Expat::Incremental";
75 use XML
::SAX
::ParserFactory
;
80 use Thrasher
::XML
qw(:all);
82 use base
'XML::SAX::Base', 'Exporter';
86 # Just export the non-methods.
87 our @EXPORT_OK = qw(get_parser);
88 our %EXPORT_TAGS = (all
=> \
@EXPORT_OK);
91 my $self = Thrasher
::XMPPStreamIn
->new();
92 my $parser = XML
::SAX
::ParserFactory
->parser
94 $self->{parser
} = $parser;
99 my $self = {_debug
=> 1};
100 bless $self, 'Thrasher::XMPPStreamIn';
102 my $handlers = {Start
=> sub { $self->tag_start(@_) },
103 End
=> sub { $self->tag_end(@_) },
104 Char
=> sub { $self->char(@_) },
105 XMLDecl
=> sub { $self->xml_decl(@_) }};
107 # Consists of a list of prefixes->array ref of strings,
108 # where the last string is the current namespace for that
110 $self->{namespaces
} = {};
112 # this stores up the messages as they are generated, so the
113 # end of the "parse" call can return them.
114 $self->{messages
} = [];
120 my ($self, $el) = @_;
122 my $atts = $el->{Attributes
};
123 while (my ($name, $att_data) = each %$atts) {
124 # Filter out namespace declarations
125 if ($name =~ m
|http
://www
.w3
.org
/2000/xmlns
| ||
126 $name eq '{}xmlns') {
127 delete $atts->{$name};
131 # If it's not a namespace declaration, cut the info
133 $atts->{$name} = $atts->{$name}->{Value
};
136 my $element = [$el->{NamespaceURI
}, $el->{LocalName
}];
138 # As a special case, if this is the initial stream tag,
139 # it constitutes a message
140 if (same_name
(['stream', 'stream'], $element)) {
141 # pretend the children are empty
142 push @
{$self->{messages
}}, [$element, $atts, []];
144 # Otherwise, we need to remember it
145 if (!exists($self->{current_message
})) {
146 $self->{children
} = [[]];
147 $self->{current_message
} = [$element, $atts,
148 $self->{children
}->[0]];
150 my $new_children = [];
151 my $current_children = $self->{children
}->[-1];
152 push @
{$self->{children
}}, $new_children;
153 my $new_element = [$element, $atts,
155 push @
$current_children, $new_element;
161 my ($self, $el) = @_;
163 if (!scalar@
{$self->{children
}}) {
164 return; # got a final </stream:stream>
167 # Return to the previous tag's children.
168 pop @
{$self->{children
}};
170 # If we've received a complete message, remember it
171 # so we can return it.
172 if (!scalar(@
{$self->{children
}})) {
173 my $message = $self->{current_message
};
174 push @
{$self->{messages
}}, $message;
175 delete $self->{current_message
};
180 my ($self, $xml) = @_;
182 $self->{parser
}->parse_string($xml);
184 my $messages = $self->{messages
};
185 $self->{messages
} = [];
191 if ($self->{_debug
}) {
199 my $att_name = shift;
200 my $name = $self->convert_name($att_name);
201 return '{' . $name->[0] . '}' . $name->[1];
208 my ($left, $right) = split /:/, $name, 2;
209 if (defined($right)) {
210 return [$self->namespace_for_prefix($left), $right];
212 return [$self->namespace_for_prefix(''), $left];
216 sub namespace_for_prefix
{
220 my $namespaces = $self->{namespaces
}->{$prefix};
221 if (defined($namespaces) && scalar(@
$namespaces) > 0) {
222 return $namespaces->[-1];
229 my ($self, $el_data) = @_;
231 my $chars = $el_data->{Data
};
232 my $current_children = $self->{children
}->[-1];
233 if (!defined($current_children)) {
234 # We're still not to the first message yet.
238 if (scalar(@
$current_children) && ref($current_children->[-1]) eq '') {
239 $current_children->[-1] = $current_children->[-1] . $chars;
241 push @
$current_children, $chars;