ConnectionManager: Disable hard limit in favor of twiddled hammering values.
[thrasher.git] / perl / lib / Thrasher / XMPPStreamOut.pm
blobe7aa5479277de27b9bd4e29875cba0ab463ee113
1 package Thrasher::XMPPStreamOut;
2 use strict;
3 use warnings;
5 # This package is responsible for the XML output. It outputs by
6 # calling a closure you give it, which makes it easy to test.
7 # This is highly tuned for XMPP streams and the messages it
8 # sends. It is intended to do the "right thing" if you send it
9 # the right data as input, but it does not validate the XML
10 # for things like legal tag names.
12 use CGI qw(escapeHTML); # doubling as XML escaping
14 use Thrasher::Constants qw(:all);
15 use Thrasher::Log qw(:all);
16 use Encode;
18 use Carp qw(confess);
20 use Data::Dumper;
22 # This runs an escaping process over the data, and also strips out
23 # invalid
24 sub pcdata_process {
25 my $s = shift;
27 if (!Encode::is_utf8($s)) {
28 $s = Encode::decode("UTF-8", $s);
30 $s = remove_invalid_chars($s);
31 $s = escapeHTML($s);
32 return $s;
35 sub remove_invalid_chars {
36 my $s = shift;
37 # \x{fffd} is the substitution character, which ends up
38 # coming out evil if we don't do something here for
39 # some reason.
40 $s =~ s/\x{fffd}/?/g;
41 $s =~ s/[\x00-\x08\x0b\x0c\x0e-\x1f\x80-\xff]+//g;
42 return $s;
45 # This is a set of conventional prefixes, so the output stream
46 # looks like people expect.
47 my %CONVENTIONAL_PREFIXES =
48 ($NS_STREAM => 'stream',
49 $NS_CLIENT => '',
50 $NS_COMPONENT => '',
51 $NS_DISCO_INFO => '',
52 $NS_DISCO_ITEMS => '',
53 $NS_REGISTER => '',
54 $NS_ERROR => '',
55 $NS_GATEWAY => '',
56 $NS_TIME => '',
57 $NS_VERSION => '',
58 $NS_CAPS => '',
59 $NS_PUBSUB => '',
60 $NS_TUNE => '',
61 $NS_PEP_AVATAR => '',
62 $NS_PEP_AVATAR_METADATA => '',
63 $NS_LAST => '',
64 $NS_VCARD => '',
65 $NS_VCARD_UPDATE => '',
66 $NS_ROSTER_EXCHANGE => '',
67 $NS_XHTML_IM => '',
68 $NS_XHTML => '',
69 $NS_NICK => '',
70 $NS_BYTESTREAMS => '',
71 $NS_FILE_TRANSFER => '',
72 $NS_FEATURE_NEG => '',
73 $NS_DATA => '',
74 $NS_STREAM_INITIATION => '',
75 $NS_FILE_TRANSFER => '',
76 $NS_CHATSTATES => '',
78 $NS_THRASHER_PRESENCE => 'thrasher');
80 my %XML_NAMESPACES =
81 ($NS_XMLNS => 'xmlns', $NS_XML => 'xml');
83 sub new {
84 my $class = shift;
85 my $self = {};
86 bless $self, $class;
88 $self->{output} = shift;
89 if (ref($self->{output}) ne 'CODE') {
90 die "Thrasher::XMPPStreamOut needs an output method.";
92 $self->{namespace_to_prefixes} = {};
93 $self->{prefix_to_namespace} = {};
95 return $self;
98 # This outputs everything but the closing tag, so it
99 # can be used to start "stream"
100 sub output_tag_and_children {
101 my $self = shift;
102 my $fragment = shift;
103 my $is_root_element = shift;
104 my $namespace_declarations = {};
105 my ($element, $atts, $children) = @$fragment;
107 if (!defined($children)) {
108 confess "Undefined \$children in output_tag_and_children.";
111 my $element_text = $self->xml_element_text($element,
112 $namespace_declarations);
113 my $outputtable_atts = $self->outputtable_atts($atts, $namespace_declarations);
114 $self->output_tag($element_text, $outputtable_atts,
115 # immediately close? not root, no children
116 !$is_root_element && !@$children);
117 if ($children) {
118 for my $child (@$children) {
119 $self->output($child);
123 # If this is the root element, we didn't close the tag,
124 # and any initial namespace declarations are still in effect,
125 # so don't clear them!
126 if (!$is_root_element) {
127 for my $key (values %$namespace_declarations) {
128 my $prefix = pop @{$self->{namespace_to_prefixes}->{$key}};
129 if (!@{$self->{namespace_to_prefixes}->{$key}}) {
130 delete $self->{namespace_to_prefixes}->{$key};
133 pop @{$self->{prefixes_to_namespace}->{$prefix}};
134 if (!@{$self->{prefixes_to_namespace}->{$prefix}}) {
135 delete $self->{prefixes_to_namespace}->{$prefix};
139 # If this isn't a root element, we want to close the tag.
140 if (@$children) {
141 $self->{output}->('</' . $element_text . '>');
147 return $element_text;
150 # Core routine: Output the XML bits as they come. This
151 # outputs closing tags too, so it can't be used for the
152 # initial tag.
153 sub output {
154 my $self = shift;
156 my $output = $self->{output};
158 for my $fragment (@_) {
159 if (ref($fragment) eq '') {
160 $output->(pcdata_process($fragment));
161 } elsif (ref($fragment) eq 'ARRAY') {
162 my $element_text =
163 $self->output_tag_and_children($fragment);
164 } elsif (ref($fragment) eq 'SCALAR') {
165 $output->(remove_invalid_chars($$fragment));
166 } else {
167 log("Error: outputting something to the XML "
168 ."stream which is invalid (ignored): "
169 .Dumper($fragment));
174 # This may add a namespace declaration to the atts, which is why
175 # it has to be passed in.
176 sub xml_element_text {
177 my $self = shift;
178 if (!ref($_[0])) {
179 confess "Not a ref in xml_element_text.";
181 my ($namespace, $element) = @{shift()};
182 my $namespace_declarations = shift;
184 my $prefix = $self->prefix_for_namespace($namespace,
185 $namespace_declarations);
186 if ($prefix eq '') {
187 return $element;
188 } else {
189 return "$prefix\:$element";
193 our $namespace_index = 0;
195 # Get the prefix for the given namespace. May add a prefix if
196 # it needs to.
197 sub prefix_for_namespace {
198 my $self = shift;
199 my $namespace = shift;
200 my $namespace_declarations = shift;
202 if (!defined($namespace)) {
203 confess "No defined namespace.";
206 if (my $xml_prefix = $XML_NAMESPACES{$namespace}) {
207 return $xml_prefix;
210 my $prefixes = $self->{namespace_to_prefixes}->{$namespace};
211 if (!defined($prefixes)) {
212 # need to create a new one.
213 my $conventional_prefix = $CONVENTIONAL_PREFIXES{$namespace};
214 if (defined($conventional_prefix)) {
215 if ($conventional_prefix eq '') {
216 $namespace_declarations->{'xmlns'} = $namespace;
217 $self->declare_prefix('', $namespace);
218 return '';
219 } else {
220 $namespace_declarations->{"xmlns:$conventional_prefix"} =
221 $namespace;
222 $self->declare_prefix($conventional_prefix, $namespace);
223 return $conventional_prefix;
227 # Otherwise, we need to select a new one, and
228 # put out the namespace definition.
229 $namespace_index++;
230 $self->declare_prefix("ns$namespace_index", $namespace);
231 $namespace_declarations->{"xmlns:ns$namespace_index"} = $namespace;
232 return "ns$namespace_index";
233 } else {
234 return $prefixes->[-1];
238 sub outputtable_atts {
239 my $self = shift;
240 my $atts = shift; # in "Clarke-style" format
241 my $namespace_declarations = shift || {}; # in straight {$attname => $att}
243 my %final_atts;
245 while (my ($clarke_key, $value) = each %$atts) {
246 # process {namespace}att_name
247 my ($namespace, $name) = $clarke_key =~ /^\{([^}]*)\}(.*)$/;
248 if ($namespace) {
249 my $prefix = $self->prefix_for_namespace($namespace, $namespace_declarations);
250 if ($prefix eq '') {
251 $final_atts{$name} = $value;
252 } else {
253 $final_atts{$prefix . ':' . $name} = $value;
255 } else {
256 # things in the default namespace go out without
257 # xmlns labels; I don't like this aspect of XML,
258 # but there you are.
259 $clarke_key =~ s/^\{\}//;
260 $final_atts{$clarke_key} = $value;
264 while (my ($key, $value) = each %$namespace_declarations) {
265 $final_atts{$key} = $value;
268 return \%final_atts;
271 sub declare_prefix {
272 my $self = shift;
273 my $prefix = shift;
274 my $namespace = shift;
276 push @{$self->{namespace_to_prefixes}->{$namespace}}, $prefix;
277 push @{$self->{prefixes_to_namespace}->{$prefix}}, $namespace;
280 sub output_tag {
281 my $self = shift;
282 my $element_name = shift;
283 my $atts = shift;
284 my $immediately_close = shift;
286 my @strings = ($element_name);
287 # "Why sort this? XML doesn't care about attribute order?"
288 # Stability for the test suite; with this, I can guarantee
289 # a testable order.
290 for my $name (sort keys %$atts) {
291 my $value = $atts->{$name};
292 if (!defined($value)) {
293 confess "In outputing att $name, undefined value.";
295 push @strings, "$name='" . pcdata_process($value) . "'";
298 my $element = ('<' . (join ' ', @strings) .
299 ($immediately_close ? '/' : '') . '>');
300 $self->{output}->($element);