Get the status actually active at buddy signon instead of hardcoding AVAILABLE.
[thrasher.git] / perl / tests / xhtml_normalize.pl
blob8e141bb980075e08e0905378140901e9acece1aa
1 use Test::More 'no_plan';
2 use Test::Deep;
4 use strict;
5 use warnings;
7 BEGIN {
8 use_ok 'Thrasher::XHTML_IM_Normalize', ':all';
9 use_ok 'Thrasher::HTMLNormalize', ':all';
# Number of random inputs the RANDOM_FRAGMENTS loop generates.
my $RANDOM_TEST_COUNT = 1_000;
ESCAPE: {
    # Verify escape works: <, > and & must be entity-encoded (including
    # the ampersand of an already-written entity), and the argument
    # itself must be left untouched.
    my $original = 'hello <>& &amp;!';
    my $x = $original;
    my $y = escape($x);

    # Compare the argument against a pristine copy. Checking only that
    # $x differs from $y would still pass if escape() altered $x in some
    # other way.
    is($x, $original, "escape doesn't modify existing strings");
    isnt($x, $y, 'escape returns a new string when escaping is needed');
    is($y, 'hello &lt;&gt;&amp; &amp;amp;!',
       'escaping basically works');
}
# We test two cases: full-permission normalization, and the
# normalization defined by $CONSERVATIVE_PERMISSIONS. Each table below
# is a series of test cases [$input => $result]. %BOTH_TESTS holds the
# cases used in both. %CONSERVATIVE_TESTS adds cases used only with
# the conservative permissions, and %FULL_PERM_TESTS adds cases used
# only with full-permission normalization.
# (Some of the repetition in these tables could presumably be factored
# out, but I decided to leave it in for readability.)
# Input => expected-output pairs shared by the conservative and the
# full-permission test tables.
my %BOTH_TESTS = (
    # Had trouble with this
    '<ul><li>Test<ul><li>nested</li><li>nested2</li></ul></li><li>moo</li></ul>'
        => '<ul><li>Test<ul><li>nested</li><li>nested2</li></ul></li><li>moo</li></ul>',

    # Bare text gets wrapped in paragraphs; single newlines become <br />.
    'a'        => '<p>a</p>',
    't&a'      => '<p>t&amp;a</p>',
    "t\n\na"   => "<p>t</p>\n\n<p>a</p>",
    "This is a test.\nMore.\n\nSo there."
        => "<p>This is a test.<br />\nMore.</p>\n\n<p>So there.</p>",
    "<p>This is a test.<br />\nMore.</p>"
        => "<p>This is a test.<br />\nMore.</p>",
    "<P>This is a test.<BR />\nMore.</P>"
        => "<p>This is a test.<br />\nMore.</p>",

    # Mis-nested closing tags are re-closed in the right order.
    '<b><i>moo</b></i>'
        => '<p><b><i>moo</i></b></p>',
    '<b><i><abbr title="moo">argle</b></i></abbr>'
        => q{<p><b><i><abbr title='moo'>argle</abbr></i></b></p>},

    # Empty and whitespace-only input normalize to the empty string.
    ''         => '',
    "\n\t \n" => '',

    # Incomplete CDATA tags get escaped no matter whether we are
    # allowing CDATA or not
    '<pre><![CDATA[moo'
        => '<pre>&lt;![CDATA[moo</pre>',

    '&trade;'  => '<p>&#8482;</p>',
    "\x{2033}" => "<p>\x{2033}</p>",
    # NOTE(review): this pair is single-quoted, so it exercises the
    # literal text \x{e2} rather than the character U+00E2 -- confirm
    # that is what was intended.
    '\x{e2}'   => '<p>\x{e2}</p>',

    # Real-life example:
    '<A href="http://www.wssource.com/cgi-bin/auth/dispatch.cgi?_code=307&amp;alert=123256652117586003&amp;noback=1">[edit]</A>'
        => q{<p><a href='http://www.wssource.com/cgi-bin/auth/dispatch.cgi?_code=307&amp;alert=123256652117586003&amp;noback=1'>[edit]</a></p>},
    '<A HREF="http://www.wssource.com/cgi-bin/auth/dispatch.cgi?_code=307&amp;alert=123256652117586003&amp;noback=1">[edit]</A>'
        => q{<p><a href='http://www.wssource.com/cgi-bin/auth/dispatch.cgi?_code=307&amp;alert=123256652117586003&amp;noback=1'>[edit]</a></p>},
);
# Cases run with $CONSERVATIVE_PERMISSIONS: everything in %BOTH_TESTS
# plus the pairs listed here.
my %CONSERVATIVE_TESTS = (
    %BOTH_TESTS,

    # The id attribute is dropped from the expected output.
    '<p id="moo">test</p>'
        => '<p>test</p>',

    # Links: attribute quotes become single quotes; a relative href
    # comes back prefixed with "http://".
    '<p><a href="http://www.jerf.org/">test</a></p>'
        => q{<p><a href='http://www.jerf.org/'>test</a></p>},
    '<p><a href="/moo">test</a></p>'
        => q{<p><a href='http:///moo'>test</a></p>},

    # Lists, including unclosed ones.
    # NOTE(review): the \n sequences in these single-quoted list cases
    # are a literal backslash followed by "n", not newlines -- confirm
    # that is intended.
    '<ul><li>A list element.</li>\n\n<li>Another list element</li></ul>'
        => '<ul><li>A list element.</li>\n\n<li>Another list element</li></ul>',
    '<ul><li>A list element</li><li>another list element</li>'
        => '<ul><li>A list element</li><li>another list element</li></ul>',
    '<ul><li>A list element</li>\n<li>another list element</li>'
        => '<ul><li>A list element</li>\n<li>another list element</li></ul>',
    '<ul><li>A list element<li>another'
        => '<ul><li>A list element</li><li>another</li></ul>',

    # Unclosed paragraphs get closed.
    "<p>Moo.\n\n<p>More moo.\n\nForgot my p."
        => "<p>Moo.</p>\n\n<p>More moo.</p>\n\n<p>Forgot my p.</p>",

    # Script-ish input must not survive.
    '<scr<script>ipt>'
        => '<p>ipt&gt;</p>',
    q{<a href="javascript:alert('moo')">test</a>}
        => '<p>test</p>',
    '<bgsound src="javascript:alert()">moo'
        => '<p>moo</p>',

    # Attribute values are escaped too.
    '<abbr title="oh & my">OM</abbr>'
        => q{<p><abbr title='oh &amp; my'>OM</abbr></p>},
);
# Cases run with full permissions: everything in %BOTH_TESTS plus the
# pairs listed here.
my %FULL_PERM_TESTS = (
    %BOTH_TESTS,

    "Moo\n\n<h3>Header</h3>\n\nMoo"
        => "<p>Moo</p>\n\n<h3>Header</h3>\n\n<p>Moo</p>",

    qq{<div class="NewsItemTitle">title</div>\n<div class="NewsItemBody">moo\n\nthere</div>}
        => qq{<div class='NewsItemTitle'>title</div>\n<div class='NewsItemBody'>moo\n\n<p>there</p></div>},

    '<p>a</p> <hr><p>hello</p>'
        => '<p>a</p> <hr /><p>hello</p>',

    '<table><tr><td>a</td></tr></table>'
        => '<table><tr><td>a</td></tr></table>',

    # Non-quoted attributes handled as expected
    '<p name=cheesy 444 poofs>meh</p>'
        => q{<p name='cheesy' poofs='1'>meh</p>},
    '<p name=cheesy poofs 333>meh</p>'
        => q{<p name='cheesy' poofs='1'>meh</p>},
);
# Cases run with CDATA allowed. Each input here is expected to come
# back unchanged, so the table maps every string to itself.
my %CDATA_TESTS = map { ($_ => $_) } (
    '<pre><![CDATA[ & < > <script> &amp ]]></pre>',
);
# Building blocks for the RANDOM_FRAGMENTS block, which joins randomly
# chosen entries from this list into one input string.
my @RANDOM_HTML_FRAGMENTS = (
    '<scr',
    '<i>',
    '</i>',
    '<p id="testing">ddd',
    '<script>alert ()</script>',
    '<<<',
    '>>>',
    "\x{0437}",
    '&amp',
    '&amp;',
    '&notanentity;',
    " \n\r\r\n",
    '<!-- -->',
    'rc',
    '<!--',

    # An excitingly Unicode-laden string
    join(q{}, map { chr } 32 .. 1024),

    '<?xml wha?>',
    '<![CDATA[',
    ']]>',
);
# Run every input => expected-output pair of a table through normalize()
# and compare the result with is().
#
#   $hash             - hashref mapping input text to the desired output
#   $allowed_elements - handed to normalize() as its second argument
#   $allow_cdata      - handed to normalize() as its third argument
#   $name             - label appended to each test description
#
# Returns nothing useful; the outcome is reported through is().
sub test_data_with_allowed_elements {
    my ($hash, $allowed_elements, $allow_cdata, $name) = @_;

    # Walk the inputs in sorted order so the sequence of tests is the
    # same on every run; plain hash iteration order is not stable.
    for my $input (sort keys %$hash) {
        my $actual_output = normalize($input, $allowed_elements,
                                      $allow_cdata);
        is($actual_output, $hash->{$input}, 'test with ' . $name);
    }

    return;
}
# Run each table through test_data_with_allowed_elements() with its
# matching settings. Each row is:
# [ cases, allowed elements, allow-CDATA flag, label ].
for my $run (
    [ \%FULL_PERM_TESTS,    undef,                     undef, 'full permissions' ],
    [ \%CONSERVATIVE_TESTS, $CONSERVATIVE_PERMISSIONS, undef, 'conservative permissions' ],
    [ \%CDATA_TESTS,        undef,                     1,     'cdata permissions' ],
) {
    test_data_with_allowed_elements(@$run);
}
# This is primarily a verification that there exists no input that
# results in invalid XML. The return values of normalize() are
# discarded here, so what actually gets exercised is that normalize()
# completes on random concatenations of the fragments. "Sadly", this
# code doesn't solve the GIGO problem.
RANDOM_FRAGMENTS: {
    # NOTE(review): this unconditional `last` leaves the block straight
    # away, so nothing below it runs -- presumably the random test was
    # switched off on purpose; confirm before removing.
    last;

    for my $round (1 .. $RANDOM_TEST_COUNT) {
        # Ten randomly picked fragments glued together.
        my $random_input = join '',
            map { $RANDOM_HTML_FRAGMENTS[int(rand(@RANDOM_HTML_FRAGMENTS))] }
            1 .. 10;

        # Allow both cdata options.
        normalize($random_input, $CONSERVATIVE_PERMISSIONS, 0);
        normalize($random_input, $CONSERVATIVE_PERMISSIONS, 1);
    }
}
# Re-run the conservative cases, plus a few extra ones, with the fourth
# argument of normalize() set to 1 (the "allow_taglike" switch, going by
# the test descriptions), once with the CDATA flag off and once with it on.
ALLOW_TAGLIKE: {
    my %allow_taglike = (
        '<ping>' => '<p>&lt;ping&gt;</p>',
        '<p>'    => '<p></p>',
        '<html><body><p>hello</body></html>' => '<p>hello</p>',
        %CONSERVATIVE_TESTS,
    );
    # behaves differently: overrides the expectation inherited from
    # %CONSERVATIVE_TESTS for this input.
    $allow_taglike{'<scr<script>ipt>'} =
        '<p>&lt;scr&lt;script&gt;ipt&gt;</p>';

    # Sorted keys keep the test sequence identical from run to run.
    for my $input (sort keys %allow_taglike) {
        my $desired_output = $allow_taglike{$input};

        # Name the CDATA setting in the description so a failure shows
        # which of the two runs broke.
        for my $allow_cdata (0, 1) {
            is(normalize($input, $CONSERVATIVE_PERMISSIONS,
                         $allow_cdata, 1),
               $desired_output,
               "test with allow_taglike (allow_cdata=$allow_cdata)");
        }
    }
}
190 # The preceding is actually the generic test script for
191 # Thrasher::HTMLNormalize. This tests the XHTML modifications, at
192 # least to some degree.
193 ACTUAL_XHTML_TESTS: {
194 # Make sure the automatic <p> suppression occurs for
195 # text mode
196 my ($xhtml, $text) = xhtml_and_text("hello!\nhello!\n\nhello!");
197 is($text, "hello!\nhello!\n\nhello!",
198 'text only correctly suppresses all automatically-added tags');
199 is($xhtml, "<p>hello!<br />\nhello!</p>\n\n<p>hello!</p>",
200 'xhtml mode still correctly adds them');
202 # Test the translations
203 $xhtml = xhtml("<p>hello <b>there</b> buddy</p>");
204 is($xhtml, "hello <strong>there</strong> buddy",
205 'simple tag normalization works');
207 $xhtml = xhtml("<a href='http://www.jerf.org/'>Test</a>");
208 is($xhtml, "<a href='http://www.jerf.org/'>Test</a>",
209 "can handle links correctly.");
211 $xhtml = xhtml("<FONT FACE='ARIAL'><i>itali</FONT></i>c");
212 is($xhtml, "<em>itali</em>c",
213 'correctly handles upper-case crap in the tags');