Fix bug in PL2+ implementation
[xapian.git] / xapian-applications / omega / urlenctest.cc
blob2a040b6607784e70192178ab312abca04675711c
/** @file
 * @brief Test URL encoding and decoding functions
 */
/* Copyright (C) 2011,2012,2015 Olly Betts
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include <config.h>
27 #include <cstdlib>
28 #include <iostream>
29 #include <map>
30 #include <string>
32 #include "urldecode.h"
33 #include "urlencode.h"
35 using namespace std;
/** One url_encode() test case.
 *
 *  Tables of these are terminated by an entry whose input is NULL.
 */
struct enc_testcase {
    /// Raw string passed to url_encode().
    const char * input;

    /// Exact output url_encode() is expected to produce for input.
    const char * result;
};
42 static enc_testcase urlenc_testcases[] = {
43 { "", "" },
44 { "foo", "foo" },
45 { "%", "%25" },
46 { "%xyz", "%25xyz" },
47 { "xyz%", "xyz%25" },
48 { "xyz%25", "xyz%2525" },
49 { "~olly/hello-world_2.txt", "~olly%2Fhello-world_2.txt" },
50 // Test every possible character (except '\0') encodes as it should:
51 { "\x01\x02\x03\x04\x05\x06\x07", "%01%02%03%04%05%06%07" },
52 { "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "%08%09%0A%0B%0C%0D%0E%0F" },
53 { "\x10\x11\x12\x13\x14\x15\x16\x17", "%10%11%12%13%14%15%16%17" },
54 { "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "%18%19%1A%1B%1C%1D%1E%1F" },
55 { " !\"#$%&'()*+,-./", "%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F" },
56 { "0123456789:;<=>?", "0123456789%3A%3B%3C%3D%3E%3F" },
57 { "@ABCDEFGHIJKLMNO", "%40ABCDEFGHIJKLMNO" },
58 { "PQRSTUVWXYZ[\\]^_", "PQRSTUVWXYZ%5B%5C%5D%5E_" },
59 { "`abcdefghijklmno", "%60abcdefghijklmno" },
60 { "pqrstuvwxyz{|}~\x7F", "pqrstuvwxyz%7B%7C%7D~%7F" },
61 { "\x80\x81\x82\x83\x84\x85\x86\x87", "%80%81%82%83%84%85%86%87" },
62 { "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F", "%88%89%8A%8B%8C%8D%8E%8F" },
63 { "\x90\x91\x92\x93\x94\x95\x96\x97", "%90%91%92%93%94%95%96%97" },
64 { "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F", "%98%99%9A%9B%9C%9D%9E%9F" },
65 { "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7", "%A0%A1%A2%A3%A4%A5%A6%A7" },
66 { "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF", "%A8%A9%AA%AB%AC%AD%AE%AF" },
67 { "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7", "%B0%B1%B2%B3%B4%B5%B6%B7" },
68 { "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", "%B8%B9%BA%BB%BC%BD%BE%BF" },
69 { "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7", "%C0%C1%C2%C3%C4%C5%C6%C7" },
70 { "\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF", "%C8%C9%CA%CB%CC%CD%CE%CF" },
71 { "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7", "%D0%D1%D2%D3%D4%D5%D6%D7" },
72 { "\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF", "%D8%D9%DA%DB%DC%DD%DE%DF" },
73 { "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7", "%E0%E1%E2%E3%E4%E5%E6%E7" },
74 { "\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF", "%E8%E9%EA%EB%EC%ED%EE%EF" },
75 { "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7", "%F0%F1%F2%F3%F4%F5%F6%F7" },
76 { "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF", "%F8%F9%FA%FB%FC%FD%FE%FF" },
77 { NULL, NULL }
/** One url_decode() test case.
 *
 *  Tables of these are terminated by an entry whose input is NULL.
 */
struct dec_testcase {
    /// Query string passed to url_decode().
    const char * input;

    /** Expected parameters, flattened as name, value, name, value, ...
     *
     *  The list is ended by a null pointer, so seven slots give room for at
     *  most three pairs.  main() compares this list against a
     *  multimap<string, string> walked from begin() to end(), so the pairs
     *  have to be listed in that container's iteration order, not in the
     *  order they appear in input.
     */
    const char * result[7];
};
85 static dec_testcase urldec_testcases[] = {
86 { "", { 0 } },
87 { "foo=bar", { "foo", "bar", 0 } },
88 { "foo=a%20b&", { "foo", "a b", 0 } },
89 { "&foo=hello+world", { "foo", "hello world", 0 } },
90 { "&foo=1&", { "foo", "1", 0 } },
91 { "foo=1&&bar=2", { "bar", "2", "foo", "1", 0 } },
92 { "a+1=bar%", { "a 1", "bar%", 0 } },
93 { "a%201=bar%0", { "a 1", "bar%0", 0 } },
94 { "a%2x1=bar%x", { "a%2x1", "bar%x", 0 } },
95 { "a%2x1%%40=bar%x", { "a%2x1%@", "bar%x", 0 } },
96 { "a%01%1f%2A%30%4d%5a%9A%9f%Aa%bF%C0%De%E2%FF=bar%0%",
97 { "a\x01\x1f*0MZ\x9a\x9f\xaa\xbf\xc0\xde\xe2\xff", "bar%0%", 0 } },
98 { "a=1&b=2&a=1", { "a", "1", "a", "1", "b", "2", 0 } },
99 // Regression test for bug fixed in 1.2.13 and 1.3.1:
100 { "price=10%24", { "price", "10$" } },
101 { NULL, { 0 } }
/** One url_prettify() test case.
 *
 *  Tables of these are terminated by an entry whose input is NULL.
 */
struct pretty_testcase {
    /// URL passed to url_prettify().
    const char * input;

    /// Expected prettified URL; a null pointer means "same as input".
    const char * result;
};
109 // 0 for result means "same as input" here.
110 struct pretty_testcase pretty_testcases[] = {
111 { "", 0 },
112 { "http://localhost/", 0 },
113 { "%", 0 },
114 { "%x", 0 },
115 { "%xy", 0 },
116 { "%xyz", 0 },
117 { "%25", 0 },
118 { "%20", " " },
119 { "%20hello", " hello" },
120 { "http://example.com/%7ehello%20world/",
121 "http://example.com/~hello world/" },
122 { "http://example.com/%25/a%20b%80/100%",
123 "http://example.com/%25/a b%80/100%" },
124 { "http:http.html", 0 },
125 { "http%3ahttp.html", 0 },
126 { "/foo.html?a%3db=c%2bd", 0 },
127 { "/foo.html#%31", 0 },
128 { "/x%3dy.html", "/x=y.html" },
129 { "/XML%3a%3aSimple.html", "/XML::Simple.html" },
130 { "back%20slash%2fco%3alon", "back slash%2fco%3alon" },
131 { "%5b%5D%40%21%24%26%27%28%29%2A%2B%2c%3b%3D", "%5b%5D%40!$&'()*+,;=" },
132 { "/%5b%5D%40%21%24%26%27%28%29%2A%2B%2c%3b%3D", "/[]@!$&'()*+,;=" },
133 { "https://x%3ax%40x%5b%5dx/", 0 },
134 { "//x%3ax%40x%5b%5dx/", 0 },
135 { "/f%c3%bcr", "/f\xc3\xbcr" },
136 { "%c3%bc", "\xc3\xbc" },
137 { "%c3%b", 0 },
138 { "%c3%", 0 },
139 { "%c3", 0 },
140 { "%c3x", 0 },
141 { "%80", 0 },
142 { "%bf", 0 },
143 { "/%ff", 0 },
144 { "/%fe%ff%20/", "/%fe%ff /" },
145 { "/%c3%20.htm", "/%c3 .htm" },
146 { "hellip%e2%80%a6.gif", "hellip\xe2\x80\xa6.gif" },
147 { "hellip%e2%80%a", 0 },
148 { "hellip%e2%80", 0 },
149 // Example from #644:
150 { "Szerz%C5%91d%C3%A9sek", "Szerz\xc5\x91""d\xc3\xa9sek" },
151 // Overlong sequences:
152 { "/%C080.nul", 0 },
153 { "%e0%9f%88/index.html", 0 },
154 { "%e0%81%9e/f0%82%81%80-fyi", 0 },
155 // Code point above Unicode range:
156 { "/%f4%90%80%80/", 0 },
157 { NULL, NULL }
/// Name/value pairs collected by CGIParameterHandler::operator() below;
/// main() clears this before each url_decode() call and inspects it after.
static std::multimap<std::string, std::string> params;
162 void
163 CGIParameterHandler::operator()(const string& var, const string& val) const
165 params.insert(multimap<string, string>::value_type(var, val));
168 int main() {
169 for (enc_testcase * e = urlenc_testcases; e->input; ++e) {
170 string result;
171 url_encode(result, e->input);
172 if (result != e->result) {
173 cerr << "urlencode of " << e->input << " should be " << e->result
174 << "\", got \"" << result << "\"" << endl;
175 exit(1);
179 for (dec_testcase * d = urldec_testcases; d->input; ++d) {
180 params.clear();
181 const char * input = d->input;
182 url_decode(CGIParameterHandler(), CStringItor(input), CStringItor());
183 const char ** p = d->result;
184 bool ok = true;
185 for (multimap<string, string>::const_iterator i = params.begin();
186 i != params.end(); ++i) {
187 if (!*p || i->first.compare(*p) != 0 ||
188 i->second.compare(p[1]) != 0) {
189 // Variable and/or value doesn't match.
190 ok = false;
191 break;
193 p += 2;
195 if (!ok || *p) {
196 cerr << "Expected these parameters:\n";
197 for (p = d->result; *p; p += 2) {
198 cerr << " " << p[0] << " = " << p[1] << endl;
200 cerr << "Got these parameters:\n";
201 for (multimap<string, string>::const_iterator j = params.begin();
202 j != params.end(); ++j) {
203 cerr << " " << j->first << " = " << j->second << endl;
205 exit(1);
209 for (pretty_testcase * e = pretty_testcases; e->input; ++e) {
210 string url = e->input;
211 url_prettify(url);
212 const char * result = (e->result ? e->result : e->input);
213 if (url != result) {
214 cerr << "url_prettify of " << e->input << " should be " << result
215 << "\", got \"" << url << "\"" << endl;
216 exit(1);