1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/strings/stringprintf.h"
8 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
9 #include "crypto/sha2.h"
10 #include "testing/gtest/include/gtest/gtest.h"
// Returns true if |str| is equal to at least one element of |data|.
// Performs a linear scan; an empty |data| always yields false.
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  return std::find(data.begin(), data.end(), str) != data.end();
}
22 // Tests that we generate the required host/path combinations for testing
23 // according to the Safe Browsing spec.
25 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
26 TEST(SafeBrowsingUtilTest
, UrlParsing
) {
27 std::vector
<std::string
> hosts
, paths
;
29 GURL
url("http://a.b.c/1/2.html?param=1");
30 safe_browsing_util::GenerateHostsToCheck(url
, &hosts
);
31 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
32 EXPECT_EQ(hosts
.size(), static_cast<size_t>(2));
33 EXPECT_EQ(paths
.size(), static_cast<size_t>(4));
34 EXPECT_EQ(hosts
[0], "b.c");
35 EXPECT_EQ(hosts
[1], "a.b.c");
37 EXPECT_TRUE(VectorContains(paths
, "/1/2.html?param=1"));
38 EXPECT_TRUE(VectorContains(paths
, "/1/2.html"));
39 EXPECT_TRUE(VectorContains(paths
, "/1/"));
40 EXPECT_TRUE(VectorContains(paths
, "/"));
42 url
= GURL("http://a.b.c.d.e.f.g/1.html");
43 safe_browsing_util::GenerateHostsToCheck(url
, &hosts
);
44 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
45 EXPECT_EQ(hosts
.size(), static_cast<size_t>(5));
46 EXPECT_EQ(paths
.size(), static_cast<size_t>(2));
47 EXPECT_EQ(hosts
[0], "f.g");
48 EXPECT_EQ(hosts
[1], "e.f.g");
49 EXPECT_EQ(hosts
[2], "d.e.f.g");
50 EXPECT_EQ(hosts
[3], "c.d.e.f.g");
51 EXPECT_EQ(hosts
[4], "a.b.c.d.e.f.g");
52 EXPECT_TRUE(VectorContains(paths
, "/1.html"));
53 EXPECT_TRUE(VectorContains(paths
, "/"));
55 url
= GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
56 safe_browsing_util::GeneratePathsToCheck(url
, &paths
);
57 EXPECT_EQ(paths
.size(), static_cast<size_t>(3));
58 EXPECT_TRUE(VectorContains(paths
, "/saw-cgi/eBayISAPI.dll/"));
59 EXPECT_TRUE(VectorContains(paths
, "/saw-cgi/"));
60 EXPECT_TRUE(VectorContains(paths
, "/"));
63 // Tests the url canonicalization according to the Safe Browsing spec.
65 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
66 TEST(SafeBrowsingUtilTest
, CanonicalizeUrl
) {
// Fields of one table row: the URL handed to GURL, plus the hostname, path
// and query strings that the loop at the bottom compares against the values
// produced by safe_browsing_util::CanonicalizeUrl.
68 const char* input_url
;
69 const char* expected_canonicalized_hostname
;
70 const char* expected_canonicalized_path
;
71 const char* expected_canonicalized_query
;
// Table entries follow.
// NOTE(review): for most rows only the input URL is visible in this listing;
// the matching expected hostname/path/query strings could not be confirmed
// from here -- verify them against the complete table.
74 "http://host/%25%32%35",
79 "http://host/%25%32%35%25%32%35",
84 "http://host/%2525252525252525",
89 "http://host/asdf%25%32%35asd",
94 "http://host/%%%25%32%35asd%%",
96 "/%25%25%25asd%25%25",
99 "http://host/%%%25%32%35asd%%",
101 "/%25%25%25asd%25%25",
104 "http://www.google.com/",
109 "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
110 "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
112 "/.secure/www.ebay.com/",
115 "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
116 "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
118 "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
119 "alidateinfoswqpcmlx=hgplmcx/",
122 "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
123 "22%252833%252944_55%252B",
125 "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
128 "http://3279880203/blah",
133 "http://www.google.com/blah/..",
138 "http://www.google.com/blah#fraq",
143 "http://www.GOOgle.com/",
148 "http://www.google.com.../",
153 "http://www.google.com/q?",
158 "http://www.google.com/q?r?",
163 "http://www.google.com/q?r?s",
168 "http://evil.com/foo#bar#baz",
173 "http://evil.com/foo;",
178 "http://evil.com/foo?bar;",
183 "http://notrailingslash.com",
184 "notrailingslash.com",
188 "http://www.gotaport.com:1234/",
193 " http://www.google.com/ ",
198 "http:// leadingspace.com/",
199 "%20leadingspace.com",
203 "http://%20leadingspace.com/",
204 "%20leadingspace.com",
208 "https://www.securesite.com/",
209 "www.securesite.com",
213 "http://host.com/ab%23cd",
218 "http://host%3e.com//twoslashes?more//slashes",
223 "http://host.com/abc?val=xyz#anything",
228 "http://abc:def@host.com/xyz",
233 "http://host%3e.com/abc/%2e%2e%2fdef",
238 "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
243 "ftp://host.com/foo?bar",
248 "data:text/html;charset=utf-8,%0D%0A",
253 "javascript:alert()",
258 "mailto:abc@example.com",
// Run every table row. SCOPED_TRACE attaches the row's input URL to any
// failure reported inside the loop body, so a failing row can be identified.
264 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(tests
); ++i
) {
265 SCOPED_TRACE(base::StringPrintf("Test: %s", tests
[i
].input_url
));
266 GURL
url(tests
[i
].input_url
);
// Receivers for the three canonicalized components; their addresses are
// passed to CanonicalizeUrl below.
268 std::string canonicalized_hostname
;
269 std::string canonicalized_path
;
270 std::string canonicalized_query
;
271 safe_browsing_util::CanonicalizeUrl(url
, &canonicalized_hostname
,
272 &canonicalized_path
, &canonicalized_query
);
// Compare each produced component with the row's expected value.
274 EXPECT_EQ(tests
[i
].expected_canonicalized_hostname
,
275 canonicalized_hostname
);
// NOTE(review): no second argument is visible for the path comparison before
// the next EXPECT_EQ starts -- presumably |canonicalized_path|; confirm.
276 EXPECT_EQ(tests
[i
].expected_canonicalized_path
,
278 EXPECT_EQ(tests
[i
].expected_canonicalized_query
,
279 canonicalized_query
);
// Checks GetUrlHashIndex against a one-element hash list: 0 is expected for
// the URL the list was built from, and -1 for a URL with a different path.
283 TEST(SafeBrowsingUtilTest
, GetUrlHashIndex
) {
284 GURL
url("http://www.evil.com/phish.html");
285 SBFullHashResult full_hash
;
// SHA-256 over the concatenation host + path of |url|.
// NOTE(review): the output arguments of this call are not visible here;
// presumably the digest is written into |full_hash| -- confirm.
286 crypto::SHA256HashString(url
.host() + url
.path(),
// |full_hashes| holds exactly one entry, so a match can only be at index 0.
289 std::vector
<SBFullHashResult
> full_hashes
;
290 full_hashes
.push_back(full_hash
);
292 EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url
, full_hashes
), 0);
// Same host, different path: -1 is expected (presumably "no match").
294 url
= GURL("http://www.evil.com/okay_path.html");
295 EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url
, full_hashes
), -1);
// Round-trips list ids and list names: GetListName is expected to fail for
// INVALID and succeed for MALWARE, PHISH and BINURL; for each of those three
// the resulting |list_name| must equal the matching k*List constant and
// GetListId(list_name) must return the original id.
// NOTE(review): the second argument of each GetListName call is not visible
// here; presumably it is &list_name -- confirm.
298 TEST(SafeBrowsingUtilTest
, ListIdListNameConversion
) {
299 std::string list_name
;
// An invalid id must not resolve to a name.
300 EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID
,
// MALWARE <-> kMalwareList.
302 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE
,
304 EXPECT_EQ(list_name
, std::string(safe_browsing_util::kMalwareList
));
305 EXPECT_EQ(safe_browsing_util::MALWARE
,
306 safe_browsing_util::GetListId(list_name
));
// PHISH <-> kPhishingList.
308 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH
,
310 EXPECT_EQ(list_name
, std::string(safe_browsing_util::kPhishingList
));
311 EXPECT_EQ(safe_browsing_util::PHISH
,
312 safe_browsing_util::GetListId(list_name
));
// BINURL <-> kBinUrlList.
314 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL
,
316 EXPECT_EQ(list_name
, std::string(safe_browsing_util::kBinUrlList
));
317 EXPECT_EQ(safe_browsing_util::BINURL
,
318 safe_browsing_util::GetListId(list_name
));
321 // Since the ids are saved in file, we need to make sure they don't change.
322 // Since only the last bit of each id is saved in file together with
323 // chunkids, this checks only last bit.
324 TEST(SafeBrowsingUtilTest
, ListIdVerification
) {
325 EXPECT_EQ(0, safe_browsing_util::MALWARE
% 2);
326 EXPECT_EQ(1, safe_browsing_util::PHISH
% 2);
327 EXPECT_EQ(0, safe_browsing_util::BINURL
%2);
330 TEST(SafeBrowsingUtilTest
, StringToSBFullHashAndSBFullHashToString
) {
331 // 31 chars plus the last \0 as full_hash.
332 const std::string hash_in
= "12345678902234567890323456789012";
333 SBFullHash hash_out
= safe_browsing_util::StringToSBFullHash(hash_in
);
334 EXPECT_EQ(0x34333231, hash_out
.prefix
);
335 EXPECT_EQ(0, memcmp(hash_in
.data(), hash_out
.full_hash
, sizeof(SBFullHash
)));
337 std::string hash_final
= safe_browsing_util::SBFullHashToString(hash_out
);
338 EXPECT_EQ(hash_in
, hash_final
);