Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_url_feature_extractor_unittest.cc
blob e5412a7bd4ea78f99851b23e9e2c216d2710321f
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
7 #include <string>
8 #include <vector>
9 #include "chrome/renderer/safe_browsing/features.h"
10 #include "chrome/renderer/safe_browsing/test_utils.h"
11 #include "testing/gmock/include/gmock/gmock.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 #include "url/gurl.h"
15 using ::testing::ElementsAre;
17 namespace safe_browsing {
19 class PhishingUrlFeatureExtractorTest : public ::testing::Test {
20 protected:
21 PhishingUrlFeatureExtractor extractor_;
23 void SplitStringIntoLongAlphanumTokens(const std::string& full,
24 std::vector<std::string>* tokens) {
25 PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full,
26 tokens);
30 TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
31 std::string url = "http://123.0.0.1/mydocuments/a.file.html";
32 FeatureMap expected_features;
33 expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress);
34 expected_features.AddBooleanFeature(features::kUrlPathToken +
35 std::string("mydocuments"));
36 expected_features.AddBooleanFeature(features::kUrlPathToken +
37 std::string("file"));
38 expected_features.AddBooleanFeature(features::kUrlPathToken +
39 std::string("html"));
41 FeatureMap features;
42 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
43 ExpectFeatureMapsAreEqual(features, expected_features);
45 url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear";
46 expected_features.Clear();
47 expected_features.AddBooleanFeature(features::kUrlTldToken +
48 std::string("co.uk"));
49 expected_features.AddBooleanFeature(features::kUrlDomainToken +
50 std::string("cnn"));
51 expected_features.AddBooleanFeature(features::kUrlOtherHostToken +
52 std::string("www"));
53 expected_features.AddBooleanFeature(features::kUrlNumOtherHostTokensGTOne);
54 expected_features.AddBooleanFeature(features::kUrlPathToken +
55 std::string("sports"));
56 expected_features.AddBooleanFeature(features::kUrlPathToken +
57 std::string("index"));
58 expected_features.AddBooleanFeature(features::kUrlPathToken +
59 std::string("html"));
61 features.Clear();
62 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
63 ExpectFeatureMapsAreEqual(features, expected_features);
65 url = "http://justadomain.com/";
66 expected_features.Clear();
67 expected_features.AddBooleanFeature(features::kUrlTldToken +
68 std::string("com"));
69 expected_features.AddBooleanFeature(features::kUrlDomainToken +
70 std::string("justadomain"));
72 features.Clear();
73 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
74 ExpectFeatureMapsAreEqual(features, expected_features);
76 url = "http://witharef.com/#abc";
77 expected_features.Clear();
78 expected_features.AddBooleanFeature(features::kUrlTldToken +
79 std::string("com"));
80 expected_features.AddBooleanFeature(features::kUrlDomainToken +
81 std::string("witharef"));
83 features.Clear();
84 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
85 ExpectFeatureMapsAreEqual(features, expected_features);
87 url = "http://...www..lotsodots....com./";
88 expected_features.Clear();
89 expected_features.AddBooleanFeature(features::kUrlTldToken +
90 std::string("com"));
91 expected_features.AddBooleanFeature(features::kUrlDomainToken +
92 std::string("lotsodots"));
93 expected_features.AddBooleanFeature(features::kUrlOtherHostToken +
94 std::string("www"));
96 features.Clear();
97 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
98 ExpectFeatureMapsAreEqual(features, expected_features);
100 url = "http://unrecognized.tld/";
101 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
103 url = "http://com/123";
104 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
106 url = "http://.co.uk/";
107 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
109 url = "file:///nohost.txt";
110 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
112 url = "not:valid:at:all";
113 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
116 TEST_F(PhishingUrlFeatureExtractorTest, SplitStringIntoLongAlphanumTokens) {
117 std::string full = "This.is/a_pretty\\unusual-!path,indeed";
118 std::vector<std::string> long_tokens;
119 SplitStringIntoLongAlphanumTokens(full, &long_tokens);
120 EXPECT_THAT(long_tokens,
121 ElementsAre("This", "pretty", "unusual", "path", "indeed"));
123 long_tokens.clear();
124 full = "...i-am_re/al&ly\\b,r,o|k=e:n///up%20";
125 SplitStringIntoLongAlphanumTokens(full, &long_tokens);
126 EXPECT_THAT(long_tokens, ElementsAre());
129 } // namespace safe_browsing