1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/scorer.h"
7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h"
9 #include "base/files/scoped_temp_dir.h"
10 #include "base/format_macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/threading/thread.h"
14 #include "chrome/common/safe_browsing/client_model.pb.h"
15 #include "chrome/renderer/safe_browsing/features.h"
16 #include "testing/gmock/include/gmock/gmock.h"
17 #include "testing/gtest/include/gtest/gtest.h"
19 namespace safe_browsing
{
21 class PhishingScorerTest
: public ::testing::Test
{
23 void SetUp() override
{
24 // Setup a simple model. Note that the scorer does not care about
25 // how features are encoded so we use readable strings here to make
26 // the test simpler to follow.
28 model_
.add_hashes("feature1");
29 model_
.add_hashes("feature2");
30 model_
.add_hashes("feature3");
31 model_
.add_hashes("token one");
32 model_
.add_hashes("token two");
34 ClientSideModel::Rule
* rule
;
35 rule
= model_
.add_rule();
36 rule
->set_weight(0.5);
38 rule
= model_
.add_rule();
39 rule
->add_feature(0); // feature1
40 rule
->set_weight(2.0);
42 rule
= model_
.add_rule();
43 rule
->add_feature(0); // feature1
44 rule
->add_feature(1); // feature2
45 rule
->set_weight(3.0);
47 model_
.add_page_term(3); // token one
48 model_
.add_page_term(4); // token two
50 // These will be murmur3 hashes, but for this test it's not necessary
51 // that the hashes correspond to actual words.
52 model_
.add_page_word(1000U);
53 model_
.add_page_word(2000U);
54 model_
.add_page_word(3000U);
56 model_
.set_max_words_per_term(2);
57 model_
.set_murmur_hash_seed(12345U);
58 model_
.set_max_shingles_per_page(10);
59 model_
.set_shingle_size(3);
62 ClientSideModel model_
;
65 TEST_F(PhishingScorerTest
, HasValidModel
) {
66 scoped_ptr
<Scorer
> scorer
;
67 scorer
.reset(Scorer::Create(model_
.SerializeAsString()));
68 EXPECT_TRUE(scorer
.get() != NULL
);
70 // Invalid model string.
71 scorer
.reset(Scorer::Create("bogus string"));
72 EXPECT_FALSE(scorer
.get());
74 // Mode is missing a required field.
75 model_
.clear_max_words_per_term();
76 scorer
.reset(Scorer::Create(model_
.SerializePartialAsString()));
77 EXPECT_FALSE(scorer
.get());
80 TEST_F(PhishingScorerTest
, PageTerms
) {
81 scoped_ptr
<Scorer
> scorer(Scorer::Create(model_
.SerializeAsString()));
82 ASSERT_TRUE(scorer
.get());
84 // Use std::vector instead of base::hash_set for comparison.
85 // On Android, EXPECT_THAT(..., ContainerEq(...)) doesn't support
86 // std::hash_set, but std::vector works fine.
87 std::vector
<std::string
> expected_page_terms
;
88 expected_page_terms
.push_back("token one");
89 expected_page_terms
.push_back("token two");
90 std::sort(expected_page_terms
.begin(), expected_page_terms
.end());
92 base::hash_set
<std::string
> page_terms
= scorer
->page_terms();
93 std::vector
<std::string
> page_terms_v(page_terms
.begin(), page_terms
.end());
94 std::sort(page_terms_v
.begin(), page_terms_v
.end());
96 EXPECT_THAT(page_terms_v
, ::testing::ContainerEq(expected_page_terms
));
99 TEST_F(PhishingScorerTest
, PageWords
) {
100 scoped_ptr
<Scorer
> scorer(Scorer::Create(model_
.SerializeAsString()));
101 ASSERT_TRUE(scorer
.get());
102 std::vector
<uint32
> expected_page_words
;
103 expected_page_words
.push_back(1000U);
104 expected_page_words
.push_back(2000U);
105 expected_page_words
.push_back(3000U);
106 std::sort(expected_page_words
.begin(), expected_page_words
.end());
108 base::hash_set
<uint32
> page_words
= scorer
->page_words();
109 std::vector
<uint32
> page_words_v(page_words
.begin(), page_words
.end());
110 std::sort(page_words_v
.begin(), page_words_v
.end());
112 EXPECT_THAT(page_words_v
, ::testing::ContainerEq(expected_page_words
));
114 EXPECT_EQ(2U, scorer
->max_words_per_term());
115 EXPECT_EQ(12345U, scorer
->murmurhash3_seed());
116 EXPECT_EQ(10U, scorer
->max_shingles_per_page());
117 EXPECT_EQ(3U, scorer
->shingle_size());
120 TEST_F(PhishingScorerTest
, ComputeScore
) {
121 scoped_ptr
<Scorer
> scorer(Scorer::Create(model_
.SerializeAsString()));
122 ASSERT_TRUE(scorer
.get());
124 // An empty feature map should match the empty rule.
126 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
127 // => 0.62245933120185459
128 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer
->ComputeScore(features
));
129 // Same if the feature does not match any rule.
130 EXPECT_TRUE(features
.AddBooleanFeature("not existing feature"));
131 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer
->ComputeScore(features
));
133 // Feature 1 matches which means that the logodds will be:
134 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
135 // => p = 0.6899744811276125
136 EXPECT_TRUE(features
.AddRealFeature("feature1", 0.15));
137 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer
->ComputeScore(features
));
139 // Now, both feature 1 and feature 2 match. Expected logodds:
140 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
141 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8
142 // => p = 0.99999627336071584
143 EXPECT_TRUE(features
.AddBooleanFeature("feature2"));
144 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer
->ComputeScore(features
));
146 } // namespace safe_browsing