chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
   6
   7 #include <string>
   8
   9 #include "base/bind.h"
  10 #include "base/command_line.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/strings/string16.h"
  13 #include "base/strings/utf_string_conversions.h"
  14 #include "chrome/common/chrome_switches.h"
  15 #include "chrome/common/safe_browsing/client_model.pb.h"
  16 #include "chrome/common/safe_browsing/csd.pb.h"
  17 #include "chrome/renderer/safe_browsing/features.h"
  18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
  19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
  20 #include "chrome/renderer/safe_browsing/scorer.h"
  21 #include "chrome/test/base/in_process_browser_test.h"
  22 #include "chrome/test/base/ui_test_utils.h"
  23 #include "content/public/renderer/render_view.h"
  24 #include "crypto/sha2.h"
  25 #include "net/dns/mock_host_resolver.h"
  26 #include "net/test/embedded_test_server/embedded_test_server.h"
  27 #include "net/test/embedded_test_server/http_response.h"
  28 #include "testing/gmock/include/gmock/gmock.h"
  29 #include "url/gurl.h"
  30
  31 using ::testing::AllOf;
  32 using ::testing::Contains;
  33 using ::testing::Not;
  34 using ::testing::Pair;
  35
  36 namespace {
  37
  38 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
  39 const int kRenderViewRoutingId = 2;
  40
  41 }
  42
  43 namespace safe_browsing {
  44
  45 class PhishingClassifierTest : public InProcessBrowserTest {
  46  protected:
  47   PhishingClassifierTest()
  48       : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
  49         page_link_domain_phishing_(features::kPageLinkDomain +
  50                                    std::string("phishing.com")),
  51         page_term_login_(features::kPageTerm + std::string("login")) {
  52   }
  53
  54   void SetUpCommandLine(base::CommandLine* command_line) override {
  55     command_line->AppendSwitch(switches::kSingleProcess);
  56 #if defined(OS_WIN)
  57     // Don't want to try to create a GPU process.
  58     command_line->AppendSwitch(switches::kDisableGpu);
  59 #endif
  60   }
  61
  62   void SetUpOnMainThread() override {
  63     // Construct a model to test with.  We include one feature from each of
  64     // the feature extractors, which allows us to verify that they all ran.
  65     ClientSideModel model;
  66
  67     model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
  68     model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
  69     model.add_hashes(crypto::SHA256HashString(page_term_login_));
  70     model.add_hashes(crypto::SHA256HashString("login"));
  71     model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
  72                                               std::string("net")));
  73     model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
  74                                               std::string("phishing.com")));
  75     model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
  76                                               std::string("login")));
  77     model.add_hashes(crypto::SHA256HashString("login"));
  78
  79     // Add a default rule with a non-phishy weight.
  80     ClientSideModel::Rule* rule = model.add_rule();
  81     rule->set_weight(-1.0);
  82
  83     // To give a phishy score, the total weight needs to be >= 0
  84     // (0.5 when converted to a probability).  This will only happen
  85     // if all of the listed features are present.
  86     rule = model.add_rule();
  87     rule->add_feature(0);
  88     rule->add_feature(1);
  89     rule->add_feature(2);
  90     rule->set_weight(1.0);
  91
  92     model.add_page_term(3);
  93     model.set_murmur_hash_seed(2777808611U);
  94     model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
  95     model.set_max_words_per_term(1);
  96     model.set_max_shingles_per_page(100);
  97     model.set_shingle_size(3);
  98
  99     clock_ = new MockFeatureExtractorClock;
 100     scorer_.reset(Scorer::Create(model.SerializeAsString()));
 101     ASSERT_TRUE(scorer_.get());
 102
 103     classifier_.reset(new PhishingClassifier(
 104         content::RenderView::FromRoutingID(kRenderViewRoutingId),
 105         clock_));
 106   }
 107
 108   void TearDownOnMainThread() override {
 109     content::RunAllPendingInMessageLoop();
 110   }
 111
 112   // Helper method to start phishing classification and wait for it to
 113   // complete.  Returns the true if the page is classified as phishy and
 114   // false otherwise.
 115   bool RunPhishingClassifier(const base::string16* page_text,
 116                              float* phishy_score,
 117                              FeatureMap* features) {
 118     ClientPhishingRequest verdict;
 119     // The classifier accesses the RenderView and must run in the RenderThread.
 120     PostTaskToInProcessRendererAndWait(
 121         base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
 122                    base::Unretained(this),
 123                    page_text, phishy_score, features, &verdict));
 124     return verdict.is_phishing();
 125   }
 126
 127   void DoRunPhishingClassifier(const base::string16* page_text,
 128                                float* phishy_score,
 129                                FeatureMap* features,
 130                                ClientPhishingRequest* verdict) {
 131     *phishy_score = PhishingClassifier::kInvalidScore;
 132     features->Clear();
 133
 134     // Force synchronous behavior for ease of unittesting.
 135     base::RunLoop run_loop;
 136     classifier_->BeginClassification(
 137         page_text,
 138         base::Bind(&PhishingClassifierTest::ClassificationFinished,
 139                    base::Unretained(this), &run_loop, verdict));
 140     content::RunThisRunLoop(&run_loop);
 141
 142     *phishy_score = verdict->client_score();
 143     for (int i = 0; i < verdict->feature_map_size(); ++i) {
 144       features->AddRealFeature(verdict->feature_map(i).name(),
 145                                verdict->feature_map(i).value());
 146     }
 147   }
 148
 149   // Completion callback for classification.
 150   void ClassificationFinished(base::RunLoop* run_loop,
 151                               ClientPhishingRequest* verdict_out,
 152                               const ClientPhishingRequest& verdict) {
 153     *verdict_out = verdict;  // Copy the verdict.
 154     run_loop->Quit();
 155   }
 156
 157   scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
 158   net::test_server::EmbeddedTestServer* embedded_test_server() {
 159     // TODO(ajwong): Merge this into BrowserTestBase.
 160     if (!embedded_test_server_) {
 161       embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
 162       embedded_test_server_->RegisterRequestHandler(
 163           base::Bind(&PhishingClassifierTest::HandleRequest,
 164                      base::Unretained(this)));
 165       CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
 166     }
 167     return embedded_test_server_.get();
 168   }
 169
 170   void LoadHtml(const std::string& host, const std::string& content) {
 171     GURL::Replacements replace_host;
 172     replace_host.SetHostStr(host);
 173     response_content_ = content;
 174     ui_test_utils::NavigateToURL(
 175         browser(),
 176         embedded_test_server()->base_url().ReplaceComponents(replace_host));
 177   }
 178
 179   void LoadHtmlPost(const std::string& host, const std::string& content) {
 180     GURL::Replacements replace_host;
 181     replace_host.SetHostStr(host);
 182     response_content_ = content;
 183     ui_test_utils::NavigateToURLWithPost(
 184         browser(),
 185         embedded_test_server()->base_url().ReplaceComponents(replace_host));
 186   }
 187
 188   scoped_ptr<net::test_server::HttpResponse>
 189       HandleRequest(const net::test_server::HttpRequest& request) {
 190     scoped_ptr<net::test_server::BasicHttpResponse> http_response(
 191         new net::test_server::BasicHttpResponse());
 192     http_response->set_code(net::HTTP_OK);
 193     http_response->set_content_type("text/html");
 194     http_response->set_content(response_content_);
 195     return http_response.Pass();
 196   }
 197
 198   std::string response_content_;
 199   scoped_ptr<Scorer> scorer_;
 200   scoped_ptr<PhishingClassifier> classifier_;
 201   MockFeatureExtractorClock* clock_;  // Owned by classifier_.
 202
 203   // Features that are in the model.
 204   const std::string url_tld_token_net_;
 205   const std::string page_link_domain_phishing_;
 206   const std::string page_term_login_;
 207 };
 208
 209 // This test flakes on Mac with force compositing mode.
 210 // http://crbug.com/316709
 211 #if defined(OS_MACOSX)
 212 #define MAYBE_TestClassification DISABLED_TestClassification
 213 #else
 214 #define MAYBE_TestClassification TestClassification
 215 #endif
 216 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
 217   host_resolver()->AddRule("*", "127.0.0.1");
 218
 219   // No scorer yet, so the classifier is not ready.
 220   ASSERT_FALSE(classifier_->is_ready());
 221
 222   // Now set the scorer.
 223   classifier_->set_phishing_scorer(scorer_.get());
 224   ASSERT_TRUE(classifier_->is_ready());
 225
 226   // This test doesn't exercise the extraction timing.
 227   EXPECT_CALL(*clock_, Now())
 228       .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
 229
 230   base::string16 page_text = base::ASCIIToUTF16("login");
 231   float phishy_score;
 232   FeatureMap features;
 233
 234   LoadHtml("host.net",
 235       "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
 236   EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 237   // Note: features.features() might contain other features that simply aren't
 238   // in the model.
 239   EXPECT_THAT(features.features(),
 240               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
 241                     Contains(Pair(page_link_domain_phishing_, 1.0)),
 242                     Contains(Pair(page_term_login_, 1.0))));
 243   EXPECT_FLOAT_EQ(0.5, phishy_score);
 244
 245   // Change the link domain to something non-phishy.
 246   LoadHtml("host.net",
 247            "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
 248   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 249   EXPECT_THAT(features.features(),
 250               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
 251                     Contains(Pair(page_term_login_, 1.0))));
 252   EXPECT_THAT(features.features(),
 253               Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
 254   EXPECT_GE(phishy_score, 0.0);
 255   EXPECT_LT(phishy_score, 0.5);
 256
 257   // Extraction should fail for this case since there is no TLD.
 258   LoadHtml("localhost", "<html><body>content</body></html>");
 259   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 260   EXPECT_EQ(0U, features.features().size());
 261   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 262
 263   // Extraction should also fail for this case because the URL is not http.
 264   net::SpawnedTestServer https_server(
 265       net::SpawnedTestServer::TYPE_HTTPS,
 266       net::SpawnedTestServer::kLocalhost,
 267       base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
 268   ASSERT_TRUE(https_server.Start());
 269   GURL::Replacements replace_host;
 270   replace_host.SetHostStr("host.net");
 271   GURL test_url = https_server.GetURL("/files/title1.html");
 272   ui_test_utils::NavigateToURL(browser(),
 273                                test_url.ReplaceComponents(replace_host));
 274   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 275   EXPECT_EQ(0U, features.features().size());
 276   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 277
 278   // Extraction should fail for this case because the URL is a POST request.
 279   LoadHtmlPost("host.net", "<html><body>content</body></html>");
 280   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 281   EXPECT_EQ(0U, features.features().size());
 282   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 283 }
 284
 285 // Test flakes with LSAN enabled. See http://crbug.com/373155.
 286 #if defined(LEAK_SANITIZER)
 287 #define MAYBE_DisableDetection DISABLED_DisableDetection
 288 #else
 289 #define MAYBE_DisableDetection DisableDetection
 290 #endif
 291 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) {
 292   // No scorer yet, so the classifier is not ready.
 293   EXPECT_FALSE(classifier_->is_ready());
 294
 295   // Now set the scorer.
 296   classifier_->set_phishing_scorer(scorer_.get());
 297   EXPECT_TRUE(classifier_->is_ready());
 298
 299   // Set a NULL scorer, which turns detection back off.
 300   classifier_->set_phishing_scorer(NULL);
 301   EXPECT_FALSE(classifier_->is_ready());
 302 }
 303
 304 }  // namespace safe_browsing