chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
   6
   7 #include <string>
   8
   9 #include "base/bind.h"
  10 #include "base/command_line.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/strings/string16.h"
  13 #include "base/strings/utf_string_conversions.h"
  14 #include "chrome/common/chrome_switches.h"
  15 #include "chrome/common/safe_browsing/client_model.pb.h"
  16 #include "chrome/common/safe_browsing/csd.pb.h"
  17 #include "chrome/renderer/safe_browsing/features.h"
  18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
  19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
  20 #include "chrome/renderer/safe_browsing/scorer.h"
  21 #include "chrome/test/base/in_process_browser_test.h"
  22 #include "chrome/test/base/ui_test_utils.h"
  23 #include "content/public/renderer/render_view.h"
  24 #include "crypto/sha2.h"
  25 #include "net/dns/mock_host_resolver.h"
  26 #include "net/test/embedded_test_server/embedded_test_server.h"
  27 #include "net/test/embedded_test_server/http_response.h"
  28 #include "testing/gmock/include/gmock/gmock.h"
  29 #include "url/gurl.h"
  30
  31 using ::testing::AllOf;
  32 using ::testing::Contains;
  33 using ::testing::Not;
  34 using ::testing::Pair;
  35
  36 namespace {
  37
  38 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
  39 const int kRenderViewRoutingId = 2;
  40
  41 }
  42
  43 namespace safe_browsing {
  44
  45 class PhishingClassifierTest : public InProcessBrowserTest {
  46  protected:
  47   PhishingClassifierTest()
  48       : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
  49         page_link_domain_phishing_(features::kPageLinkDomain +
  50                                    std::string("phishing.com")),
  51         page_term_login_(features::kPageTerm + std::string("login")) {
  52   }
  53
  54   virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
  55     command_line->AppendSwitch(switches::kSingleProcess);
  56 #if defined(OS_WIN)
  57     // Don't want to try to create a GPU process.
  58     command_line->AppendSwitch(switches::kDisableAcceleratedCompositing);
  59 #endif
  60   }
  61
  62   virtual void SetUpOnMainThread() OVERRIDE {
  63     // Construct a model to test with.  We include one feature from each of
  64     // the feature extractors, which allows us to verify that they all ran.
  65     ClientSideModel model;
  66
  67     model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
  68     model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
  69     model.add_hashes(crypto::SHA256HashString(page_term_login_));
  70     model.add_hashes(crypto::SHA256HashString("login"));
  71     model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
  72                                               std::string("net")));
  73     model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
  74                                               std::string("phishing.com")));
  75     model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
  76                                               std::string("login")));
  77     model.add_hashes(crypto::SHA256HashString("login"));
  78
  79     // Add a default rule with a non-phishy weight.
  80     ClientSideModel::Rule* rule = model.add_rule();
  81     rule->set_weight(-1.0);
  82
  83     // To give a phishy score, the total weight needs to be >= 0
  84     // (0.5 when converted to a probability).  This will only happen
  85     // if all of the listed features are present.
  86     rule = model.add_rule();
  87     rule->add_feature(0);
  88     rule->add_feature(1);
  89     rule->add_feature(2);
  90     rule->set_weight(1.0);
  91
  92     model.add_page_term(3);
  93     model.set_murmur_hash_seed(2777808611U);
  94     model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
  95     model.set_max_words_per_term(1);
  96
  97     clock_ = new MockFeatureExtractorClock;
  98     scorer_.reset(Scorer::Create(model.SerializeAsString()));
  99     ASSERT_TRUE(scorer_.get());
 100
 101     classifier_.reset(new PhishingClassifier(
 102         content::RenderView::FromRoutingID(kRenderViewRoutingId),
 103         clock_));
 104   }
 105
 106   virtual void TearDownOnMainThread() OVERRIDE {
 107     content::RunAllPendingInMessageLoop();
 108   }
 109
 110   // Helper method to start phishing classification and wait for it to
 111   // complete.  Returns the true if the page is classified as phishy and
 112   // false otherwise.
 113   bool RunPhishingClassifier(const base::string16* page_text,
 114                              float* phishy_score,
 115                              FeatureMap* features) {
 116     ClientPhishingRequest verdict;
 117     // The classifier accesses the RenderView and must run in the RenderThread.
 118     PostTaskToInProcessRendererAndWait(
 119         base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
 120                    base::Unretained(this),
 121                    page_text, phishy_score, features, &verdict));
 122     return verdict.is_phishing();
 123   }
 124
 125   void DoRunPhishingClassifier(const base::string16* page_text,
 126                                float* phishy_score,
 127                                FeatureMap* features,
 128                                ClientPhishingRequest* verdict) {
 129     *phishy_score = PhishingClassifier::kInvalidScore;
 130     features->Clear();
 131
 132     // Force synchronous behavior for ease of unittesting.
 133     base::RunLoop run_loop;
 134     classifier_->BeginClassification(
 135         page_text,
 136         base::Bind(&PhishingClassifierTest::ClassificationFinished,
 137                    base::Unretained(this), &run_loop, verdict));
 138     content::RunThisRunLoop(&run_loop);
 139
 140     *phishy_score = verdict->client_score();
 141     for (int i = 0; i < verdict->feature_map_size(); ++i) {
 142       features->AddRealFeature(verdict->feature_map(i).name(),
 143                                verdict->feature_map(i).value());
 144     }
 145   }
 146
 147   // Completion callback for classification.
 148   void ClassificationFinished(base::RunLoop* run_loop,
 149                               ClientPhishingRequest* verdict_out,
 150                               const ClientPhishingRequest& verdict) {
 151     *verdict_out = verdict;  // Copy the verdict.
 152     run_loop->Quit();
 153   }
 154
 155   scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
 156   net::test_server::EmbeddedTestServer* embedded_test_server() {
 157     // TODO(ajwong): Merge this into BrowserTestBase.
 158     if (!embedded_test_server_) {
 159       embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
 160       embedded_test_server_->RegisterRequestHandler(
 161           base::Bind(&PhishingClassifierTest::HandleRequest,
 162                      base::Unretained(this)));
 163       CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
 164     }
 165     return embedded_test_server_.get();
 166   }
 167
 168   void LoadHtml(const std::string& host, const std::string& content) {
 169     GURL::Replacements replace_host;
 170     replace_host.SetHostStr(host);
 171     response_content_ = content;
 172     ui_test_utils::NavigateToURL(
 173         browser(),
 174         embedded_test_server()->base_url().ReplaceComponents(replace_host));
 175   }
 176
 177   void LoadHtmlPost(const std::string& host, const std::string& content) {
 178     GURL::Replacements replace_host;
 179     replace_host.SetHostStr(host);
 180     response_content_ = content;
 181     ui_test_utils::NavigateToURLWithPost(
 182         browser(),
 183         embedded_test_server()->base_url().ReplaceComponents(replace_host));
 184   }
 185
 186   scoped_ptr<net::test_server::HttpResponse>
 187       HandleRequest(const net::test_server::HttpRequest& request) {
 188     scoped_ptr<net::test_server::BasicHttpResponse> http_response(
 189         new net::test_server::BasicHttpResponse());
 190     http_response->set_code(net::HTTP_OK);
 191     http_response->set_content_type("text/html");
 192     http_response->set_content(response_content_);
 193     return http_response.PassAs<net::test_server::HttpResponse>();
 194   }
 195
 196   std::string response_content_;
 197   scoped_ptr<Scorer> scorer_;
 198   scoped_ptr<PhishingClassifier> classifier_;
 199   MockFeatureExtractorClock* clock_;  // Owned by classifier_.
 200
 201   // Features that are in the model.
 202   const std::string url_tld_token_net_;
 203   const std::string page_link_domain_phishing_;
 204   const std::string page_term_login_;
 205 };
 206
 207 // This test flakes on Mac with force compositing mode.
 208 // http://crbug.com/316709
 209 #if defined(OS_MACOSX)
 210 #define MAYBE_TestClassification DISABLED_TestClassification
 211 #else
 212 #define MAYBE_TestClassification TestClassification
 213 #endif
 214 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
 215   host_resolver()->AddRule("*", "127.0.0.1");
 216
 217   // No scorer yet, so the classifier is not ready.
 218   ASSERT_FALSE(classifier_->is_ready());
 219
 220   // Now set the scorer.
 221   classifier_->set_phishing_scorer(scorer_.get());
 222   ASSERT_TRUE(classifier_->is_ready());
 223
 224   // This test doesn't exercise the extraction timing.
 225   EXPECT_CALL(*clock_, Now())
 226       .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
 227
 228   base::string16 page_text = base::ASCIIToUTF16("login");
 229   float phishy_score;
 230   FeatureMap features;
 231
 232   LoadHtml("host.net",
 233       "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
 234   EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 235   // Note: features.features() might contain other features that simply aren't
 236   // in the model.
 237   EXPECT_THAT(features.features(),
 238               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
 239                     Contains(Pair(page_link_domain_phishing_, 1.0)),
 240                     Contains(Pair(page_term_login_, 1.0))));
 241   EXPECT_FLOAT_EQ(0.5, phishy_score);
 242
 243   // Change the link domain to something non-phishy.
 244   LoadHtml("host.net",
 245            "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
 246   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 247   EXPECT_THAT(features.features(),
 248               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
 249                     Contains(Pair(page_term_login_, 1.0))));
 250   EXPECT_THAT(features.features(),
 251               Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
 252   EXPECT_GE(phishy_score, 0.0);
 253   EXPECT_LT(phishy_score, 0.5);
 254
 255   // Extraction should fail for this case since there is no TLD.
 256   LoadHtml("localhost", "<html><body>content</body></html>");
 257   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 258   EXPECT_EQ(0U, features.features().size());
 259   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 260
 261   // Extraction should also fail for this case because the URL is not http.
 262   net::SpawnedTestServer https_server(
 263       net::SpawnedTestServer::TYPE_HTTPS,
 264       net::SpawnedTestServer::kLocalhost,
 265       base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
 266   ASSERT_TRUE(https_server.Start());
 267   std::string host_str("host.net");  // Must outlive replace_host.
 268   GURL::Replacements replace_host;
 269   replace_host.SetHostStr(host_str);
 270   GURL test_url = https_server.GetURL("/files/title1.html");
 271   ui_test_utils::NavigateToURL(browser(),
 272                                test_url.ReplaceComponents(replace_host));
 273   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 274   EXPECT_EQ(0U, features.features().size());
 275   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 276
 277   // Extraction should fail for this case because the URL is a POST request.
 278   LoadHtmlPost("host.net", "<html><body>content</body></html>");
 279   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
 280   EXPECT_EQ(0U, features.features().size());
 281   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
 282 }
 283
 284 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, DisableDetection) {
 285   // No scorer yet, so the classifier is not ready.
 286   EXPECT_FALSE(classifier_->is_ready());
 287
 288   // Now set the scorer.
 289   classifier_->set_phishing_scorer(scorer_.get());
 290   EXPECT_TRUE(classifier_->is_ready());
 291
 292   // Set a NULL scorer, which turns detection back off.
 293   classifier_->set_phishing_scorer(NULL);
 294   EXPECT_FALSE(classifier_->is_ready());
 295 }
 296
 297 }  // namespace safe_browsing