// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "chrome/renderer/safe_browsing/features.h"
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
10 namespace safe_browsing
{
12 const size_t FeatureMap::kMaxFeatureMapSize
= 10000;
14 FeatureMap::FeatureMap() {}
15 FeatureMap::~FeatureMap() {}
17 bool FeatureMap::AddBooleanFeature(const std::string
& name
) {
18 return AddRealFeature(name
, 1.0);
21 bool FeatureMap::AddRealFeature(const std::string
& name
, double value
) {
22 if (features_
.size() >= kMaxFeatureMapSize
) {
23 // If we hit this case, it indicates that either kMaxFeatureMapSize is
24 // too small, or there is a bug causing too many features to be added.
25 // In this case, we'll log to a histogram so we can see that this is
26 // happening, and make phishing classification fail silently.
27 LOG(ERROR
) << "Not adding feature: " << name
<< " because the "
28 << "feature map is too large.";
29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
32 // We only expect features in the range [0.0, 1.0], so fail if the feature is
33 // outside this range.
34 if (value
< 0.0 || value
> 1.0) {
35 LOG(ERROR
) << "Not adding feature: " << name
<< " because the value "
36 << value
<< " is not in the range [0.0, 1.0].";
37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
41 features_
[name
] = value
;
45 void FeatureMap::Clear() {
// String names of the features stored in a FeatureMap.
// NOTE(review): names ending in '=' look like prefixes to which a token is
// appended by the code that emits the feature; confirm against the callers.
namespace features {

// URL host features
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// URL host aggregate features
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// URL path features
const char kUrlPathToken[] = "UrlPathToken=";

// DOM HTML form features
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageActionURL[] = "PageActionURL=";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// DOM HTML link features
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// DOM HTML script features
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// Other DOM HTML features
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// Page term features
const char kPageTerm[] = "PageTerm=";

}  // namespace features
88 } // namespace safe_browsing