Supervised user import: Listen for profile creation/deletion
[chromium-blink-merge.git] / chrome / renderer / safe_browsing / phishing_url_feature_extractor.h
blobdf451364d9be68d7929be4b1d2eb83cac47569e4
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // PhishingUrlFeatureExtractor handles computing URL-based features for
6 // the client-side phishing detection model. These include tokens in the
7 // host and path, features pertaining to host length, and IP addresses.
9 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_
10 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_
12 #include <string>
13 #include <vector>
15 #include "base/basictypes.h"
17 class GURL;
19 namespace safe_browsing {
20 class FeatureMap;
22 class PhishingUrlFeatureExtractor {
23 public:
24 PhishingUrlFeatureExtractor();
25 ~PhishingUrlFeatureExtractor();
27 // Extracts features for |url| into the given feature map.
28 // Returns true on success.
29 bool ExtractFeatures(const GURL& url, FeatureMap* features);
31 private:
32 friend class PhishingUrlFeatureExtractorTest;
34 static const size_t kMinPathComponentLength = 3;
36 // Given a string, finds all substrings of consecutive alphanumeric
37 // characters of length >= kMinPathComponentLength and inserts them into
38 // tokens.
39 static void SplitStringIntoLongAlphanumTokens(
40 const std::string& full,
41 std::vector<std::string>* tokens);
43 DISALLOW_COPY_AND_ASSIGN(PhishingUrlFeatureExtractor);
46 } // namespace safe_browsing
48 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_