Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / components / url_matcher / regex_set_matcher.h
blobe4eb1dc052f1e0bb14be7371f8992a4d7abc02da
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
6 #define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
13 #include "base/memory/scoped_ptr.h"
14 #include "components/url_matcher/string_pattern.h"
15 #include "components/url_matcher/substring_set_matcher.h"
16 #include "components/url_matcher/url_matcher_export.h"
18 namespace re2 {
19 class FilteredRE2;
22 namespace url_matcher {
24 // Efficiently matches URLs against a collection of regular expressions,
25 // using FilteredRE2 to reduce the number of regexes that must be matched
26 // by pre-filtering with substring matching. See:
27 // http://swtch.com/~rsc/regexp/regexp3.html#analysis
28 class URL_MATCHER_EXPORT RegexSetMatcher {
29 public:
30 RegexSetMatcher();
31 virtual ~RegexSetMatcher();
33 // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
34 // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
35 // patterns at once.
36 // Ownership of the patterns remains with the caller.
37 void AddPatterns(const std::vector<const StringPattern*>& regex_list);
39 // Removes all regex patterns.
40 void ClearPatterns();
42 // Appends the IDs of regular expressions in our set that match the |text|
43 // to |matches|.
44 bool Match(const std::string& text,
45 std::set<StringPattern::ID>* matches) const;
47 bool IsEmpty() const;
49 private:
50 typedef int RE2ID;
51 typedef std::map<StringPattern::ID, const StringPattern*> RegexMap;
52 typedef std::vector<StringPattern::ID> RE2IDMap;
54 // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
55 // match the |text|.
56 std::vector<RE2ID> FindSubstringMatches(const std::string& text) const;
58 // Rebuild FilteredRE2 from scratch. Needs to be called whenever
59 // our set of regexes changes.
60 // TODO(yoz): investigate if it could be done incrementally;
61 // apparently not supported by FilteredRE2.
62 void RebuildMatcher();
64 // Clean up StringPatterns in |substring_patterns_|.
65 void DeleteSubstringPatterns();
67 // Mapping of regex StringPattern::IDs to regexes.
68 RegexMap regexes_;
69 // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
70 // to regex StringPattern::IDs.
71 RE2IDMap re2_id_map_;
73 scoped_ptr<re2::FilteredRE2> filtered_re2_;
74 scoped_ptr<SubstringSetMatcher> substring_matcher_;
76 // The substring patterns from FilteredRE2, which are used in
77 // |substring_matcher_| but whose lifetime is managed here.
78 std::vector<const StringPattern*> substring_patterns_;
81 } // namespace url_matcher
83 #endif // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_