1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
6 #define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
13 #include "base/memory/scoped_ptr.h"
14 #include "components/url_matcher/string_pattern.h"
15 #include "components/url_matcher/substring_set_matcher.h"
16 #include "components/url_matcher/url_matcher_export.h"
22 namespace url_matcher
{
24 // Efficiently matches URLs against a collection of regular expressions,
25 // using FilteredRE2 to reduce the number of regexes that must be matched
26 // by pre-filtering with substring matching. See:
27 // http://swtch.com/~rsc/regexp/regexp3.html#analysis
28 class URL_MATCHER_EXPORT RegexSetMatcher
{
// Virtual destructor; only declared here, with the definition elsewhere.
31 virtual ~RegexSetMatcher();
33 // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
34 // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
// patterns in one call over making several calls with fewer patterns each.
36 // Ownership of the patterns remains with the caller.
37 void AddPatterns(const std::vector
<const StringPattern
*>& regex_list
);
39 // Removes all regex patterns.
42 // Appends the IDs of regular expressions in our set that match the |text|
// to the set pointed to by |matches|. Does not modify the matcher (const).
// NOTE(review): the meaning of the bool result is not documented here;
// presumably true when at least one pattern matched -- confirm in the .cc.
44 bool Match(const std::string
& text
,
45 std::set
<StringPattern::ID
>* matches
) const;
// Associates a regex StringPattern::ID with a pointer to its (const)
// StringPattern.
51 typedef std::map
<StringPattern::ID
, const StringPattern
*> RegexMap
;
// Sequence of StringPattern::IDs.
// NOTE(review): presumably indexed by the RE2ID that FilteredRE2 assigns, so
// that an RE2ID can be translated back to a StringPattern::ID -- confirm.
52 typedef std::vector
<StringPattern::ID
> RE2IDMap
;
54 // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
// occur in |text|; the result is returned by value as a vector of RE2ID.
// NOTE(review): presumably consulted by Match() as the pre-filtering step
// before the regexes themselves are evaluated -- confirm in the .cc.
56 std::vector
<RE2ID
> FindSubstringMatches(const std::string
& text
) const;
58 // Rebuilds the FilteredRE2 matcher from scratch. Needs to be called
59 // whenever our set of regexes changes.
60 // TODO(yoz): investigate if it could be done incrementally;
61 // apparently not supported by FilteredRE2.
62 void RebuildMatcher();
64 // Clean up StringPatterns in |substring_patterns_|.
// Those patterns come from FilteredRE2 and their lifetime is managed by this
// class.
65 void DeleteSubstringPatterns();
67 // Mapping of regex StringPattern::IDs to regexes.
69 // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
70 // to regex StringPattern::IDs.
// FilteredRE2 instance held through a scoped_ptr.
// NOTE(review): presumably the object RebuildMatcher() recreates -- confirm.
73 scoped_ptr
<re2::FilteredRE2
> filtered_re2_
;
// SubstringSetMatcher held through a scoped_ptr; it uses the patterns stored
// in |substring_patterns_|.
74 scoped_ptr
<SubstringSetMatcher
> substring_matcher_
;
76 // The substring patterns from FilteredRE2, which are used in
77 // |substring_matcher_| but whose lifetime is managed here.
// Stored as pointers to const StringPattern; DeleteSubstringPatterns() is the
// declared cleanup routine for them.
78 std::vector
<const StringPattern
*> substring_patterns_
;
81 } // namespace url_matcher
83 #endif // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_