Roll src/third_party/WebKit c63b89c:29324ab (svn 202546:202547)
[chromium-blink-merge.git] / components / url_matcher / regex_set_matcher.cc
blobae994db85dcc89dc5fc8f367f11474a65cf0528a
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "components/url_matcher/regex_set_matcher.h"
7 #include "base/logging.h"
8 #include "base/stl_util.h"
9 #include "base/strings/string_util.h"
10 #include "components/url_matcher/substring_set_matcher.h"
11 #include "third_party/re2/re2/filtered_re2.h"
12 #include "third_party/re2/re2/re2.h"
14 namespace url_matcher {
16 RegexSetMatcher::RegexSetMatcher() {}
18 RegexSetMatcher::~RegexSetMatcher() {
19 DeleteSubstringPatterns();
22 void RegexSetMatcher::AddPatterns(
23 const std::vector<const StringPattern*>& regex_list) {
24 if (regex_list.empty())
25 return;
26 for (size_t i = 0; i < regex_list.size(); ++i) {
27 regexes_[regex_list[i]->id()] = regex_list[i];
30 RebuildMatcher();
33 void RegexSetMatcher::ClearPatterns() {
34 regexes_.clear();
35 RebuildMatcher();
38 bool RegexSetMatcher::Match(const std::string& text,
39 std::set<StringPattern::ID>* matches) const {
40 size_t old_number_of_matches = matches->size();
41 if (regexes_.empty())
42 return false;
43 if (!filtered_re2_.get()) {
44 LOG(ERROR) << "RegexSetMatcher was not initialized";
45 return false;
48 // FilteredRE2 expects lowercase for prefiltering, but we still
49 // match case-sensitively.
50 std::vector<RE2ID> atoms(FindSubstringMatches(base::ToLowerASCII(text)));
52 std::vector<RE2ID> re2_ids;
53 filtered_re2_->AllMatches(text, atoms, &re2_ids);
55 for (size_t i = 0; i < re2_ids.size(); ++i) {
56 StringPattern::ID id = re2_id_map_[re2_ids[i]];
57 matches->insert(id);
59 return old_number_of_matches != matches->size();
62 bool RegexSetMatcher::IsEmpty() const {
63 return regexes_.empty();
66 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
67 const std::string& text) const {
68 std::set<int> atoms_set;
69 substring_matcher_->Match(text, &atoms_set);
70 return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
73 void RegexSetMatcher::RebuildMatcher() {
74 re2_id_map_.clear();
75 filtered_re2_.reset(new re2::FilteredRE2());
76 if (regexes_.empty())
77 return;
79 for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
80 RE2ID re2_id;
81 RE2::ErrorCode error = filtered_re2_->Add(
82 it->second->pattern(), RE2::DefaultOptions, &re2_id);
83 if (error == RE2::NoError) {
84 DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
85 re2_id_map_.push_back(it->first);
86 } else {
87 // Unparseable regexes should have been rejected already in
88 // URLMatcherFactory::CreateURLMatchesCondition.
89 LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
90 << it->second->pattern() << ")";
94 std::vector<std::string> strings_to_match;
95 filtered_re2_->Compile(&strings_to_match);
97 substring_matcher_.reset(new SubstringSetMatcher);
98 DeleteSubstringPatterns();
99 // Build SubstringSetMatcher from |strings_to_match|.
100 // SubstringSetMatcher doesn't own its strings.
101 for (size_t i = 0; i < strings_to_match.size(); ++i) {
102 substring_patterns_.push_back(
103 new StringPattern(strings_to_match[i], i));
105 substring_matcher_->RegisterPatterns(substring_patterns_);
108 void RegexSetMatcher::DeleteSubstringPatterns() {
109 STLDeleteElements(&substring_patterns_);
112 } // namespace url_matcher