// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "components/url_matcher/regex_set_matcher.h"
7 #include "base/logging.h"
8 #include "base/stl_util.h"
9 #include "base/strings/string_util.h"
10 #include "components/url_matcher/substring_set_matcher.h"
11 #include "third_party/re2/re2/filtered_re2.h"
12 #include "third_party/re2/re2/re2.h"
14 namespace url_matcher
{
16 RegexSetMatcher::RegexSetMatcher() {}
18 RegexSetMatcher::~RegexSetMatcher() {
19 DeleteSubstringPatterns();
22 void RegexSetMatcher::AddPatterns(
23 const std::vector
<const StringPattern
*>& regex_list
) {
24 if (regex_list
.empty())
26 for (size_t i
= 0; i
< regex_list
.size(); ++i
) {
27 regexes_
[regex_list
[i
]->id()] = regex_list
[i
];
33 void RegexSetMatcher::ClearPatterns() {
38 bool RegexSetMatcher::Match(const std::string
& text
,
39 std::set
<StringPattern::ID
>* matches
) const {
40 size_t old_number_of_matches
= matches
->size();
43 if (!filtered_re2_
.get()) {
44 LOG(ERROR
) << "RegexSetMatcher was not initialized";
48 // FilteredRE2 expects lowercase for prefiltering, but we still
49 // match case-sensitively.
50 std::vector
<RE2ID
> atoms(FindSubstringMatches(
51 base::StringToLowerASCII(text
)));
53 std::vector
<RE2ID
> re2_ids
;
54 filtered_re2_
->AllMatches(text
, atoms
, &re2_ids
);
56 for (size_t i
= 0; i
< re2_ids
.size(); ++i
) {
57 StringPattern::ID id
= re2_id_map_
[re2_ids
[i
]];
60 return old_number_of_matches
!= matches
->size();
63 bool RegexSetMatcher::IsEmpty() const {
64 return regexes_
.empty();
67 std::vector
<RegexSetMatcher::RE2ID
> RegexSetMatcher::FindSubstringMatches(
68 const std::string
& text
) const {
69 std::set
<int> atoms_set
;
70 substring_matcher_
->Match(text
, &atoms_set
);
71 return std::vector
<RE2ID
>(atoms_set
.begin(), atoms_set
.end());
74 void RegexSetMatcher::RebuildMatcher() {
76 filtered_re2_
.reset(new re2::FilteredRE2());
80 for (RegexMap::iterator it
= regexes_
.begin(); it
!= regexes_
.end(); ++it
) {
82 RE2::ErrorCode error
= filtered_re2_
->Add(
83 it
->second
->pattern(), RE2::DefaultOptions
, &re2_id
);
84 if (error
== RE2::NoError
) {
85 DCHECK_EQ(static_cast<RE2ID
>(re2_id_map_
.size()), re2_id
);
86 re2_id_map_
.push_back(it
->first
);
88 // Unparseable regexes should have been rejected already in
89 // URLMatcherFactory::CreateURLMatchesCondition.
90 LOG(ERROR
) << "Could not parse regex (id=" << it
->first
<< ", "
91 << it
->second
->pattern() << ")";
95 std::vector
<std::string
> strings_to_match
;
96 filtered_re2_
->Compile(&strings_to_match
);
98 substring_matcher_
.reset(new SubstringSetMatcher
);
99 DeleteSubstringPatterns();
100 // Build SubstringSetMatcher from |strings_to_match|.
101 // SubstringSetMatcher doesn't own its strings.
102 for (size_t i
= 0; i
< strings_to_match
.size(); ++i
) {
103 substring_patterns_
.push_back(
104 new StringPattern(strings_to_match
[i
], i
));
106 substring_matcher_
->RegisterPatterns(substring_patterns_
);
109 void RegexSetMatcher::DeleteSubstringPatterns() {
110 STLDeleteElements(&substring_patterns_
);
113 } // namespace url_matcher