// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "components/url_matcher/regex_set_matcher.h"
7 #include "base/logging.h"
8 #include "base/stl_util.h"
9 #include "base/strings/string_util.h"
10 #include "components/url_matcher/substring_set_matcher.h"
11 #include "third_party/re2/re2/filtered_re2.h"
12 #include "third_party/re2/re2/re2.h"
14 namespace url_matcher
{
16 RegexSetMatcher::RegexSetMatcher() {}
18 RegexSetMatcher::~RegexSetMatcher() {
19 DeleteSubstringPatterns();
22 void RegexSetMatcher::AddPatterns(
23 const std::vector
<const StringPattern
*>& regex_list
) {
24 if (regex_list
.empty())
26 for (size_t i
= 0; i
< regex_list
.size(); ++i
) {
27 regexes_
[regex_list
[i
]->id()] = regex_list
[i
];
33 void RegexSetMatcher::ClearPatterns() {
38 bool RegexSetMatcher::Match(const std::string
& text
,
39 std::set
<StringPattern::ID
>* matches
) const {
40 size_t old_number_of_matches
= matches
->size();
43 if (!filtered_re2_
.get()) {
44 LOG(ERROR
) << "RegexSetMatcher was not initialized";
48 // FilteredRE2 expects lowercase for prefiltering, but we still
49 // match case-sensitively.
50 std::vector
<RE2ID
> atoms(FindSubstringMatches(base::ToLowerASCII(text
)));
52 std::vector
<RE2ID
> re2_ids
;
53 filtered_re2_
->AllMatches(text
, atoms
, &re2_ids
);
55 for (size_t i
= 0; i
< re2_ids
.size(); ++i
) {
56 StringPattern::ID id
= re2_id_map_
[re2_ids
[i
]];
59 return old_number_of_matches
!= matches
->size();
62 bool RegexSetMatcher::IsEmpty() const {
63 return regexes_
.empty();
66 std::vector
<RegexSetMatcher::RE2ID
> RegexSetMatcher::FindSubstringMatches(
67 const std::string
& text
) const {
68 std::set
<int> atoms_set
;
69 substring_matcher_
->Match(text
, &atoms_set
);
70 return std::vector
<RE2ID
>(atoms_set
.begin(), atoms_set
.end());
73 void RegexSetMatcher::RebuildMatcher() {
75 filtered_re2_
.reset(new re2::FilteredRE2());
79 for (RegexMap::iterator it
= regexes_
.begin(); it
!= regexes_
.end(); ++it
) {
81 RE2::ErrorCode error
= filtered_re2_
->Add(
82 it
->second
->pattern(), RE2::DefaultOptions
, &re2_id
);
83 if (error
== RE2::NoError
) {
84 DCHECK_EQ(static_cast<RE2ID
>(re2_id_map_
.size()), re2_id
);
85 re2_id_map_
.push_back(it
->first
);
87 // Unparseable regexes should have been rejected already in
88 // URLMatcherFactory::CreateURLMatchesCondition.
89 LOG(ERROR
) << "Could not parse regex (id=" << it
->first
<< ", "
90 << it
->second
->pattern() << ")";
94 std::vector
<std::string
> strings_to_match
;
95 filtered_re2_
->Compile(&strings_to_match
);
97 substring_matcher_
.reset(new SubstringSetMatcher
);
98 DeleteSubstringPatterns();
99 // Build SubstringSetMatcher from |strings_to_match|.
100 // SubstringSetMatcher doesn't own its strings.
101 for (size_t i
= 0; i
< strings_to_match
.size(); ++i
) {
102 substring_patterns_
.push_back(
103 new StringPattern(strings_to_match
[i
], i
));
105 substring_matcher_
->RegisterPatterns(substring_patterns_
);
108 void RegexSetMatcher::DeleteSubstringPatterns() {
109 STLDeleteElements(&substring_patterns_
);
112 } // namespace url_matcher