1 /* This file is part of the KDE project
3 Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
4 Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
5 Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with this library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA.
23 #include "khtml_filter_p.h"
26 // rolling hash parameters
28 #define HASH_Q (17509)
29 // HASH_MOD = (HASH_P^7) % HASH_Q
30 #define HASH_MOD (523)
34 void FilterSet::addFilter(const QString
& filterStr
)
36 QString filter
= filterStr
;
38 if (filter
.startsWith(QLatin1Char('!')))
43 int last
= filter
.length() - 1;
44 if (filter
.startsWith(QLatin1String("@@")))
47 // Strip options, we ignore them for now.
48 int dollar
= filter
.lastIndexOf(QLatin1Char('$'));
52 // Perhaps nothing left?
56 filter
= filter
.mid(first
, last
- first
+ 1);
58 // Is it a regexp filter?
59 if (filter
.length()>2 && filter
.startsWith(QLatin1Char('/')) && filter
.endsWith(QLatin1Char('/')))
61 QString inside
= filter
.mid(1, filter
.length()-2);
64 // qDebug() << "R:" << inside;
68 // Nope, a wildcard one.
69 // Note: For these, we also need to handle |.
71 // Strip wildcards at the ends
73 last
= filter
.length() - 1;
75 while (first
< filter
.length() && filter
[first
] == QLatin1Char('*'))
78 while (last
>= 0 && filter
[last
] == QLatin1Char('*'))
82 filter
= QLatin1String("*"); // erm... Well, they asked for it.
84 filter
= filter
.mid(first
, last
- first
+ 1);
86 // Now, do we still have any wildcard stuff left?
87 if (filter
.contains("*") || filter
.contains("?"))
89 // qDebug() << "W:" << filter;
90 // check if we can use RK first (and then check full RE for the rest) for better performance
91 int aPos
= filter
.indexOf('*');
93 aPos
= filter
.length();
94 int qPos
= filter
.indexOf('?');
96 qPos
= filter
.length();
97 int pos
= qMin(aPos
, qPos
);
101 rx
.setPatternSyntax(QRegExp::Wildcard
);
102 rx
.setPattern(filter
.mid(pos
));
104 stringFiltersMatcher
.addWildedString(filter
.mid(0, pos
), rx
);
109 rx
.setPatternSyntax(QRegExp::Wildcard
);
110 rx
.setPattern(filter
);
111 reFilters
.append(rx
);
117 stringFiltersMatcher
.addString(filter
);
122 bool FilterSet::isUrlMatched(const QString
& url
)
124 if (stringFiltersMatcher
.isMatched(url
))
127 for (int c
= 0; c
< reFilters
.size(); ++c
)
129 if (url
.contains(reFilters
[c
]))
136 void FilterSet::clear()
139 stringFiltersMatcher
.clear();
143 void StringsMatcher::addString(const QString
& pattern
)
145 if (pattern
.length() < 8) {
146 // handle short string differently
147 shortStringFilters
.append(pattern
);
149 // use modified Rabin-Karp's algorithm with 8-length string hash
150 // i.e. store hash of first 8 chars in the HashMap for fast look-up
151 stringFilters
.append(pattern
);
152 int ind
= stringFilters
.size() - 1;
155 // compute hash using rolling hash
156 // hash for string: x0,x1,x2...xn-1 will be:
157 // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
158 // where p and q some wisely-chosen integers
159 /*for (int k = 0; k < 8; ++k)*/
160 int len
= pattern
.length();
161 for (int k
= len
- 8; k
< len
; ++k
)
162 current
= (current
* HASH_P
+ pattern
[k
].unicode()) % HASH_Q
;
164 // insert computed hash value into HashMap
165 WTF::HashMap
<int, QVector
<int> >::iterator it
= stringFiltersHash
.find(current
+ 1);
166 if (it
== stringFiltersHash
.end()) {
169 stringFiltersHash
.add(current
+ 1, list
);
170 fastLookUp
.setBit(current
);
172 it
->second
.append(ind
);
177 void StringsMatcher::addWildedString(const QString
& prefix
, const QRegExp
& rx
)
179 rePrefixes
.append(prefix
);
180 reFilters
.append(rx
);
181 int index
= -rePrefixes
.size();
184 for (int k
= 0; k
< 8; ++k
)
185 current
= (current
* HASH_P
+ prefix
[k
].unicode()) % HASH_Q
;
187 // insert computed hash value into HashMap
188 WTF::HashMap
<int, QVector
<int> >::iterator it
= stringFiltersHash
.find(current
+ 1);
189 if (it
== stringFiltersHash
.end()) {
192 stringFiltersHash
.add(current
+ 1, list
);
193 fastLookUp
.setBit(current
);
195 it
->second
.append(index
);
199 bool StringsMatcher::isMatched(const QString
& str
) const
201 // check short strings first
202 for (int i
= 0; i
< shortStringFilters
.size(); ++i
) {
203 if (str
.contains(shortStringFilters
[i
]))
207 int len
= str
.length();
212 // compute hash for first 8 characters
213 for (k
= 0; k
< 8 && k
< len
; ++k
)
214 current
= (current
* HASH_P
+ str
[k
].unicode()) % HASH_Q
;
216 WTF::HashMap
<int, QVector
<int> >::const_iterator hashEnd
= stringFiltersHash
.end();
217 // main Rabin-Karp's algorithm loop
218 for (k
= 7; k
< len
; ++k
, current
= next
) {
219 // roll the hash if not at the end
220 // (calculate hash for the next iteration)
222 next
= (HASH_P
* ((current
+ HASH_Q
- ((HASH_MOD
* str
[k
- 7].unicode()) % HASH_Q
)) % HASH_Q
) + str
[k
+ 1].unicode()) % HASH_Q
;
224 if (!fastLookUp
.testBit(current
))
227 // look-up the hash in the HashMap and check all strings
228 WTF::HashMap
<int, QVector
<int> >::const_iterator it
= stringFiltersHash
.find(current
+ 1);
230 // check possible strings
232 for (int j
= 0; j
< it
->second
.size(); ++j
) {
233 int index
= it
->second
[j
];
234 // check if we got simple string or REs prefix
236 int flen
= stringFilters
[index
].length();
237 if (k
- flen
+ 1 >= 0 && stringFilters
[index
] == str
.midRef(k
- flen
+ 1 , flen
))
241 int flen
= rePrefixes
[index
].length();
242 if (k
- 8 + flen
< len
&& rePrefixes
[index
] == str
.midRef(k
- 7, flen
) &&
243 str
.indexOf(reFilters
[index
], k
- 7 + flen
) == k
- 7 + flen
)
253 void StringsMatcher::clear()
255 stringFilters
.clear();
256 shortStringFilters
.clear();
259 stringFiltersHash
.clear();
260 fastLookUp
.resize(HASH_Q
);
261 fastLookUp
.fill(0, 0, HASH_Q
);
266 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;