don't discard iframe children.
[kdelibs.git] / khtml / khtml_filter.cpp
blob622d8451e4b8c50e1bd5edb2300dcd087d05ac5e
1 /* This file is part of the KDE project
3 Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
4 Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
5 Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with this library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA.
23 #include "khtml_filter_p.h"
24 #include <QDebug>
26 // rolling hash parameters
27 #define HASH_P (1997)
28 #define HASH_Q (17509)
29 // HASH_MOD = (HASH_P^7) % HASH_Q
30 #define HASH_MOD (523)
32 namespace khtml {
34 void FilterSet::addFilter(const QString& filterStr)
36 QString filter = filterStr;
38 if (filter.startsWith(QLatin1Char('!')))
39 return;
41 // Strip leading @@
42 int first = 0;
43 int last = filter.length() - 1;
44 if (filter.startsWith(QLatin1String("@@")))
45 first = 2;
47 // Strip options, we ignore them for now.
48 int dollar = filter.lastIndexOf(QLatin1Char('$'));
49 if (dollar != -1)
50 last = dollar - 1;
52 // Perhaps nothing left?
53 if (first > last)
54 return;
56 filter = filter.mid(first, last - first + 1);
58 // Is it a regexp filter?
59 if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/')))
61 QString inside = filter.mid(1, filter.length()-2);
62 QRegExp rx(inside);
63 reFilters.append(rx);
64 // qDebug() << "R:" << inside;
66 else
68 // Nope, a wildcard one.
69 // Note: For these, we also need to handle |.
71 // Strip wildcards at the ends
72 first = 0;
73 last = filter.length() - 1;
75 while (first < filter.length() && filter[first] == QLatin1Char('*'))
76 ++first;
78 while (last >= 0 && filter[last] == QLatin1Char('*'))
79 --last;
81 if (first > last)
82 filter = QLatin1String("*"); // erm... Well, they asked for it.
83 else
84 filter = filter.mid(first, last - first + 1);
86 // Now, do we still have any wildcard stuff left?
87 if (filter.contains("*") || filter.contains("?"))
89 // qDebug() << "W:" << filter;
90 // check if we can use RK first (and then check full RE for the rest) for better performance
91 int aPos = filter.indexOf('*');
92 if (aPos < 0)
93 aPos = filter.length();
94 int qPos = filter.indexOf('?');
95 if (qPos < 0)
96 qPos = filter.length();
97 int pos = qMin(aPos, qPos);
98 if (pos > 7) {
99 QRegExp rx;
101 rx.setPatternSyntax(QRegExp::Wildcard);
102 rx.setPattern(filter.mid(pos));
104 stringFiltersMatcher.addWildedString(filter.mid(0, pos), rx);
106 } else {
107 QRegExp rx;
109 rx.setPatternSyntax(QRegExp::Wildcard);
110 rx.setPattern(filter);
111 reFilters.append(rx);
114 else
116 // Fast path
117 stringFiltersMatcher.addString(filter);
122 bool FilterSet::isUrlMatched(const QString& url)
124 if (stringFiltersMatcher.isMatched(url))
125 return true;
127 for (int c = 0; c < reFilters.size(); ++c)
129 if (url.contains(reFilters[c]))
130 return true;
133 return false;
136 void FilterSet::clear()
138 reFilters.clear();
139 stringFiltersMatcher.clear();
143 void StringsMatcher::addString(const QString& pattern)
145 if (pattern.length() < 8) {
146 // handle short string differently
147 shortStringFilters.append(pattern);
148 } else {
149 // use modified Rabin-Karp's algorithm with 8-length string hash
150 // i.e. store hash of first 8 chars in the HashMap for fast look-up
151 stringFilters.append(pattern);
152 int ind = stringFilters.size() - 1;
153 int current = 0;
155 // compute hash using rolling hash
156 // hash for string: x0,x1,x2...xn-1 will be:
157 // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
158 // where p and q some wisely-chosen integers
159 /*for (int k = 0; k < 8; ++k)*/
160 int len = pattern.length();
161 for (int k = len - 8; k < len; ++k)
162 current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
164 // insert computed hash value into HashMap
165 WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
166 if (it == stringFiltersHash.end()) {
167 QVector<int> list;
168 list.append(ind);
169 stringFiltersHash.add(current + 1, list);
170 fastLookUp.setBit(current);
171 } else {
172 it->second.append(ind);
177 void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
179 rePrefixes.append(prefix);
180 reFilters.append(rx);
181 int index = -rePrefixes.size();
183 int current = 0;
184 for (int k = 0; k < 8; ++k)
185 current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
187 // insert computed hash value into HashMap
188 WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
189 if (it == stringFiltersHash.end()) {
190 QVector<int> list;
191 list.append(index);
192 stringFiltersHash.add(current + 1, list);
193 fastLookUp.setBit(current);
194 } else {
195 it->second.append(index);
199 bool StringsMatcher::isMatched(const QString& str) const
201 // check short strings first
202 for (int i = 0; i < shortStringFilters.size(); ++i) {
203 if (str.contains(shortStringFilters[i]))
204 return true;
207 int len = str.length();
208 int k;
210 int current = 0;
211 int next = 0;
212 // compute hash for first 8 characters
213 for (k = 0; k < 8 && k < len; ++k)
214 current = (current * HASH_P + str[k].unicode()) % HASH_Q;
216 WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
217 // main Rabin-Karp's algorithm loop
218 for (k = 7; k < len; ++k, current = next) {
219 // roll the hash if not at the end
220 // (calculate hash for the next iteration)
221 if (k + 1 < len)
222 next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
224 if (!fastLookUp.testBit(current))
225 continue;
227 // look-up the hash in the HashMap and check all strings
228 WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
230 // check possible strings
231 if (it != hashEnd) {
232 for (int j = 0; j < it->second.size(); ++j) {
233 int index = it->second[j];
234 // check if we got simple string or REs prefix
235 if (index >= 0) {
236 int flen = stringFilters[index].length();
237 if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
238 return true;
239 } else {
240 index = -index - 1;
241 int flen = rePrefixes[index].length();
242 if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) &&
243 str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen)
244 return true;
250 return false;
253 void StringsMatcher::clear()
255 stringFilters.clear();
256 shortStringFilters.clear();
257 reFilters.clear();
258 rePrefixes.clear();
259 stringFiltersHash.clear();
260 fastLookUp.resize(HASH_Q);
261 fastLookUp.fill(0, 0, HASH_Q);
266 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;