Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / ucb / source / regexp / regexp.cxx
blob8b8dcbc85b8f0c6efe648cadadf5491e6d090b58
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <regexp.hxx>
22 #include <cstddef>
24 #include <osl/diagnose.h>
25 #include <com/sun/star/lang/IllegalArgumentException.hpp>
26 #include <rtl/character.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/ustring.hxx>
29 #include <utility>
31 using namespace com::sun::star;
32 using namespace ucb_impl;
35 // Regexp
38 inline Regexp::Regexp(Kind eTheKind, OUString aThePrefix,
39 bool bTheEmptyDomain, OUString aTheInfix,
40 bool bTheTranslation,
41 OUString aTheReversePrefix):
42 m_eKind(eTheKind),
43 m_aPrefix(std::move(aThePrefix)),
44 m_aInfix(std::move(aTheInfix)),
45 m_aReversePrefix(std::move(aTheReversePrefix)),
46 m_bEmptyDomain(bTheEmptyDomain),
47 m_bTranslation(bTheTranslation)
49 OSL_ASSERT(m_eKind == KIND_DOMAIN
50 || (!m_bEmptyDomain && m_aInfix.isEmpty()));
51 OSL_ASSERT(m_bTranslation || m_aReversePrefix.isEmpty());
55 namespace {
57 bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
58 sal_Unicode const * pEnd,
59 OUString const & rString)
61 sal_Unicode const * p = *pBegin;
63 sal_Unicode const * q = rString.getStr();
64 sal_Unicode const * qEnd = q + rString.getLength();
66 if (pEnd - p < qEnd - q)
67 return false;
69 while (q != qEnd)
71 if (rtl::compareIgnoreAsciiCase(*p++, *q++) != 0)
72 return false;
75 *pBegin = p;
76 return true;
81 bool Regexp::matches(OUString const & rString) const
83 sal_Unicode const * pBegin = rString.getStr();
84 sal_Unicode const * pEnd = pBegin + rString.getLength();
86 bool bMatches = false;
88 sal_Unicode const * p = pBegin;
89 if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
91 switch (m_eKind)
93 case KIND_PREFIX:
94 bMatches = true;
95 break;
97 case KIND_AUTHORITY:
98 bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
99 break;
101 case KIND_DOMAIN:
102 if (!m_bEmptyDomain)
104 if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
105 break;
106 ++p;
108 for (;;)
110 sal_Unicode const * q = p;
111 if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
112 && (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
114 bMatches = true;
115 break;
118 if (p == pEnd)
119 break;
121 sal_Unicode c = *p++;
122 if (c == '/' || c == '?' || c == '#')
123 break;
125 break;
129 return bMatches;
133 namespace {
135 bool isScheme(OUString const & rString, bool bColon)
137 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
138 // is true) from RFC 2396:
139 sal_Unicode const * p = rString.getStr();
140 sal_Unicode const * pEnd = p + rString.getLength();
141 if (p != pEnd && rtl::isAsciiAlpha(*p))
142 for (++p;;)
144 if (p == pEnd)
145 return !bColon;
146 sal_Unicode c = *p++;
147 if (!(rtl::isAsciiAlphanumeric(c)
148 || c == '+' || c == '-' || c == '.'))
149 return bColon && c == ':' && p == pEnd;
151 return false;
154 void appendStringLiteral(OUStringBuffer * pBuffer,
155 OUString const & rString)
157 OSL_ASSERT(pBuffer);
159 pBuffer->append('"');
160 sal_Unicode const * p = rString.getStr();
161 sal_Unicode const * pEnd = p + rString.getLength();
162 while (p != pEnd)
164 sal_Unicode c = *p++;
165 if (c == '"' || c == '\\')
166 pBuffer->append('\\');
167 pBuffer->append(c);
169 pBuffer->append('"');
174 OUString Regexp::getRegexp() const
176 if (m_bTranslation)
178 OUStringBuffer aBuffer;
179 if (!m_aPrefix.isEmpty())
180 appendStringLiteral(&aBuffer, m_aPrefix);
181 switch (m_eKind)
183 case KIND_PREFIX:
184 aBuffer.append("(.*)");
185 break;
187 case KIND_AUTHORITY:
188 aBuffer.append("(([/?#].*)?)");
189 break;
191 case KIND_DOMAIN:
192 aBuffer.append("([^/?#]" + OUStringChar(sal_Unicode(m_bEmptyDomain ? '*' : '+')));
193 if (!m_aInfix.isEmpty())
194 appendStringLiteral(&aBuffer, m_aInfix);
195 aBuffer.append("([/?#].*)?)");
196 break;
198 aBuffer.append("->");
199 if (!m_aReversePrefix.isEmpty())
200 appendStringLiteral(&aBuffer, m_aReversePrefix);
201 aBuffer.append("\\1");
202 return aBuffer.makeStringAndClear();
204 else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
205 return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
206 else
208 OUStringBuffer aBuffer;
209 if (!m_aPrefix.isEmpty())
210 appendStringLiteral(&aBuffer, m_aPrefix);
211 switch (m_eKind)
213 case KIND_PREFIX:
214 aBuffer.append(".*");
215 break;
217 case KIND_AUTHORITY:
218 aBuffer.append("([/?#].*)?");
219 break;
221 case KIND_DOMAIN:
222 aBuffer.append("[^/?#]" + OUStringChar( m_bEmptyDomain ? '*' : '+' ));
223 if (!m_aInfix.isEmpty())
224 appendStringLiteral(&aBuffer, m_aInfix);
225 aBuffer.append("([/?#].*)?");
226 break;
228 return aBuffer.makeStringAndClear();
233 namespace {
235 bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
236 char const * pString, size_t nStringLength)
238 sal_Unicode const * p = *pBegin;
240 unsigned char const * q = reinterpret_cast< unsigned char const * >(pString);
241 unsigned char const * qEnd = q + nStringLength;
243 if (pEnd - p < qEnd - q)
244 return false;
246 while (q != qEnd)
248 sal_Unicode c1 = *p++;
249 sal_Unicode c2 = *q++;
250 if (c1 != c2)
251 return false;
254 *pBegin = p;
255 return true;
258 bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
259 OUString * pString)
261 sal_Unicode const * p = *pBegin;
263 if (p == pEnd || *p++ != '"')
264 return false;
266 OUStringBuffer aBuffer;
267 for (;;)
269 if (p == pEnd)
270 return false;
271 sal_Unicode c = *p++;
272 if (c == '"')
273 break;
274 if (c == '\\')
276 if (p == pEnd)
277 return false;
278 c = *p++;
279 if (c != '"' && c != '\\')
280 return false;
282 aBuffer.append(c);
285 *pBegin = p;
286 *pString = aBuffer.makeStringAndClear();
287 return true;
292 Regexp Regexp::parse(OUString const & rRegexp)
294 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
295 // where <scheme> is as defined in RFC 2396:
296 if (isScheme(rRegexp, false))
297 return Regexp(Regexp::KIND_PREFIX,
298 rRegexp + ":",
299 false,
300 OUString(),
301 false,
302 OUString());
304 sal_Unicode const * p = rRegexp.getStr();
305 sal_Unicode const * pEnd = p + rRegexp.getLength();
307 OUString aPrefix;
308 scanStringLiteral(&p, pEnd, &aPrefix);
310 if (p == pEnd)
311 throw lang::IllegalArgumentException();
313 // This and the matchString() calls below are some of the few places where
314 // RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
315 // (c.f. https://gerrit.libreoffice.org/3117)
316 if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
318 if (p != pEnd)
319 throw lang::IllegalArgumentException();
321 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, OUString(),
322 false, OUString());
324 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
326 OUString aReversePrefix;
327 scanStringLiteral(&p, pEnd, &aReversePrefix);
329 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
330 || p != pEnd)
331 throw lang::IllegalArgumentException();
333 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, OUString(),
334 true, aReversePrefix);
336 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
338 if (p != pEnd)
339 throw lang::IllegalArgumentException();
341 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, OUString(),
342 false, OUString());
344 else if (matchString(&p, pEnd,
345 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
347 OUString aReversePrefix;
348 if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
349 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
350 && p == pEnd))
351 throw lang::IllegalArgumentException();
353 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, OUString(),
354 true, aReversePrefix);
356 else
358 bool bOpen = false;
359 if (p != pEnd && *p == '(')
361 ++p;
362 bOpen = true;
365 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
366 throw lang::IllegalArgumentException();
368 if (p == pEnd || (*p != '*' && *p != '+'))
369 throw lang::IllegalArgumentException();
370 bool bEmptyDomain = *p++ == '*';
372 OUString aInfix;
373 scanStringLiteral(&p, pEnd, &aInfix);
375 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
376 throw lang::IllegalArgumentException();
378 OUString aReversePrefix;
379 if (bOpen
380 && !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
381 && scanStringLiteral(&p, pEnd, &aReversePrefix)
382 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
383 throw lang::IllegalArgumentException();
385 if (p != pEnd)
386 throw lang::IllegalArgumentException();
388 return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
389 bOpen, aReversePrefix);
393 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */