Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / ucb / source / regexp / regexp.cxx
blobdb3934bcb79a74489d0f22fb8f63b65756beee57
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <regexp.hxx>
22 #include <cstddef>
24 #include "osl/diagnose.h"
25 #include <com/sun/star/lang/IllegalArgumentException.hpp>
26 #include <rtl/ustrbuf.hxx>
27 #include <rtl/ustring.hxx>
28 #include <comphelper/string.hxx>
30 namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
31 // unnamed namespaces don't work well yet...
33 using namespace com::sun::star;
34 using namespace ucb_impl;
36 //============================================================================
38 // Regexp
40 //============================================================================
42 inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
43 bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
44 bool bTheTranslation,
45 rtl::OUString const & rTheReversePrefix):
46 m_eKind(eTheKind),
47 m_aPrefix(rThePrefix),
48 m_aInfix(rTheInfix),
49 m_aReversePrefix(rTheReversePrefix),
50 m_bEmptyDomain(bTheEmptyDomain),
51 m_bTranslation(bTheTranslation)
53 OSL_ASSERT(m_eKind == KIND_DOMAIN
54 || (!m_bEmptyDomain && m_aInfix.isEmpty()));
55 OSL_ASSERT(m_bTranslation || m_aReversePrefix.isEmpty());
58 //============================================================================
59 namespace unnamed_ucb_regexp {
61 bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
62 sal_Unicode const * pEnd,
63 rtl::OUString const & rString)
65 sal_Unicode const * p = *pBegin;
67 sal_Unicode const * q = rString.getStr();
68 sal_Unicode const * qEnd = q + rString.getLength();
70 if (pEnd - p < qEnd - q)
71 return false;
73 while (q != qEnd)
75 sal_Unicode c1 = *p++;
76 sal_Unicode c2 = *q++;
77 if (c1 >= 'a' && c1 <= 'z')
78 c1 -= 'a' - 'A';
79 if (c2 >= 'a' && c2 <= 'z')
80 c2 -= 'a' - 'A';
81 if (c1 != c2)
82 return false;
85 *pBegin = p;
86 return true;
91 bool Regexp::matches(rtl::OUString const & rString,
92 rtl::OUString * pTranslation, bool * pTranslated) const
94 sal_Unicode const * pBegin = rString.getStr();
95 sal_Unicode const * pEnd = pBegin + rString.getLength();
97 bool bMatches = false;
99 sal_Unicode const * p = pBegin;
100 if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
102 sal_Unicode const * pBlock1Begin = p;
103 sal_Unicode const * pBlock1End = pEnd;
105 sal_Unicode const * pBlock2Begin = 0;
106 sal_Unicode const * pBlock2End = 0;
108 switch (m_eKind)
110 case KIND_PREFIX:
111 bMatches = true;
112 break;
114 case KIND_AUTHORITY:
115 bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
116 break;
118 case KIND_DOMAIN:
119 if (!m_bEmptyDomain)
121 if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
122 break;
123 ++p;
125 for (;;)
127 sal_Unicode const * q = p;
128 if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
129 && (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
131 bMatches = true;
132 pBlock1End = p;
133 pBlock2Begin = q;
134 pBlock2End = pEnd;
135 break;
138 if (p == pEnd)
139 break;
141 sal_Unicode c = *p++;
142 if (c == '/' || c == '?' || c == '#')
143 break;
145 break;
148 if (bMatches)
150 if (m_bTranslation)
152 if (pTranslation)
154 rtl::OUStringBuffer aBuffer(m_aReversePrefix);
155 aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
156 aBuffer.append(m_aInfix);
157 aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
158 *pTranslation = aBuffer.makeStringAndClear();
160 if (pTranslated)
161 *pTranslated = true;
163 else
165 if (pTranslation)
166 *pTranslation = rString;
167 if (pTranslated)
168 *pTranslated = false;
173 return bMatches;
176 //============================================================================
177 namespace unnamed_ucb_regexp {
179 bool isScheme(rtl::OUString const & rString, bool bColon)
181 using comphelper::string::isalphaAscii;
182 using comphelper::string::isdigitAscii;
183 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
184 // is true) from RFC 2396:
185 sal_Unicode const * p = rString.getStr();
186 sal_Unicode const * pEnd = p + rString.getLength();
187 if (p != pEnd && isalphaAscii(*p))
188 for (++p;;)
190 if (p == pEnd)
191 return !bColon;
192 sal_Unicode c = *p++;
193 if (!(isalphaAscii(c) || isdigitAscii(c)
194 || c == '+' || c == '-' || c == '.'))
195 return bColon && c == ':' && p == pEnd;
197 return false;
200 void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
201 rtl::OUString const & rString)
203 OSL_ASSERT(pBuffer);
205 pBuffer->append(sal_Unicode('"'));
206 sal_Unicode const * p = rString.getStr();
207 sal_Unicode const * pEnd = p + rString.getLength();
208 while (p != pEnd)
210 sal_Unicode c = *p++;
211 if (c == '"' || c == '\\')
212 pBuffer->append(sal_Unicode('\\'));
213 pBuffer->append(c);
215 pBuffer->append(sal_Unicode('"'));
220 rtl::OUString Regexp::getRegexp(bool bReverse) const
222 if (m_bTranslation)
224 rtl::OUStringBuffer aBuffer;
225 if (bReverse)
227 if (!m_aReversePrefix.isEmpty())
228 appendStringLiteral(&aBuffer, m_aReversePrefix);
230 else
232 if (!m_aPrefix.isEmpty())
233 appendStringLiteral(&aBuffer, m_aPrefix);
235 switch (m_eKind)
237 case KIND_PREFIX:
238 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
239 break;
241 case KIND_AUTHORITY:
242 aBuffer.
243 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
244 break;
246 case KIND_DOMAIN:
247 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
248 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
249 if (!m_aInfix.isEmpty())
250 appendStringLiteral(&aBuffer, m_aInfix);
251 aBuffer.
252 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
253 break;
255 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
256 if (bReverse)
258 if (!m_aPrefix.isEmpty())
259 appendStringLiteral(&aBuffer, m_aPrefix);
261 else
263 if (!m_aReversePrefix.isEmpty())
264 appendStringLiteral(&aBuffer, m_aReversePrefix);
266 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
267 return aBuffer.makeStringAndClear();
269 else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
270 return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
271 else
273 rtl::OUStringBuffer aBuffer;
274 if (!m_aPrefix.isEmpty())
275 appendStringLiteral(&aBuffer, m_aPrefix);
276 switch (m_eKind)
278 case KIND_PREFIX:
279 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
280 break;
282 case KIND_AUTHORITY:
283 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
284 break;
286 case KIND_DOMAIN:
287 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
288 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
289 if (!m_aInfix.isEmpty())
290 appendStringLiteral(&aBuffer, m_aInfix);
291 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
292 break;
294 return aBuffer.makeStringAndClear();
298 //============================================================================
299 namespace unnamed_ucb_regexp {
301 bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
302 sal_Char const * pString, size_t nStringLength)
304 sal_Unicode const * p = *pBegin;
306 sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
307 sal_uChar const * qEnd = q + nStringLength;
309 if (pEnd - p < qEnd - q)
310 return false;
312 while (q != qEnd)
314 sal_Unicode c1 = *p++;
315 sal_Unicode c2 = *q++;
316 if (c1 != c2)
317 return false;
320 *pBegin = p;
321 return true;
324 bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
325 rtl::OUString * pString)
327 sal_Unicode const * p = *pBegin;
329 if (p == pEnd || *p++ != '"')
330 return false;
332 rtl::OUStringBuffer aBuffer;
333 for (;;)
335 if (p == pEnd)
336 return false;
337 sal_Unicode c = *p++;
338 if (c == '"')
339 break;
340 if (c == '\\')
342 if (p == pEnd)
343 return false;
344 c = *p++;
345 if (c != '"' && c != '\\')
346 return false;
348 aBuffer.append(c);
351 *pBegin = p;
352 *pString = aBuffer.makeStringAndClear();
353 return true;
358 Regexp Regexp::parse(rtl::OUString const & rRegexp)
360 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
361 // where <scheme> is as defined in RFC 2396:
362 if (isScheme(rRegexp, false))
363 return Regexp(Regexp::KIND_PREFIX,
364 rRegexp
365 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
366 false,
367 rtl::OUString(),
368 false,
369 rtl::OUString());
371 sal_Unicode const * p = rRegexp.getStr();
372 sal_Unicode const * pEnd = p + rRegexp.getLength();
374 rtl::OUString aPrefix;
375 scanStringLiteral(&p, pEnd, &aPrefix);
377 if (p == pEnd)
378 throw lang::IllegalArgumentException();
380 if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
382 if (p != pEnd)
383 throw lang::IllegalArgumentException();
385 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
386 false, rtl::OUString());
388 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
390 rtl::OUString aReversePrefix;
391 scanStringLiteral(&p, pEnd, &aReversePrefix);
393 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
394 || p != pEnd)
395 throw lang::IllegalArgumentException();
397 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
398 true, aReversePrefix);
400 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
402 if (p != pEnd)
403 throw lang::IllegalArgumentException();
405 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
406 false, rtl::OUString());
408 else if (matchString(&p, pEnd,
409 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
411 rtl::OUString aReversePrefix;
412 if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
413 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
414 && p == pEnd))
415 throw lang::IllegalArgumentException();
417 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
418 true, aReversePrefix);
420 else
422 bool bOpen = false;
423 if (p != pEnd && *p == '(')
425 ++p;
426 bOpen = true;
429 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
430 throw lang::IllegalArgumentException();
432 if (p == pEnd || (*p != '*' && *p != '+'))
433 throw lang::IllegalArgumentException();
434 bool bEmptyDomain = *p++ == '*';
436 rtl::OUString aInfix;
437 scanStringLiteral(&p, pEnd, &aInfix);
439 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
440 throw lang::IllegalArgumentException();
442 rtl::OUString aReversePrefix;
443 if (bOpen
444 && !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
445 && scanStringLiteral(&p, pEnd, &aReversePrefix)
446 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
447 throw lang::IllegalArgumentException();
449 if (p != pEnd)
450 throw lang::IllegalArgumentException();
452 return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
453 bOpen, aReversePrefix);
457 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */