Bump for 3.6-28
[LibreOffice.git] / ucb / source / regexp / regexp.cxx
blob69e5b7afaba7d62d8bfc1522cbb2842da314c895
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include <regexp.hxx>
31 #include <cstddef>
33 #include "osl/diagnose.h"
34 #include <com/sun/star/lang/IllegalArgumentException.hpp>
35 #include <rtl/ustrbuf.hxx>
36 #include <rtl/ustring.hxx>
37 #include <comphelper/string.hxx>
39 namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
40 // unnamed namespaces don't work well yet...
42 using namespace com::sun::star;
43 using namespace ucb_impl;
45 //============================================================================
47 // Regexp
49 //============================================================================
51 inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
52 bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
53 bool bTheTranslation,
54 rtl::OUString const & rTheReversePrefix):
55 m_eKind(eTheKind),
56 m_aPrefix(rThePrefix),
57 m_aInfix(rTheInfix),
58 m_aReversePrefix(rTheReversePrefix),
59 m_bEmptyDomain(bTheEmptyDomain),
60 m_bTranslation(bTheTranslation)
62 OSL_ASSERT(m_eKind == KIND_DOMAIN
63 || (!m_bEmptyDomain && m_aInfix.isEmpty()));
64 OSL_ASSERT(m_bTranslation || m_aReversePrefix.isEmpty());
67 //============================================================================
68 namespace unnamed_ucb_regexp {
70 bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
71 sal_Unicode const * pEnd,
72 rtl::OUString const & rString)
74 sal_Unicode const * p = *pBegin;
76 sal_Unicode const * q = rString.getStr();
77 sal_Unicode const * qEnd = q + rString.getLength();
79 if (pEnd - p < qEnd - q)
80 return false;
82 while (q != qEnd)
84 sal_Unicode c1 = *p++;
85 sal_Unicode c2 = *q++;
86 if (c1 >= 'a' && c1 <= 'z')
87 c1 -= 'a' - 'A';
88 if (c2 >= 'a' && c2 <= 'z')
89 c2 -= 'a' - 'A';
90 if (c1 != c2)
91 return false;
94 *pBegin = p;
95 return true;
100 bool Regexp::matches(rtl::OUString const & rString,
101 rtl::OUString * pTranslation, bool * pTranslated) const
103 sal_Unicode const * pBegin = rString.getStr();
104 sal_Unicode const * pEnd = pBegin + rString.getLength();
106 bool bMatches = false;
108 sal_Unicode const * p = pBegin;
109 if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
111 sal_Unicode const * pBlock1Begin = p;
112 sal_Unicode const * pBlock1End = pEnd;
114 sal_Unicode const * pBlock2Begin = 0;
115 sal_Unicode const * pBlock2End = 0;
117 switch (m_eKind)
119 case KIND_PREFIX:
120 bMatches = true;
121 break;
123 case KIND_AUTHORITY:
124 bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
125 break;
127 case KIND_DOMAIN:
128 if (!m_bEmptyDomain)
130 if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
131 break;
132 ++p;
134 for (;;)
136 sal_Unicode const * q = p;
137 if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
138 && (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
140 bMatches = true;
141 pBlock1End = p;
142 pBlock2Begin = q;
143 pBlock2End = pEnd;
144 break;
147 if (p == pEnd)
148 break;
150 sal_Unicode c = *p++;
151 if (c == '/' || c == '?' || c == '#')
152 break;
154 break;
157 if (bMatches)
159 if (m_bTranslation)
161 if (pTranslation)
163 rtl::OUStringBuffer aBuffer(m_aReversePrefix);
164 aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
165 aBuffer.append(m_aInfix);
166 aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
167 *pTranslation = aBuffer.makeStringAndClear();
169 if (pTranslated)
170 *pTranslated = true;
172 else
174 if (pTranslation)
175 *pTranslation = rString;
176 if (pTranslated)
177 *pTranslated = false;
182 return bMatches;
185 //============================================================================
186 namespace unnamed_ucb_regexp {
188 bool isScheme(rtl::OUString const & rString, bool bColon)
190 using comphelper::string::isalphaAscii;
191 using comphelper::string::isdigitAscii;
192 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
193 // is true) from RFC 2396:
194 sal_Unicode const * p = rString.getStr();
195 sal_Unicode const * pEnd = p + rString.getLength();
196 if (p != pEnd && isalphaAscii(*p))
197 for (++p;;)
199 if (p == pEnd)
200 return !bColon;
201 sal_Unicode c = *p++;
202 if (!(isalphaAscii(c) || isdigitAscii(c)
203 || c == '+' || c == '-' || c == '.'))
204 return bColon && c == ':' && p == pEnd;
206 return false;
209 void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
210 rtl::OUString const & rString)
212 OSL_ASSERT(pBuffer);
214 pBuffer->append(sal_Unicode('"'));
215 sal_Unicode const * p = rString.getStr();
216 sal_Unicode const * pEnd = p + rString.getLength();
217 while (p != pEnd)
219 sal_Unicode c = *p++;
220 if (c == '"' || c == '\\')
221 pBuffer->append(sal_Unicode('\\'));
222 pBuffer->append(c);
224 pBuffer->append(sal_Unicode('"'));
229 rtl::OUString Regexp::getRegexp(bool bReverse) const
231 if (m_bTranslation)
233 rtl::OUStringBuffer aBuffer;
234 if (bReverse)
236 if (!m_aReversePrefix.isEmpty())
237 appendStringLiteral(&aBuffer, m_aReversePrefix);
239 else
241 if (!m_aPrefix.isEmpty())
242 appendStringLiteral(&aBuffer, m_aPrefix);
244 switch (m_eKind)
246 case KIND_PREFIX:
247 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
248 break;
250 case KIND_AUTHORITY:
251 aBuffer.
252 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
253 break;
255 case KIND_DOMAIN:
256 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
257 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
258 if (!m_aInfix.isEmpty())
259 appendStringLiteral(&aBuffer, m_aInfix);
260 aBuffer.
261 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
262 break;
264 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
265 if (bReverse)
267 if (!m_aPrefix.isEmpty())
268 appendStringLiteral(&aBuffer, m_aPrefix);
270 else
272 if (!m_aReversePrefix.isEmpty())
273 appendStringLiteral(&aBuffer, m_aReversePrefix);
275 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
276 return aBuffer.makeStringAndClear();
278 else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
279 return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
280 else
282 rtl::OUStringBuffer aBuffer;
283 if (!m_aPrefix.isEmpty())
284 appendStringLiteral(&aBuffer, m_aPrefix);
285 switch (m_eKind)
287 case KIND_PREFIX:
288 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
289 break;
291 case KIND_AUTHORITY:
292 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
293 break;
295 case KIND_DOMAIN:
296 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
297 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
298 if (!m_aInfix.isEmpty())
299 appendStringLiteral(&aBuffer, m_aInfix);
300 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
301 break;
303 return aBuffer.makeStringAndClear();
307 //============================================================================
308 namespace unnamed_ucb_regexp {
310 bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
311 sal_Char const * pString, size_t nStringLength)
313 sal_Unicode const * p = *pBegin;
315 sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
316 sal_uChar const * qEnd = q + nStringLength;
318 if (pEnd - p < qEnd - q)
319 return false;
321 while (q != qEnd)
323 sal_Unicode c1 = *p++;
324 sal_Unicode c2 = *q++;
325 if (c1 != c2)
326 return false;
329 *pBegin = p;
330 return true;
333 bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
334 rtl::OUString * pString)
336 sal_Unicode const * p = *pBegin;
338 if (p == pEnd || *p++ != '"')
339 return false;
341 rtl::OUStringBuffer aBuffer;
342 for (;;)
344 if (p == pEnd)
345 return false;
346 sal_Unicode c = *p++;
347 if (c == '"')
348 break;
349 if (c == '\\')
351 if (p == pEnd)
352 return false;
353 c = *p++;
354 if (c != '"' && c != '\\')
355 return false;
357 aBuffer.append(c);
360 *pBegin = p;
361 *pString = aBuffer.makeStringAndClear();
362 return true;
367 Regexp Regexp::parse(rtl::OUString const & rRegexp)
369 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
370 // where <scheme> is as defined in RFC 2396:
371 if (isScheme(rRegexp, false))
372 return Regexp(Regexp::KIND_PREFIX,
373 rRegexp
374 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
375 false,
376 rtl::OUString(),
377 false,
378 rtl::OUString());
380 sal_Unicode const * p = rRegexp.getStr();
381 sal_Unicode const * pEnd = p + rRegexp.getLength();
383 rtl::OUString aPrefix;
384 scanStringLiteral(&p, pEnd, &aPrefix);
386 if (p == pEnd)
387 throw lang::IllegalArgumentException();
389 if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
391 if (p != pEnd)
392 throw lang::IllegalArgumentException();
394 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
395 false, rtl::OUString());
397 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
399 rtl::OUString aReversePrefix;
400 scanStringLiteral(&p, pEnd, &aReversePrefix);
402 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
403 || p != pEnd)
404 throw lang::IllegalArgumentException();
406 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
407 true, aReversePrefix);
409 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
411 if (p != pEnd)
412 throw lang::IllegalArgumentException();
414 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
415 false, rtl::OUString());
417 else if (matchString(&p, pEnd,
418 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
420 rtl::OUString aReversePrefix;
421 if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
422 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
423 && p == pEnd))
424 throw lang::IllegalArgumentException();
426 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
427 true, aReversePrefix);
429 else
431 bool bOpen = false;
432 if (p != pEnd && *p == '(')
434 ++p;
435 bOpen = true;
438 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
439 throw lang::IllegalArgumentException();
441 if (p == pEnd || (*p != '*' && *p != '+'))
442 throw lang::IllegalArgumentException();
443 bool bEmptyDomain = *p++ == '*';
445 rtl::OUString aInfix;
446 scanStringLiteral(&p, pEnd, &aInfix);
448 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
449 throw lang::IllegalArgumentException();
451 rtl::OUString aReversePrefix;
452 if (bOpen
453 && !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
454 && scanStringLiteral(&p, pEnd, &aReversePrefix)
455 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
456 throw lang::IllegalArgumentException();
458 if (p != pEnd)
459 throw lang::IllegalArgumentException();
461 return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
462 bOpen, aReversePrefix);
466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */