1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
24 #include "osl/diagnose.h"
25 #include <com/sun/star/lang/IllegalArgumentException.hpp>
26 #include <rtl/ustrbuf.hxx>
27 #include <rtl/ustring.hxx>
28 #include <comphelper/string.hxx>
// Declare the helper namespace up front (empty at this point) and import it
// with a using-directive; the same namespace is reopened further down in this
// file around the helper functions.
30 namespace unnamed_ucb_regexp
{} using namespace unnamed_ucb_regexp
;
31 // unnamed namespaces don't work well yet...
// File-wide using-directives: com::sun::star is what lets the code below
// write lang::IllegalArgumentException; ucb_impl is presumably where Regexp
// is declared (declaration not visible here -- TODO confirm).
33 using namespace com::sun::star
;
34 using namespace ucb_impl
;
36 //============================================================================
40 //============================================================================
// Regexp constructor: stores the supplied prefix, reverse prefix and flags in
// the corresponding members and then asserts that the combination is
// consistent.
// NOTE(review): this listing appears to be missing lines. The initializer
// list reads bTheTranslation and the assertions read m_eKind and m_aInfix,
// but no bTheTranslation parameter, no m_eKind / m_aInfix initializers and no
// body braces are visible here. Confirm against the complete file.
42 inline Regexp::Regexp(Kind eTheKind
, rtl::OUString
const & rThePrefix
,
43 bool bTheEmptyDomain
, rtl::OUString
const & rTheInfix
,
45 rtl::OUString
const & rTheReversePrefix
):
47 m_aPrefix(rThePrefix
),
49 m_aReversePrefix(rTheReversePrefix
),
50 m_bEmptyDomain(bTheEmptyDomain
),
51 m_bTranslation(bTheTranslation
)
// Unless the kind is KIND_DOMAIN, the empty-domain flag must be false and the
// infix must be empty.
53 OSL_ASSERT(m_eKind
== KIND_DOMAIN
54 || (!m_bEmptyDomain
&& m_aInfix
.isEmpty()));
// A non-empty reverse prefix is only allowed when translation is enabled.
55 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.isEmpty());
58 //============================================================================
// Reopens the helper namespace for matchStringIgnoreCase.
//
// matchStringIgnoreCase: compares the characters starting at *pBegin (the
// input is bounded by pEnd) against rString. Both sides are tested for being
// an ASCII lowercase letter 'a'..'z' -- presumably so that the comparison can
// ignore ASCII case, as the name says.
// NOTE(review): the loop header, the statements controlled by the visible
// `if`s, the return statements and the closing braces are not present in this
// listing, so the exact result and whether *pBegin is updated cannot be
// confirmed from here.
59 namespace unnamed_ucb_regexp
{
61 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
62 sal_Unicode
const * pEnd
,
63 rtl::OUString
const & rString
)
// p: read cursor into the input, starting at the caller's position.
65 sal_Unicode
const * p
= *pBegin
;
// [q, qEnd) spans the characters of rString.
67 sal_Unicode
const * q
= rString
.getStr();
68 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
// Guard: fewer input characters remain than rString contains. The action
// taken is not visible here (presumably an early "no match" return).
70 if (pEnd
- p
< qEnd
- q
)
// Fetch one character from each side and advance both cursors.
75 sal_Unicode c1
= *p
++;
76 sal_Unicode c2
= *q
++;
// The same ASCII lowercase-letter test is applied to each character; the
// statements these tests control are not visible in this listing.
77 if (c1
>= 'a' && c1
<= 'z')
79 if (c2
>= 'a' && c2
<= 'z')
// Regexp::matches: tests rString against this Regexp. The visible code
// writes a translated (or unchanged) string through pTranslation and a flag
// through pTranslated.
// NOTE(review): large parts of this function are absent from this listing
// (braces, the selection between the different matching strategies, any
// loops, any null checks on the out-pointers, and the return statement). The
// comments below only describe statements that are visible.
91 bool Regexp::matches(rtl::OUString
const & rString
,
92 rtl::OUString
* pTranslation
, bool * pTranslated
) const
// [pBegin, pEnd) spans the characters of rString.
94 sal_Unicode
const * pBegin
= rString
.getStr();
95 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
97 bool bMatches
= false;
// The input is first compared against m_aPrefix. p is passed by address,
// presumably so the helper can advance it past the prefix (the write-back is
// not visible in this listing).
99 sal_Unicode
const * p
= pBegin
;
100 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
// Block 1 initially covers everything from p to the end of the input.
102 sal_Unicode
const * pBlock1Begin
= p
;
103 sal_Unicode
const * pBlock1End
= pEnd
;
// Block 2 starts out as a null range.
105 sal_Unicode
const * pBlock2Begin
= 0;
106 sal_Unicode
const * pBlock2End
= 0;
// Here a match requires p to be at the end of the input or at one of the
// delimiters '/', '?' or '#'.
115 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
// The same end-or-delimiter test used as a condition; the statement it
// controls is not visible in this listing.
121 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
// Try m_aInfix at the current position using a scratch cursor q; the infix
// must be followed by the end of the input or by '/', '?' or '#'.
127 sal_Unicode
const * q
= p
;
128 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
129 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
// Consume one character; '/', '?' and '#' are singled out (the statement
// controlled by this test is not visible).
141 sal_Unicode c
= *p
++;
142 if (c
== '/' || c
== '?' || c
== '#')
// Build the translated string: m_aReversePrefix + block 1 + m_aInfix +
// block 2.
154 rtl::OUStringBuffer
aBuffer(m_aReversePrefix
);
155 aBuffer
.append(pBlock1Begin
, pBlock1End
- pBlock1Begin
);
156 aBuffer
.append(m_aInfix
);
157 aBuffer
.append(pBlock2Begin
, pBlock2End
- pBlock2Begin
);
158 *pTranslation
= aBuffer
.makeStringAndClear();
// Alternative assignment: hand rString back unchanged. The condition that
// selects between this and the translated string is not visible here.
166 *pTranslation
= rString
;
// The only visible store to *pTranslated is false; any store of true is not
// part of this listing.
168 *pTranslated
= false;
176 //============================================================================
// Reopens the helper namespace for isScheme and the helpers that follow it.
//
// isScheme: checks whether rString has the form of an RFC 2396 <scheme>,
// optionally followed by a single trailing ':' when bColon is true.
// NOTE(review): the loop header, the remaining return statements and the
// braces are not present in this listing; only the visible statements are
// described.
177 namespace unnamed_ucb_regexp
{
179 bool isScheme(rtl::OUString
const & rString
, bool bColon
)
// Local using-declarations for the ASCII classification helpers from
// comphelper::string.
181 using comphelper::string::isalphaAscii
;
182 using comphelper::string::isdigitAscii
;
183 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
184 // is true) from RFC 2396:
// [p, pEnd) spans the characters of rString.
185 sal_Unicode
const * p
= rString
.getStr();
186 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// The string must be non-empty and start with an ASCII letter.
187 if (p
!= pEnd
&& isalphaAscii(*p
))
// Read the next character and advance.
192 sal_Unicode c
= *p
++;
// A character that is not a letter, digit, '+', '-' or '.' ends the scan:
// the result is true only when bColon is set, that character is ':' and it
// was the last character of rString (p has already moved past it).
193 if (!(isalphaAscii(c
) || isdigitAscii(c
)
194 || c
== '+' || c
== '-' || c
== '.'))
195 return bColon
&& c
== ':' && p
== pEnd
;
// appendStringLiteral: appends rString to *pBuffer as a double-quoted
// literal. A backslash is emitted when a '"' or '\' character is met.
// NOTE(review): the loop header, the append of the character itself and the
// braces are not present in this listing; presumably each character is
// appended after the optional backslash -- TODO confirm.
200 void appendStringLiteral(rtl::OUStringBuffer
* pBuffer
,
201 rtl::OUString
const & rString
)
// Opening quote.
205 pBuffer
->append(sal_Unicode('"'));
// [p, pEnd) spans the characters of rString.
206 sal_Unicode
const * p
= rString
.getStr();
207 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// Read the next character; quote and backslash get a backslash appended.
210 sal_Unicode c
= *p
++;
211 if (c
== '"' || c
== '\\')
212 pBuffer
->append(sal_Unicode('\\'));
// Closing quote.
215 pBuffer
->append(sal_Unicode('"'));
// Regexp::getRegexp: renders this Regexp back into its textual form and
// returns it as an OUString.
// NOTE(review): the conditions that choose between the branches below
// (presumably on m_bTranslation, bReverse and m_eKind) as well as all braces
// are not present in this listing. The comments describe only the visible
// statements and the text they append.
220 rtl::OUString
Regexp::getRegexp(bool bReverse
) const
// First visible form: "<literal><capture pattern>-><literal>\1".
224 rtl::OUStringBuffer aBuffer
;
// Left-hand literal: either the reverse prefix or the prefix, each only if
// non-empty (the selection between the two is not visible).
227 if (!m_aReversePrefix
.isEmpty())
228 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
232 if (!m_aPrefix
.isEmpty())
233 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Parenthesized capture patterns; which one is used is decided by code that
// is not visible here.
238 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
// NOTE(review): the receiver of this call is not visible in this listing.
243 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
// Pattern with a character class followed by '*' when m_bEmptyDomain is set,
// '+' otherwise, then the optional infix literal and the optional tail.
247 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
248 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
249 if (!m_aInfix
.isEmpty())
250 appendStringLiteral(&aBuffer
, m_aInfix
);
// NOTE(review): the receiver of this call is not visible in this listing.
252 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
// Separator between the matching side and the replacement side.
255 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
// Right-hand literal: prefix or reverse prefix, each only if non-empty (the
// selection between the two is not visible).
258 if (!m_aPrefix
.isEmpty())
259 appendStringLiteral(&aBuffer
, m_aPrefix
);
263 if (!m_aReversePrefix
.isEmpty())
264 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
// Back-reference to the captured group, then return the assembled text.
266 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
267 return aBuffer
.makeStringAndClear();
// A KIND_PREFIX whose prefix is exactly "<scheme>:" is rendered as the prefix
// without its final character.
269 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
270 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
// Remaining form: optional prefix literal followed by one of the patterns
// below, with no "->" part.
273 rtl::OUStringBuffer aBuffer
;
274 if (!m_aPrefix
.isEmpty())
275 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Candidate patterns; the code selecting among them is not visible here.
279 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
283 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
// Character class followed by '*' when m_bEmptyDomain is set, '+' otherwise,
// then the optional infix literal and the optional tail.
287 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
288 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
289 if (!m_aInfix
.isEmpty())
290 appendStringLiteral(&aBuffer
, m_aInfix
);
291 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
294 return aBuffer
.makeStringAndClear();
298 //============================================================================
// Reopens the helper namespace for the parsing helpers.
//
// matchString: compares the characters starting at *pBegin (the input is
// bounded by pEnd) against the nStringLength narrow characters at pString.
// NOTE(review): the loop header, the comparison of c1 with c2, the return
// statements and the braces are not present in this listing, so the exact
// result and whether *pBegin is updated cannot be confirmed from here.
299 namespace unnamed_ucb_regexp
{
301 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
302 sal_Char
const * pString
, size_t nStringLength
)
// p: read cursor into the input, starting at the caller's position.
304 sal_Unicode
const * p
= *pBegin
;
// The pattern is read through an unsigned-char pointer (presumably so that
// widening to sal_Unicode below cannot sign-extend -- TODO confirm).
306 sal_uChar
const * q
= reinterpret_cast< sal_uChar
const * >(pString
);
307 sal_uChar
const * qEnd
= q
+ nStringLength
;
// Guard: fewer input characters remain than the pattern contains. The action
// taken is not visible here (presumably an early "no match" return).
309 if (pEnd
- p
< qEnd
- q
)
// Fetch one character from each side (the pattern character is widened to
// sal_Unicode) and advance both cursors.
314 sal_Unicode c1
= *p
++;
315 sal_Unicode c2
= *q
++;
// scanStringLiteral: reads a double-quoted string literal starting at
// *pBegin (the input is bounded by pEnd) and stores the collected text in
// *pString.
// NOTE(review): the loop, the handling of the closing quote and of escapes,
// the appends into aBuffer, the update of *pBegin, the return statements and
// the braces are not present in this listing; only the visible statements
// are described.
324 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
325 rtl::OUString
* pString
)
// p: read cursor into the input, starting at the caller's position.
327 sal_Unicode
const * p
= *pBegin
;
// Guard: the input must not be exhausted and must start with '"'. The
// action taken otherwise is not visible (presumably a failure return).
329 if (p
== pEnd
|| *p
++ != '"')
// Accumulates the literal's contents.
332 rtl::OUStringBuffer aBuffer
;
// Read the next character and advance.
337 sal_Unicode c
= *p
++;
// Test for a character that is neither '"' nor '\'; its context and the
// statement it controls are not visible in this listing.
345 if (c
!= '"' && c
!= '\\')
// Hand the collected text to the caller.
352 *pString
= aBuffer
.makeStringAndClear();
// Regexp::parse: builds a Regexp from its textual form rRegexp. Malformed
// input is reported by throwing lang::IllegalArgumentException.
// NOTE(review): several conditions guarding the `throw` statements, the
// declaration of bOpen, part of the first `return` expression and all braces
// are not present in this listing. The comments describe only the visible
// statements.
358 Regexp
Regexp::parse(rtl::OUString
const & rRegexp
)
360 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
361 // where <scheme> is as defined in RFC 2396:
362 if (isScheme(rRegexp
, false))
363 return Regexp(Regexp::KIND_PREFIX
,
365 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
// General case: scan rRegexp with a cursor p over [p, pEnd).
371 sal_Unicode
const * p
= rRegexp
.getStr();
372 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
// Leading string literal, stored as the prefix. The helper's return value is
// not used in the visible code (presumably because the literal is optional
// -- TODO confirm).
374 rtl::OUString aPrefix
;
375 scanStringLiteral(&p
, pEnd
, &aPrefix
);
// Condition for this throw is not visible in this listing.
378 throw lang::IllegalArgumentException();
// ".*" -> KIND_PREFIX without translation.
380 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
// Condition for this throw is not visible in this listing.
383 throw lang::IllegalArgumentException();
385 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
386 false, rtl::OUString());
// "(.*)->" followed by a literal and the back-reference -> KIND_PREFIX with
// translation to aReversePrefix.
388 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
390 rtl::OUString aReversePrefix
;
391 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
// The back-reference text must follow; the rest of this condition is not
// visible in this listing.
393 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
395 throw lang::IllegalArgumentException();
397 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
398 true, aReversePrefix
);
// "([/?#].*)?" -> KIND_AUTHORITY without translation.
400 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
// Condition for this throw is not visible in this listing.
403 throw lang::IllegalArgumentException();
405 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
406 false, rtl::OUString());
// "(([/?#].*)?)->" followed by a literal and the back-reference ->
// KIND_AUTHORITY with translation to aReversePrefix.
408 else if (matchString(&p
, pEnd
,
409 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
411 rtl::OUString aReversePrefix
;
// Both the literal and the back-reference must parse; the rest of this
// condition is not visible in this listing.
412 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
413 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
415 throw lang::IllegalArgumentException();
417 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
418 true, aReversePrefix
);
// Remaining form (returned below as KIND_DOMAIN). An opening parenthesis is
// tested for here; the statement it controls is not visible (presumably it
// sets bOpen, whose declaration is also not visible -- TODO confirm).
423 if (p
!= pEnd
&& *p
== '(')
// The character class "[^/?#]" is mandatory.
429 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
430 throw lang::IllegalArgumentException();
// It must be followed by '*' or '+'; '*' sets bEmptyDomain to true.
432 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
433 throw lang::IllegalArgumentException();
434 bool bEmptyDomain
= *p
++ == '*';
// Infix string literal; the helper's return value is not used in the visible
// code.
436 rtl::OUString aInfix
;
437 scanStringLiteral(&p
, pEnd
, &aInfix
);
// The tail pattern "([/?#].*)?" is mandatory.
439 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
440 throw lang::IllegalArgumentException();
442 rtl::OUString aReversePrefix
;
// Translation tail: ")->" + string literal + back-reference must all parse,
// otherwise the input is rejected. The first operand of this condition is
// not visible in this listing.
444 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
445 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
446 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
447 throw lang::IllegalArgumentException();
// Condition for this throw is not visible in this listing.
450 throw lang::IllegalArgumentException();
// bOpen is passed as the translation flag.
452 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
453 bOpen
, aReversePrefix
);
457 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */