1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
24 #include "osl/diagnose.h"
25 #include <com/sun/star/lang/IllegalArgumentException.hpp>
26 #include <rtl/ustrbuf.hxx>
27 #include <rtl/ustring.hxx>
28 #include <comphelper/string.hxx>
30 namespace unnamed_ucb_regexp
{} using namespace unnamed_ucb_regexp
;
31 // unnamed namespaces don't work well yet...
33 using namespace com::sun::star
;
34 using namespace ucb_impl
;
// Builds a Regexp from its kind, prefix, empty-domain flag, infix and
// reverse prefix.
// NOTE(review): bTheTranslation is used in the initializer list but is not
// declared in the parameter list as shown here, and no initializers for
// m_eKind and m_aInfix are visible although the assertions read both --
// confirm against the class declaration.
42 inline Regexp::Regexp(Kind eTheKind
, OUString
const & rThePrefix
,
43 bool bTheEmptyDomain
, OUString
const & rTheInfix
,
45 OUString
const & rTheReversePrefix
):
47 m_aPrefix(rThePrefix
),
49 m_aReversePrefix(rTheReversePrefix
),
50 m_bEmptyDomain(bTheEmptyDomain
),
51 m_bTranslation(bTheTranslation
)
// Invariant: unless the kind is KIND_DOMAIN, the empty-domain flag must be
// off and the infix must be empty.
53 OSL_ASSERT(m_eKind
== KIND_DOMAIN
54 || (!m_bEmptyDomain
&& m_aInfix
.isEmpty()));
// Invariant: a non-empty reverse prefix requires m_bTranslation to be set.
55 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.isEmpty());
59 namespace unnamed_ucb_regexp
{
// Matches rString against the text that starts at *pBegin and is bounded by
// pEnd, reading one character from each side per step.
// pBegin:  in/out cursor into the text being scanned.
// pEnd:    one past the last character of that text.
// rString: the string to look for.
// NOTE(review): judging by the name and the a-z range tests below, both
// characters are presumably folded to one ASCII case before being compared,
// and *pBegin is presumably advanced only on success -- the folding,
// comparison and return statements are not visible here; confirm.
61 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
62 sal_Unicode
const * pEnd
,
63 OUString
const & rString
)
// Work on a local copy of the cursor.
65 sal_Unicode
const * p
= *pBegin
;
// [q, qEnd) spans the characters of rString.
67 sal_Unicode
const * q
= rString
.getStr();
68 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
// Guard for the case where less text remains than rString is long.
70 if (pEnd
- p
< qEnd
- q
)
// Fetch the next character from each side and advance both cursors.
75 sal_Unicode c1
= *p
++;
76 sal_Unicode c2
= *q
++;
// Detect ASCII lower-case letters on either side.
77 if (c1
>= 'a' && c1
<= 'z')
79 if (c2
>= 'a' && c2
<= 'z')
// Tests rString against this Regexp.
// rString:      the string to test.
// pTranslation: out parameter; receives either a rewritten string (reverse
//               prefix + block 1 + infix + block 2) or rString itself.
// pTranslated:  out parameter; set to false on the path visible here.
// The result is tracked in bMatches, which starts out false.
// NOTE(review): the statements that select between the alternatives below
// (presumably by m_eKind), that guard the writes through pTranslation and
// pTranslated, and that return the result are not visible here -- confirm.
91 bool Regexp::matches(OUString
const & rString
,
92 OUString
* pTranslation
, bool * pTranslated
) const
// [pBegin, pEnd) spans the characters of rString.
94 sal_Unicode
const * pBegin
= rString
.getStr();
95 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
97 bool bMatches
= false;
// The string has to start with m_aPrefix; on success p points just past it.
99 sal_Unicode
const * p
= pBegin
;
100 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
// Block 1 initially covers everything after the prefix; block 2 is empty.
// These two ranges are what gets copied into the translated string.
102 sal_Unicode
const * pBlock1Begin
= p
;
103 sal_Unicode
const * pBlock1End
= pEnd
;
105 sal_Unicode
const * pBlock2Begin
= 0;
106 sal_Unicode
const * pBlock2End
= 0;
// Alternative: the prefix must be followed by the end of the string or by
// one of the delimiters / ? #.
115 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
// Alternative: the same end-or-delimiter test, used as a branch condition.
121 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
// Probe with a separate cursor q whether m_aInfix occurs at p and is itself
// followed by the end of the string or by a delimiter.
127 sal_Unicode
const * q
= p
;
128 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
129 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
// Consume one character and check whether it is a delimiter.
141 sal_Unicode c
= *p
++;
142 if (c
== '/' || c
== '?' || c
== '#')
// Assemble the translation: reverse prefix, block 1, infix, block 2.
154 OUStringBuffer
aBuffer(m_aReversePrefix
);
155 aBuffer
.append(pBlock1Begin
, pBlock1End
- pBlock1Begin
);
156 aBuffer
.append(m_aInfix
);
157 aBuffer
.append(pBlock2Begin
, pBlock2End
- pBlock2Begin
);
158 *pTranslation
= aBuffer
.makeStringAndClear();
// Otherwise the input string is handed back unchanged and flagged as not
// translated.
166 *pTranslation
= rString
;
168 *pTranslated
= false;
177 namespace unnamed_ucb_regexp
{
// Checks whether rString has the shape of a URI scheme name.
// rString: the candidate text.
// bColon:  when true, a single trailing colon is expected after the scheme.
// NOTE(review): the loop header and the remaining return statements are not
// visible here.
179 bool isScheme(OUString
const & rString
, bool bColon
)
181 using comphelper::string::isalphaAscii
;
182 using comphelper::string::isdigitAscii
;
183 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
184 // is true) from RFC 2396:
// [p, pEnd) spans the characters of rString.
185 sal_Unicode
const * p
= rString
.getStr();
186 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// The string must be non-empty and start with an ASCII letter.
187 if (p
!= pEnd
&& isalphaAscii(*p
))
// Consume the next character.
192 sal_Unicode c
= *p
++;
// A character that is not an ASCII letter, an ASCII digit, +, - or . decides
// the result: true only if a colon was requested, this character is that
// colon, and it is the last character of rString.
193 if (!(isalphaAscii(c
) || isdigitAscii(c
)
194 || c
== '+' || c
== '-' || c
== '.'))
195 return bColon
&& c
== ':' && p
== pEnd
;
// Appends rString to *pBuffer as a literal enclosed in double quotes.
// pBuffer: the buffer that is appended to.
// rString: the text to quote.
// A backslash is emitted for every double quote or backslash found in
// rString.
// NOTE(review): the loop header and the append of the character itself are
// not visible here; presumably the character follows the backslash --
// confirm.
200 void appendStringLiteral(OUStringBuffer
* pBuffer
,
201 OUString
const & rString
)
// Opening quote.
205 pBuffer
->append('"');
// [p, pEnd) spans the characters of rString.
206 sal_Unicode
const * p
= rString
.getStr();
207 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// Consume the next character; quote and backslash get a backslash emitted.
210 sal_Unicode c
= *p
++;
211 if (c
== '"' || c
== '\\')
212 pBuffer
->append('\\');
// Closing quote.
215 pBuffer
->append('"');
// Produces a textual form of this Regexp, assembled from quoted literals
// for the prefix, infix and reverse prefix plus fixed wildcard fragments.
// Two forms are visible: one that contains a -> part ending in backslash-1,
// and one without it.
// NOTE(review): bReverse presumably swaps the roles of prefix and reverse
// prefix in the first form (both orders appear below); the conditions that
// test bReverse, m_bTranslation and m_eKind are mostly not visible here --
// confirm.
220 OUString
Regexp::getRegexp(bool bReverse
) const
224 OUStringBuffer aBuffer
;
// Left-hand side literal: the reverse prefix or the prefix, skipped if empty.
227 if (!m_aReversePrefix
.isEmpty())
228 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
232 if (!m_aPrefix
.isEmpty())
233 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Wildcard fragment, parenthesized as a group: anything ...
238 aBuffer
.append("(.*)");
// ... or an optional tail introduced by / ? # ...
242 aBuffer
.append("(([/?#].*)?)");
// ... or a run of non-delimiter characters (quantified with * when
// m_bEmptyDomain is set, + otherwise), the infix literal if any, and an
// optional tail.
246 aBuffer
.append("([^/?#]");
247 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
248 if (!m_aInfix
.isEmpty())
249 appendStringLiteral(&aBuffer
, m_aInfix
);
250 aBuffer
.append("([/?#].*)?)");
// Separator between the two sides.
253 aBuffer
.append("->");
// Right-hand side literal: the prefix or the reverse prefix, skipped if
// empty, followed by a reference to the group.
256 if (!m_aPrefix
.isEmpty())
257 appendStringLiteral(&aBuffer
, m_aPrefix
);
261 if (!m_aReversePrefix
.isEmpty())
262 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
264 aBuffer
.append("\\1");
265 return aBuffer
.makeStringAndClear();
// A KIND_PREFIX expression whose prefix is a scheme plus colon is
// abbreviated to the scheme alone: the last character is dropped.
267 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
268 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
// Form without a -> part: prefix literal (if any) plus one wildcard
// fragment, this time without the enclosing group parentheses.
271 OUStringBuffer aBuffer
;
272 if (!m_aPrefix
.isEmpty())
273 appendStringLiteral(&aBuffer
, m_aPrefix
);
277 aBuffer
.append(".*");
281 aBuffer
.append("([/?#].*)?");
285 aBuffer
.append("[^/?#]");
286 aBuffer
.append( m_bEmptyDomain
? '*' : '+' );
287 if (!m_aInfix
.isEmpty())
288 appendStringLiteral(&aBuffer
, m_aInfix
);
289 aBuffer
.append("([/?#].*)?");
292 return aBuffer
.makeStringAndClear();
297 namespace unnamed_ucb_regexp
{
// Matches the narrow string pString (nStringLength characters) against the
// text that starts at *pBegin and is bounded by pEnd.
// pBegin:        in/out cursor into the text being scanned.
// pEnd:          one past the last character of that text.
// pString:       the narrow characters to look for.
// nStringLength: number of characters in pString.
// NOTE(review): the loop header, the comparison of c1 with c2 and the
// return statements are not visible here; presumably *pBegin is advanced
// only on a full match -- confirm.
299 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
300 sal_Char
const * pString
, size_t nStringLength
)
// Work on a local copy of the cursor.
302 sal_Unicode
const * p
= *pBegin
;
// The narrow string is read through unsigned char, presumably so that each
// byte widens to sal_Unicode without sign extension.
304 unsigned char const * q
= reinterpret_cast< unsigned char const * >(pString
);
305 unsigned char const * qEnd
= q
+ nStringLength
;
// Guard for the case where less text remains than pString is long.
307 if (pEnd
- p
< qEnd
- q
)
// Fetch the next character from each side and advance both cursors.
312 sal_Unicode c1
= *p
++;
313 sal_Unicode c2
= *q
++;
// Scans a double-quoted literal that starts at *pBegin and stores its
// content in *pString.
// pBegin: in/out cursor into the text being scanned.
// pEnd:   one past the last character of that text.
// NOTE(review): the rest of the parameter list (the pString output written
// at the end), the loop header, the handling of backslash escapes and the
// return statements are not visible here -- confirm.
322 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
// Work on a local copy of the cursor.
325 sal_Unicode
const * p
= *pBegin
;
// The literal has to begin with a double quote; the cursor moves past it.
327 if (p
== pEnd
|| *p
++ != '"')
// Collects the content of the literal.
330 OUStringBuffer aBuffer
;
// Consume the next character.
335 sal_Unicode c
= *p
++;
// Test for a character that is neither a double quote nor a backslash.
343 if (c
!= '"' && c
!= '\\')
// Hand the collected content to the caller.
350 *pString
= aBuffer
.makeStringAndClear();
// Parses the textual form rRegexp into a Regexp object.
// Visible grammar: either a bare scheme name, or a quoted prefix literal
// followed by one of the wildcard tails matched below (prefix, authority or
// domain form). Each tail may carry a translation part made of ->, a quoted
// reverse prefix literal and backslash-1.
// Throws lang::IllegalArgumentException when the text does not fit.
// NOTE(review): several conditions guarding the throw statements, some
// constructor arguments, and the declarations of aPrefix, aInfix and bOpen
// are not visible here; the result of scanning the prefix literal is not
// checked in the visible code -- confirm.
356 Regexp
Regexp::parse(OUString
const & rRegexp
)
358 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
359 // where <scheme> is as defined in RFC 2396:
360 if (isScheme(rRegexp
, false))
361 return Regexp(Regexp::KIND_PREFIX
,
// [p, pEnd) spans the characters of rRegexp; p is the parse cursor.
368 sal_Unicode
const * p
= rRegexp
.getStr();
369 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
// Leading quoted prefix literal.
372 scanStringLiteral(&p
, pEnd
, &aPrefix
);
375 throw lang::IllegalArgumentException();
377 // This and the matchString() calls below are some of the few places where
378 // RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
379 // (c.f. https://gerrit.libreoffice.org/3117)
// Tail .* gives the prefix form.
380 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
383 throw lang::IllegalArgumentException();
385 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
// Tail (.*)-> gives the prefix form with translation: a reverse prefix
// literal and backslash-1 follow.
388 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
390 OUString aReversePrefix
;
391 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
393 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
395 throw lang::IllegalArgumentException();
397 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
398 true, aReversePrefix
);
// Tail ([/?#].*)? gives the authority form.
400 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
403 throw lang::IllegalArgumentException();
405 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
// Tail (([/?#].*)?)-> gives the authority form with translation; here the
// reverse prefix literal and backslash-1 are both required.
408 else if (matchString(&p
, pEnd
,
409 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
411 OUString aReversePrefix
;
412 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
413 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
415 throw lang::IllegalArgumentException();
417 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
418 true, aReversePrefix
);
// Remaining case: the domain form. A leading opening parenthesis is
// detected here; bOpen is later passed to the constructor in the position
// where the translating branches above pass true.
423 if (p
!= pEnd
&& *p
== '(')
// The domain part is a run of characters other than / ? # ...
429 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
430 throw lang::IllegalArgumentException();
// ... followed by a mandatory quantifier: * sets bEmptyDomain to true,
// + sets it to false.
432 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
433 throw lang::IllegalArgumentException();
434 bool bEmptyDomain
= *p
++ == '*';
// Quoted infix literal, then the mandatory optional-tail fragment.
437 scanStringLiteral(&p
, pEnd
, &aInfix
);
439 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
440 throw lang::IllegalArgumentException();
// Translation part of the domain form: closing parenthesis plus ->, the
// reverse prefix literal and backslash-1 must all be present.
442 OUString aReversePrefix
;
444 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
445 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
446 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
447 throw lang::IllegalArgumentException();
450 throw lang::IllegalArgumentException();
452 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
453 bOpen
, aReversePrefix
);
457 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */