/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#include <cstddef>
#include <utility>

#include <osl/diagnose.h>
#include <com/sun/star/lang/IllegalArgumentException.hpp>
#include <rtl/character.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>

using namespace com::sun::star;
using namespace ucb_impl;
38 inline Regexp::Regexp(Kind eTheKind
, OUString aThePrefix
,
39 bool bTheEmptyDomain
, OUString aTheInfix
,
41 OUString aTheReversePrefix
):
43 m_aPrefix(std::move(aThePrefix
)),
44 m_aInfix(std::move(aTheInfix
)),
45 m_aReversePrefix(std::move(aTheReversePrefix
)),
46 m_bEmptyDomain(bTheEmptyDomain
),
47 m_bTranslation(bTheTranslation
)
49 OSL_ASSERT(m_eKind
== KIND_DOMAIN
50 || (!m_bEmptyDomain
&& m_aInfix
.isEmpty()));
51 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.isEmpty());
57 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
58 sal_Unicode
const * pEnd
,
59 OUString
const & rString
)
61 sal_Unicode
const * p
= *pBegin
;
63 sal_Unicode
const * q
= rString
.getStr();
64 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
66 if (pEnd
- p
< qEnd
- q
)
71 if (rtl::compareIgnoreAsciiCase(*p
++, *q
++) != 0)
81 bool Regexp::matches(OUString
const & rString
) const
83 sal_Unicode
const * pBegin
= rString
.getStr();
84 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
86 bool bMatches
= false;
88 sal_Unicode
const * p
= pBegin
;
89 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
98 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
104 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
110 sal_Unicode
const * q
= p
;
111 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
112 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
121 sal_Unicode c
= *p
++;
122 if (c
== '/' || c
== '?' || c
== '#')
135 bool isScheme(OUString
const & rString
, bool bColon
)
137 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
138 // is true) from RFC 2396:
139 sal_Unicode
const * p
= rString
.getStr();
140 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
141 if (p
!= pEnd
&& rtl::isAsciiAlpha(*p
))
146 sal_Unicode c
= *p
++;
147 if (!(rtl::isAsciiAlphanumeric(c
)
148 || c
== '+' || c
== '-' || c
== '.'))
149 return bColon
&& c
== ':' && p
== pEnd
;
154 void appendStringLiteral(OUStringBuffer
* pBuffer
,
155 OUString
const & rString
)
159 pBuffer
->append('"');
160 sal_Unicode
const * p
= rString
.getStr();
161 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
164 sal_Unicode c
= *p
++;
165 if (c
== '"' || c
== '\\')
166 pBuffer
->append('\\');
169 pBuffer
->append('"');
174 OUString
Regexp::getRegexp() const
178 OUStringBuffer aBuffer
;
179 if (!m_aPrefix
.isEmpty())
180 appendStringLiteral(&aBuffer
, m_aPrefix
);
184 aBuffer
.append("(.*)");
188 aBuffer
.append("(([/?#].*)?)");
192 aBuffer
.append("([^/?#]" + OUStringChar(sal_Unicode(m_bEmptyDomain
? '*' : '+')));
193 if (!m_aInfix
.isEmpty())
194 appendStringLiteral(&aBuffer
, m_aInfix
);
195 aBuffer
.append("([/?#].*)?)");
198 aBuffer
.append("->");
199 if (!m_aReversePrefix
.isEmpty())
200 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
201 aBuffer
.append("\\1");
202 return aBuffer
.makeStringAndClear();
204 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
205 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
208 OUStringBuffer aBuffer
;
209 if (!m_aPrefix
.isEmpty())
210 appendStringLiteral(&aBuffer
, m_aPrefix
);
214 aBuffer
.append(".*");
218 aBuffer
.append("([/?#].*)?");
222 aBuffer
.append("[^/?#]" + OUStringChar( m_bEmptyDomain
? '*' : '+' ));
223 if (!m_aInfix
.isEmpty())
224 appendStringLiteral(&aBuffer
, m_aInfix
);
225 aBuffer
.append("([/?#].*)?");
228 return aBuffer
.makeStringAndClear();
235 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
236 char const * pString
, size_t nStringLength
)
238 sal_Unicode
const * p
= *pBegin
;
240 unsigned char const * q
= reinterpret_cast< unsigned char const * >(pString
);
241 unsigned char const * qEnd
= q
+ nStringLength
;
243 if (pEnd
- p
< qEnd
- q
)
248 sal_Unicode c1
= *p
++;
249 sal_Unicode c2
= *q
++;
258 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
261 sal_Unicode
const * p
= *pBegin
;
263 if (p
== pEnd
|| *p
++ != '"')
266 OUStringBuffer aBuffer
;
271 sal_Unicode c
= *p
++;
279 if (c
!= '"' && c
!= '\\')
286 *pString
= aBuffer
.makeStringAndClear();
292 Regexp
Regexp::parse(OUString
const & rRegexp
)
294 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
295 // where <scheme> is as defined in RFC 2396:
296 if (isScheme(rRegexp
, false))
297 return Regexp(Regexp::KIND_PREFIX
,
304 sal_Unicode
const * p
= rRegexp
.getStr();
305 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
308 scanStringLiteral(&p
, pEnd
, &aPrefix
);
311 throw lang::IllegalArgumentException();
313 // This and the matchString() calls below are some of the few places where
314 // RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
315 // (c.f. https://gerrit.libreoffice.org/3117)
316 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
319 throw lang::IllegalArgumentException();
321 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
324 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
326 OUString aReversePrefix
;
327 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
329 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
331 throw lang::IllegalArgumentException();
333 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
334 true, aReversePrefix
);
336 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
339 throw lang::IllegalArgumentException();
341 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
344 else if (matchString(&p
, pEnd
,
345 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
347 OUString aReversePrefix
;
348 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
349 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
351 throw lang::IllegalArgumentException();
353 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
354 true, aReversePrefix
);
359 if (p
!= pEnd
&& *p
== '(')
365 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
366 throw lang::IllegalArgumentException();
368 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
369 throw lang::IllegalArgumentException();
370 bool bEmptyDomain
= *p
++ == '*';
373 scanStringLiteral(&p
, pEnd
, &aInfix
);
375 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
376 throw lang::IllegalArgumentException();
378 OUString aReversePrefix
;
380 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
381 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
382 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
383 throw lang::IllegalArgumentException();
386 throw lang::IllegalArgumentException();
388 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
389 bOpen
, aReversePrefix
);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */