1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
24 #include <osl/diagnose.h>
25 #include <com/sun/star/lang/IllegalArgumentException.hpp>
26 #include <rtl/character.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/ustring.hxx>
30 using namespace com::sun::star
;
31 using namespace ucb_impl
;
// Constructor: copies the supplied regexp components into the members.
// The visible initializers set m_aPrefix, m_aReversePrefix, m_bEmptyDomain
// and m_bTranslation.
// NOTE(review): the embedded line numbers skip 39, 41, 43 and 47. The
// bTheTranslation parameter and the initializers for m_eKind and m_aInfix
// (both read by the assertions below) are presumably on those missing
// lines -- confirm against the original file.
37 inline Regexp::Regexp(Kind eTheKind
, OUString
const & rThePrefix
,
38 bool bTheEmptyDomain
, OUString
const & rTheInfix
,
40 OUString
const & rTheReversePrefix
):
42 m_aPrefix(rThePrefix
),
44 m_aReversePrefix(rTheReversePrefix
),
45 m_bEmptyDomain(bTheEmptyDomain
),
46 m_bTranslation(bTheTranslation
)
// Invariant: unless the kind is KIND_DOMAIN, the empty-domain flag must be
// false and the infix must be empty.
48 OSL_ASSERT(m_eKind
== KIND_DOMAIN
49 || (!m_bEmptyDomain
&& m_aInfix
.isEmpty()));
// Invariant: a reverse prefix may only be present when m_bTranslation is set.
50 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.isEmpty());
// Compares the text beginning at *pBegin (bounded by pEnd) with rString,
// using rtl::compareIgnoreAsciiCase on one character pair at a time.
// NOTE(review): the loop header, the return statements and any update of
// *pBegin are not visible in this listing (embedded line numbers skip).
// Presumably the function returns false on a length or character mismatch
// and advances *pBegin past the match on success -- confirm.
56 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
57 sal_Unicode
const * pEnd
,
58 OUString
const & rString
)
// Work on a local cursor rather than on the caller's pointer.
60 sal_Unicode
const * p
= *pBegin
;
// q .. qEnd spans the characters of the string to be matched.
62 sal_Unicode
const * q
= rString
.getStr();
63 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
// Guard: fewer input characters remain than rString contains.
65 if (pEnd
- p
< qEnd
- q
)
// Compare the next pair of characters, ignoring ASCII case; both cursors
// are advanced by the post-increments.
70 if (rtl::compareIgnoreAsciiCase(*p
++, *q
++) != 0)
// Tests rString against this regexp. The prefix is matched first, ignoring
// ASCII case; the remaining checks look at what follows the prefix.
// NOTE(review): the code that chooses between the checks below (presumably
// a dispatch on m_eKind) and the final return are on lines missing from
// this listing -- confirm against the original file.
80 bool Regexp::matches(OUString
const & rString
) const
// pBegin .. pEnd spans the characters of the input string.
82 sal_Unicode
const * pBegin
= rString
.getStr();
83 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
85 bool bMatches
= false;
87 sal_Unicode
const * p
= pBegin
;
// The input has to start with m_aPrefix (ASCII case-insensitive).
88 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
// Match if the input ends here or continues with '/', '?' or '#'.
97 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
// Same end-or-delimiter test, here used as a condition.
103 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
// Try to match the infix at the current position using a separate cursor q,
// so that p itself is left where it is.
109 sal_Unicode
const * q
= p
;
// The infix must be followed by the end of input or by '/', '?' or '#'.
110 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
111 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
// Consume one character and test whether it is one of the delimiters.
120 sal_Unicode c
= *p
++;
121 if (c
== '/' || c
== '?' || c
== '#')
// Checks whether rString has the form of a URI scheme; bColon selects
// whether a single trailing ':' is accepted.
// NOTE(review): the loop header and the final return are on lines missing
// from this listing (embedded line numbers skip) -- confirm.
134 bool isScheme(OUString
const & rString
, bool bColon
)
136 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
137 // is true) from RFC 2396:
138 sal_Unicode
const * p
= rString
.getStr();
139 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// The string must be non-empty and start with an ASCII letter.
140 if (p
!= pEnd
&& rtl::isAsciiAlpha(*p
))
145 sal_Unicode c
= *p
++;
// Characters allowed inside a scheme: ASCII letters and digits, '+', '-'
// and '.'.
146 if (!(rtl::isAsciiAlphanumeric(c
)
147 || c
== '+' || c
== '-' || c
== '.'))
// Any other character is acceptable only when bColon is set, the character
// is ':' and it is the last character of the string.
148 return bColon
&& c
== ':' && p
== pEnd
;
// Appends rString to *pBuffer as a double-quoted string literal, inserting
// a backslash in front of every '"' and '\' character.
// NOTE(review): the loop header and the statement appending the character
// itself are on lines missing from this listing -- confirm.
153 void appendStringLiteral(OUStringBuffer
* pBuffer
,
154 OUString
const & rString
)
// Opening quote.
158 pBuffer
->append('"');
159 sal_Unicode
const * p
= rString
.getStr();
160 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
163 sal_Unicode c
= *p
++;
// Quotes and backslashes get a backslash prepended.
164 if (c
== '"' || c
== '\\')
165 pBuffer
->append('\\');
// Closing quote.
168 pBuffer
->append('"');
// Builds the textual form of this regexp from its members. Two OUStringBuffer
// sections are visible: the first emits parenthesised groups followed by
// "->", the optional reverse-prefix literal and "\1"; the second emits the
// same patterns without the "->" part. In between, a KIND_PREFIX regexp
// whose prefix is a scheme plus ':' is returned as the prefix without its
// last character.
// NOTE(review): the conditions selecting each append (presumably
// m_bTranslation for the first section and a dispatch on m_eKind inside
// each section) are on lines missing from this listing -- confirm.
173 OUString
Regexp::getRegexp() const
177 OUStringBuffer aBuffer
;
// The prefix, when present, is written as a quoted string literal.
178 if (!m_aPrefix
.isEmpty())
179 appendStringLiteral(&aBuffer
, m_aPrefix
);
183 aBuffer
.append("(.*)");
187 aBuffer
.append("(([/?#].*)?)");
191 aBuffer
.append("([^/?#]");
// '*' when m_bEmptyDomain is set, otherwise '+'.
192 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
193 if (!m_aInfix
.isEmpty())
194 appendStringLiteral(&aBuffer
, m_aInfix
);
195 aBuffer
.append("([/?#].*)?)");
// Translation part: "->", the optional reverse-prefix literal, then "\1".
198 aBuffer
.append("->");
199 if (!m_aReversePrefix
.isEmpty())
200 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
201 aBuffer
.append("\\1");
202 return aBuffer
.makeStringAndClear();
// Short form: the prefix is a scheme followed by ':' (isScheme with bColon
// true), so return the prefix with its final character dropped.
204 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
205 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
// Remaining section: the same patterns as above, without the "->" part.
208 OUStringBuffer aBuffer
;
209 if (!m_aPrefix
.isEmpty())
210 appendStringLiteral(&aBuffer
, m_aPrefix
);
214 aBuffer
.append(".*");
218 aBuffer
.append("([/?#].*)?");
222 aBuffer
.append("[^/?#]");
223 aBuffer
.append( m_bEmptyDomain
? '*' : '+' );
224 if (!m_aInfix
.isEmpty())
225 appendStringLiteral(&aBuffer
, m_aInfix
);
226 aBuffer
.append("([/?#].*)?");
229 return aBuffer
.makeStringAndClear();
// Compares the UTF-16 text beginning at *pBegin (bounded by pEnd) with the
// nStringLength bytes at pString.
// NOTE(review): the loop header, the comparison of c1 with c2, the return
// statements and any update of *pBegin are on lines missing from this
// listing. Presumably the function returns true and advances *pBegin only
// on a full match -- confirm.
236 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
237 char const * pString
, size_t nStringLength
)
// Work on a local cursor rather than on the caller's pointer.
239 sal_Unicode
const * p
= *pBegin
;
// Read the pattern as unsigned bytes.
241 unsigned char const * q
= reinterpret_cast< unsigned char const * >(pString
);
242 unsigned char const * qEnd
= q
+ nStringLength
;
// Guard: fewer input characters remain than the pattern contains.
244 if (pEnd
- p
< qEnd
- q
)
// Fetch the next input character and the next pattern byte; both cursors
// are advanced by the post-increments.
249 sal_Unicode c1
= *p
++;
250 sal_Unicode c2
= *q
++;
// Scans a double-quoted string literal starting at *pBegin (bounded by
// pEnd) and stores the collected text in *pString.
// NOTE(review): the third parameter (pString, written to below), the loop
// structure, the appends to aBuffer, the return statements and any update
// of *pBegin are on lines missing from this listing (embedded line numbers
// skip) -- confirm against the original file.
259 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
// Work on a local cursor rather than on the caller's pointer.
262 sal_Unicode
const * p
= *pBegin
;
// A literal has to start with '"'; the post-increment steps over it.
264 if (p
== pEnd
|| *p
++ != '"')
// Collects the characters of the literal.
267 OUStringBuffer aBuffer
;
272 sal_Unicode c
= *p
++;
// True for any character other than '"' and '\'; presumably this validates
// the character that follows a backslash escape -- confirm.
280 if (c
!= '"' && c
!= '\\')
// Hand the collected text to the caller.
287 *pString
= aBuffer
.makeStringAndClear();
// Parses the textual form rRegexp into a Regexp object. The visible code
// recognises a bare scheme, then an optional quoted prefix followed by one
// of several fixed patterns, each optionally continued by "->", a quoted
// reverse prefix and "\1". Input that fits none of the forms is rejected
// with lang::IllegalArgumentException.
// NOTE(review): several conditions (for the throws), some constructor
// arguments and the declarations of aPrefix, aInfix and bOpen are on lines
// missing from this listing (embedded line numbers skip) -- confirm the
// notes below against the original file.
293 Regexp
Regexp::parse(OUString
const & rRegexp
)
295 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
296 // where <scheme> is as defined in RFC 2396:
297 if (isScheme(rRegexp
, false))
298 return Regexp(Regexp::KIND_PREFIX
,
// p .. pEnd spans the characters still to be parsed.
305 sal_Unicode
const * p
= rRegexp
.getStr();
306 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
// Read the leading quoted prefix into aPrefix; the result of the call is
// not used in this statement.
309 scanStringLiteral(&p
, pEnd
, &aPrefix
);
312 throw lang::IllegalArgumentException();
314 // This and the matchString() calls below are some of the few places where
315 // RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
316 // (c.f. https://gerrit.libreoffice.org/3117)
// Form: prefix followed by .*  -> KIND_PREFIX.
317 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
320 throw lang::IllegalArgumentException();
322 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
// Form: prefix followed by (.*)-> then a quoted reverse prefix and \1
// -> KIND_PREFIX with the translation flag set.
325 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
327 OUString aReversePrefix
;
328 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
330 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
332 throw lang::IllegalArgumentException();
334 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, OUString(),
335 true, aReversePrefix
);
// Form: prefix followed by ([/?#].*)?  -> KIND_AUTHORITY.
337 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
340 throw lang::IllegalArgumentException();
342 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
// Form: prefix followed by (([/?#].*)?)-> then a quoted reverse prefix and
// \1  -> KIND_AUTHORITY with the translation flag set.
345 else if (matchString(&p
, pEnd
,
346 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
348 OUString aReversePrefix
;
349 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
350 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
352 throw lang::IllegalArgumentException();
354 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, OUString(),
355 true, aReversePrefix
);
// Remaining form (KIND_DOMAIN): test for an opening '('; presumably this
// sets bOpen, which is passed as the translation flag below -- confirm.
360 if (p
!= pEnd
&& *p
== '(')
// The domain part must start with the character class [^/?#] ...
366 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
367 throw lang::IllegalArgumentException();
// ... followed by a quantifier, either '*' or '+'.
369 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
370 throw lang::IllegalArgumentException();
// '*' sets the empty-domain flag; the post-increment consumes the quantifier.
371 bool bEmptyDomain
= *p
++ == '*';
// Read the quoted infix into aInfix; the result of the call is not used in
// this statement.
374 scanStringLiteral(&p
, pEnd
, &aInfix
);
376 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
377 throw lang::IllegalArgumentException();
379 OUString aReversePrefix
;
// Require ")->" followed by a quoted reverse prefix and \1; the first half
// of this condition is on a missing line (presumably a test of bOpen).
381 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
382 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
383 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
384 throw lang::IllegalArgumentException();
387 throw lang::IllegalArgumentException();
389 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
390 bOpen
, aReversePrefix
);
394 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */