1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
33 #include "osl/diagnose.h"
34 #include <com/sun/star/lang/IllegalArgumentException.hpp>
35 #include <rtl/ustrbuf.hxx>
36 #include <rtl/ustring.hxx>
37 #include <comphelper/string.hxx>
// Declares the named namespace unnamed_ucb_regexp (still empty at this point)
// and imports it into file scope right away, so the helper functions defined
// in it further down can be called without qualification. The remark kept
// below indicates that it is a stand-in for a real unnamed namespace.
39 namespace unnamed_ucb_regexp
{} using namespace unnamed_ucb_regexp
;
40 // unnamed namespaces don't work well yet...
// com::sun::star is imported so that lang::IllegalArgumentException can be
// written in short form later in this file. ucb_impl is presumably the
// namespace that declares Regexp - confirm against the class header.
42 using namespace com::sun::star
;
43 using namespace ucb_impl
;
45 //============================================================================
49 //============================================================================
// Constructor: stores the prefix, the reverse prefix, the empty-domain flag
// and the translation flag in the corresponding members, then asserts two
// consistency rules on the resulting object.
// NOTE(review): this copy of the definition looks truncated. bTheTranslation
// is read in the initializer list but is not declared in the visible
// parameter list, m_eKind and m_aInfix are asserted on without a visible
// initializer, and the braces around the body are absent. The calls in
// Regexp::parse pass six arguments with a bool between the infix and the
// reverse prefix, so a bool bTheTranslation parameter presumably belongs in
// that position - confirm against the class declaration before building.
51 inline Regexp::Regexp(Kind eTheKind
, rtl::OUString
const & rThePrefix
,
52 bool bTheEmptyDomain
, rtl::OUString
const & rTheInfix
,
54 rtl::OUString
const & rTheReversePrefix
):
56 m_aPrefix(rThePrefix
),
58 m_aReversePrefix(rTheReversePrefix
),
59 m_bEmptyDomain(bTheEmptyDomain
),
60 m_bTranslation(bTheTranslation
)
// Rule 1: unless the kind is KIND_DOMAIN, the empty-domain flag must be off
// and the infix must be empty.
62 OSL_ASSERT(m_eKind
== KIND_DOMAIN
63 || (!m_bEmptyDomain
&& m_aInfix
.isEmpty()));
// Rule 2: a non-empty reverse prefix requires translation to be enabled.
64 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.isEmpty());
67 //============================================================================
68 namespace unnamed_ucb_regexp
{
// Compares the text that starts at *pBegin and is bounded by pEnd against
// rString. Judging by the name and by the a..z range checks below, the
// comparison is meant to ignore ASCII letter case.
// NOTE(review): the statements that act on the visible conditions (the result
// of the length guard, the loop header, the case adjustment, the comparison
// of c1 with c2, the update of *pBegin, the return values and all braces) are
// not present in this copy. Restore them before relying on any detail.
70 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
71 sal_Unicode
const * pEnd
,
72 rtl::OUString
const & rString
)
// p is a working copy of the input cursor.
74 sal_Unicode
const * p
= *pBegin
;
// q and qEnd delimit the characters of rString.
76 sal_Unicode
const * q
= rString
.getStr();
77 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
// True when fewer characters remain in the input than rString holds. The
// consequence is missing here; presumably an early return of false.
79 if (pEnd
- p
< qEnd
- q
)
// Fetch the next character from each side and advance both cursors.
84 sal_Unicode c1
= *p
++;
85 sal_Unicode c2
= *q
++;
// True when c1 is an ASCII lower-case letter (consequence missing).
86 if (c1
>= 'a' && c1
<= 'z')
// True when c2 is an ASCII lower-case letter (consequence missing).
88 if (c2
>= 'a' && c2
<= 'z')
// Tests rString against this expression. The surviving code first requires
// m_aPrefix to match at the start of rString (via matchStringIgnoreCase) and
// then evaluates conditions on the remainder of the input. Results are
// reported through bMatches and, for the translation, through the
// out-parameters pTranslation and pTranslated.
// NOTE(review): the control flow of this function (the selection by kind, the
// loop around the infix search, any null checks on the out-parameters, the
// return statement and all braces) is not present in this copy. The comments
// below describe only the statements that survived.
100 bool Regexp::matches(rtl::OUString
const & rString
,
101 rtl::OUString
* pTranslation
, bool * pTranslated
) const
// pBegin and pEnd delimit the characters of the input string.
103 sal_Unicode
const * pBegin
= rString
.getStr();
104 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
106 bool bMatches
= false;
// The prefix is tested at the very start of the input. p is passed by address,
// presumably so that the helper can advance it past the matched prefix.
108 sal_Unicode
const * p
= pBegin
;
109 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
// Block 1 initially spans from the current position to the end of the input.
111 sal_Unicode
const * pBlock1Begin
= p
;
112 sal_Unicode
const * pBlock1End
= pEnd
;
// Block 2 starts out as a pair of null pointers.
114 sal_Unicode
const * pBlock2Begin
= 0;
115 sal_Unicode
const * pBlock2End
= 0;
// Match when the remainder is empty or begins with one of / ? #
124 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
// Same test on the remainder (empty, or beginning with / ? #); the statement
// it guards is missing from this copy.
130 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
// Try the infix at the current position with a scratch cursor q, so that p
// itself is not touched by the attempt.
136 sal_Unicode
const * q
= p
;
// The infix counts only if it is followed by the end of the input or by one
// of / ? #
137 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
138 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
// Consume one character and test whether it is one of / ? # (the consequence
// is missing from this copy).
150 sal_Unicode c
= *p
++;
151 if (c
== '/' || c
== '?' || c
== '#')
// Assemble the translation: reverse prefix, then block 1, then the infix,
// then block 2.
163 rtl::OUStringBuffer
aBuffer(m_aReversePrefix
);
164 aBuffer
.append(pBlock1Begin
, pBlock1End
- pBlock1Begin
);
165 aBuffer
.append(m_aInfix
);
166 aBuffer
.append(pBlock2Begin
, pBlock2End
- pBlock2Begin
);
167 *pTranslation
= aBuffer
.makeStringAndClear();
// Alternative outcome: hand back the input unchanged and report that no
// translation took place.
175 *pTranslation
= rString
;
177 *pTranslated
= false;
185 //============================================================================
186 namespace unnamed_ucb_regexp
{
// Tests whether rString has the shape of a URI scheme, optionally followed by
// a colon when bColon is true (see the original remark inside the body).
// Surviving checks: the string must be non-empty and start with an ASCII
// letter; a character examined later may be an ASCII letter, an ASCII digit,
// a plus sign, a minus sign or a dot; when any other character is met the
// result is true only if bColon is set, that character is a colon and it is
// the last character of the string.
// NOTE(review): the loop header, the remaining return statements and all
// braces are missing from this copy.
188 bool isScheme(rtl::OUString
const & rString
, bool bColon
)
190 using comphelper::string::isalphaAscii
;
191 using comphelper::string::isdigitAscii
;
192 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
193 // is true) from RFC 2396:
194 sal_Unicode
const * p
= rString
.getStr();
195 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// Entry condition: at least one character, and the first one is a letter.
196 if (p
!= pEnd
&& isalphaAscii(*p
))
// Consume the next character.
201 sal_Unicode c
= *p
++;
// c is not a legal scheme character.
202 if (!(isalphaAscii(c
) || isdigitAscii(c
)
203 || c
== '+' || c
== '-' || c
== '.'))
// Accept only a colon in final position, and only when the caller asked for it.
204 return bColon
&& c
== ':' && p
== pEnd
;
// Appends rString to *pBuffer in quoted form: an opening double quote
// character, then the content of rString with a backslash emitted in front of
// every double quote character or backslash, then a closing double quote
// character.
// NOTE(review): the loop header, the append of the character c itself and the
// braces are missing from this copy.
209 void appendStringLiteral(rtl::OUStringBuffer
* pBuffer
,
210 rtl::OUString
const & rString
)
// Opening quote.
214 pBuffer
->append(sal_Unicode('"'));
215 sal_Unicode
const * p
= rString
.getStr();
216 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
// Take the next character; quote and backslash get an escaping backslash.
219 sal_Unicode c
= *p
++;
220 if (c
== '"' || c
== '\\')
221 pBuffer
->append(sal_Unicode('\\'));
// Closing quote.
224 pBuffer
->append(sal_Unicode('"'));
// Renders this expression as text. Three output shapes survive below:
//   1. a translating form made of a pattern with capture groups, the
//      separator ->, a quoted replacement prefix and a back-reference to the
//      first group;
//   2. a shorthand that returns the prefix without its last character when
//      the kind is KIND_PREFIX and the prefix is a scheme plus colon;
//   3. a plain pattern without capture groups around the whole remainder.
// NOTE(review): the conditions that choose between these shapes and between
// the per-kind pattern fragments, including every use of bReverse, are not
// present in this copy, and neither are the braces. Two appendAscii calls
// below have also lost their aBuffer receiver.
229 rtl::OUString
Regexp::getRegexp(bool bReverse
) const
// --- Shape 1: translating form -------------------------------------------
233 rtl::OUStringBuffer aBuffer
;
// Quoted reverse prefix, skipped when it is empty.
236 if (!m_aReversePrefix
.isEmpty())
237 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
// Quoted prefix, skipped when it is empty.
241 if (!m_aPrefix
.isEmpty())
242 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Capture group that accepts an arbitrary remainder.
247 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
// Capture group for an optional remainder that begins with one of / ? #
// NOTE(review): receiver missing on this call.
252 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
// Capture group that opens with the domain part: characters other than
// / ? # repeated with a star when an empty domain is allowed and with a plus
// otherwise, followed by the quoted infix if there is one.
256 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
257 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
258 if (!m_aInfix
.isEmpty())
259 appendStringLiteral(&aBuffer
, m_aInfix
);
// Optional remainder and the closing parenthesis of the group.
// NOTE(review): receiver missing on this call.
261 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
// Separator between the pattern and the replacement.
264 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
// Quoted prefix, skipped when it is empty.
267 if (!m_aPrefix
.isEmpty())
268 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Quoted reverse prefix, skipped when it is empty.
272 if (!m_aReversePrefix
.isEmpty())
273 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
// The replacement ends with a back-reference to the first capture group.
275 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
276 return aBuffer
.makeStringAndClear();
// --- Shape 2: scheme shorthand -------------------------------------------
// A prefix expression whose prefix is a scheme followed by a colon is written
// as the prefix minus its final character.
278 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
279 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
// --- Shape 3: plain pattern ----------------------------------------------
282 rtl::OUStringBuffer aBuffer
;
// Quoted prefix, skipped when it is empty.
283 if (!m_aPrefix
.isEmpty())
284 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Arbitrary remainder.
288 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
// Optional remainder that begins with one of / ? #
292 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
// Domain part, quantifier chosen by the empty-domain flag, then the quoted
// infix if there is one, then the optional remainder.
296 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
297 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
298 if (!m_aInfix
.isEmpty())
299 appendStringLiteral(&aBuffer
, m_aInfix
);
300 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
303 return aBuffer
.makeStringAndClear();
307 //============================================================================
308 namespace unnamed_ucb_regexp
{
// Compares the text that starts at *pBegin and is bounded by pEnd against the
// 8-bit string pString of nStringLength characters. In contrast to
// matchStringIgnoreCase no letter range checks are visible here, so this is
// presumably an exact comparison - confirm once the body is restored.
// NOTE(review): the result of the length guard, the loop header, the
// comparison of c1 with c2, the update of *pBegin, the return values and all
// braces are missing from this copy.
310 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
311 sal_Char
const * pString
, size_t nStringLength
)
// p is a working copy of the input cursor.
313 sal_Unicode
const * p
= *pBegin
;
// The pattern is read through an unsigned character pointer; presumably so
// that its values widen to sal_Unicode without sign extension.
315 sal_uChar
const * q
= reinterpret_cast< sal_uChar
const * >(pString
);
316 sal_uChar
const * qEnd
= q
+ nStringLength
;
// True when fewer characters remain in the input than the pattern holds
// (consequence missing; presumably an early return of false).
318 if (pEnd
- p
< qEnd
- q
)
// Fetch the next character from each side and advance both cursors.
323 sal_Unicode c1
= *p
++;
324 sal_Unicode c2
= *q
++;
// Reads a quoted literal that starts at *pBegin (bounded by pEnd) and stores
// the collected text in *pString. The bool result is used by Regexp::parse as
// a success indicator inside a chain of conditions.
// Surviving checks: the input must be non-empty and start with a double quote
// character; later a character is tested for being neither a double quote
// character nor a backslash. That second test presumably validates the
// character after an escaping backslash, mirroring appendStringLiteral -
// confirm once the body is restored.
// NOTE(review): the loop, the handling of the closing quote and of escapes,
// the appends to aBuffer, the update of *pBegin, the return values and all
// braces are missing from this copy.
333 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
334 rtl::OUString
* pString
)
// p is a working copy of the input cursor.
336 sal_Unicode
const * p
= *pBegin
;
// True when there is no opening quote; the cursor p is advanced past the
// inspected character as a side effect of the test.
338 if (p
== pEnd
|| *p
++ != '"')
// Collects the content of the literal.
341 rtl::OUStringBuffer aBuffer
;
// Consume the next character.
346 sal_Unicode c
= *p
++;
// True when c is neither a quote nor a backslash (consequence missing).
354 if (c
!= '"' && c
!= '\\')
// Hand the collected text to the caller.
361 *pString
= aBuffer
.makeStringAndClear();
// Builds a Regexp from its textual form rRegexp and throws
// lang::IllegalArgumentException on the rejecting paths visible below.
// Forms recognisable from the surviving code:
//   - a bare scheme, treated as a KIND_PREFIX expression whose prefix ends in
//     a colon (see the original remark at the top of the body);
//   - an optional quoted prefix followed by one of the fixed pattern tails
//     that map to KIND_PREFIX or KIND_AUTHORITY, each with or without a
//     translation part (-> plus a quoted reverse prefix plus a back-reference
//     to group 1);
//   - otherwise a KIND_DOMAIN expression: an optional opening parenthesis, the
//     character class excluding / ? #, a star or plus quantifier, an optional
//     quoted infix, the optional remainder group and, in the parenthesised
//     variant, the translation part.
// NOTE(review): this copy has lost many lines. Missing are most of the
// conditions that guard the throw statements, the declaration and assignment
// of bOpen, the left operand of the + that appends the colon, the head of the
// condition that continues with && further down, and all braces. Restore them
// from the upstream file before building.
367 Regexp
Regexp::parse(rtl::OUString
const & rRegexp
)
369 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
370 // where <scheme> is as defined in RFC 2396:
371 if (isScheme(rRegexp
, false))
372 return Regexp(Regexp::KIND_PREFIX
,
374 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
// General case: walk the text with the cursor p.
380 sal_Unicode
const * p
= rRegexp
.getStr();
381 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
// Optional quoted prefix; the result of the scan is not checked here.
383 rtl::OUString aPrefix
;
384 scanStringLiteral(&p
, pEnd
, &aPrefix
);
387 throw lang::IllegalArgumentException();
// Tail accepting an arbitrary remainder: prefix expression, no translation.
389 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
392 throw lang::IllegalArgumentException();
394 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
395 false, rtl::OUString());
// Same tail as a capture group followed by ->, then the quoted reverse prefix
// and the back-reference: prefix expression with translation.
397 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
399 rtl::OUString aReversePrefix
;
400 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
402 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
404 throw lang::IllegalArgumentException();
406 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
407 true, aReversePrefix
);
// Tail accepting an optional remainder that begins with one of / ? # :
// authority expression, no translation.
409 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
412 throw lang::IllegalArgumentException();
414 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
415 false, rtl::OUString());
// Same tail wrapped in a capture group and followed by ->: authority
// expression with translation. Here the reverse prefix literal and the
// back-reference are both required.
417 else if (matchString(&p
, pEnd
,
418 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
420 rtl::OUString aReversePrefix
;
421 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
422 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
424 throw lang::IllegalArgumentException();
426 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
427 true, aReversePrefix
);
// Domain expression. An opening parenthesis is tested for here; the bOpen
// flag passed to the constructor at the end presumably records the outcome.
432 if (p
!= pEnd
&& *p
== '(')
// The character class that excludes / ? # is mandatory.
438 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
439 throw lang::IllegalArgumentException();
// It must be followed by a star or a plus; a star means that an empty domain
// is allowed.
441 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
442 throw lang::IllegalArgumentException();
443 bool bEmptyDomain
= *p
++ == '*';
// Optional quoted infix; the result of the scan is not checked here.
445 rtl::OUString aInfix
;
446 scanStringLiteral(&p
, pEnd
, &aInfix
);
// The optional remainder group is mandatory in the text.
448 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
449 throw lang::IllegalArgumentException();
// Translation part: closing parenthesis plus ->, the quoted reverse prefix
// and the back-reference. The head of this condition is missing.
451 rtl::OUString aReversePrefix
;
453 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
454 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
455 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
456 throw lang::IllegalArgumentException();
459 throw lang::IllegalArgumentException();
461 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
462 bOpen
, aReversePrefix
);
466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */