1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: regexp.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_ucb.hxx"
37 #include "osl/diagnose.h"
38 #include <com/sun/star/lang/IllegalArgumentException.hpp>
39 #include <rtl/ustrbuf.hxx>
40 #include <rtl/ustring.hxx>
42 namespace unnamed_ucb_regexp
{} using namespace unnamed_ucb_regexp
;
43 // unnamed namespaces don't work well yet...
45 using namespace com::sun::star
;
46 using namespace ucb_impl
;
48 //============================================================================
52 //============================================================================
54 inline Regexp::Regexp(Kind eTheKind
, rtl::OUString
const & rThePrefix
,
55 bool bTheEmptyDomain
, rtl::OUString
const & rTheInfix
,
57 rtl::OUString
const & rTheReversePrefix
):
59 m_aPrefix(rThePrefix
),
61 m_aReversePrefix(rTheReversePrefix
),
62 m_bEmptyDomain(bTheEmptyDomain
),
63 m_bTranslation(bTheTranslation
)
65 OSL_ASSERT(m_eKind
== KIND_DOMAIN
66 || !m_bEmptyDomain
&& m_aInfix
.getLength() == 0);
67 OSL_ASSERT(m_bTranslation
|| m_aReversePrefix
.getLength() == 0);
70 //============================================================================
71 namespace unnamed_ucb_regexp
{
73 bool matchStringIgnoreCase(sal_Unicode
const ** pBegin
,
74 sal_Unicode
const * pEnd
,
75 rtl::OUString
const & rString
)
77 sal_Unicode
const * p
= *pBegin
;
79 sal_Unicode
const * q
= rString
.getStr();
80 sal_Unicode
const * qEnd
= q
+ rString
.getLength();
82 if (pEnd
- p
< qEnd
- q
)
87 sal_Unicode c1
= *p
++;
88 sal_Unicode c2
= *q
++;
89 if (c1
>= 'a' && c1
<= 'z')
91 if (c2
>= 'a' && c2
<= 'z')
// Tests rString against this expression; pTranslation and pTranslated are
// out-parameters that receive the translated string and a flag.
//
// What the visible statements establish:
//  - rString has to start with m_aPrefix (compared via matchStringIgnoreCase);
//  - positions are judged by whether they are at the end of the input or at
//    one of the delimiters '/', '?' or '#';
//  - a translated result is assembled as
//        m_aReversePrefix + block 1 + m_aInfix + block 2,
//    while the untranslated result is rString itself with *pTranslated false.
//
// NOTE(review): this listing has lost many of the original lines (see the
// gaps in the leading numbers). The braces, what is presumably a dispatch on
// the expression kind, the statements that record a successful infix match,
// any guards around the out-parameters and the final return are not visible
// here. Restore them from a pristine copy before building or changing this.
103 bool Regexp::matches(rtl::OUString
const & rString
,
104 rtl::OUString
* pTranslation
, bool * pTranslated
) const
// [pBegin, pEnd) spans the complete input string.
106 sal_Unicode
const * pBegin
= rString
.getStr();
107 sal_Unicode
const * pEnd
= pBegin
+ rString
.getLength();
109 bool bMatches
= false;
// Scanning position; passed by address so the matcher can move it.
111 sal_Unicode
const * p
= pBegin
;
112 if (matchStringIgnoreCase(&p
, pEnd
, m_aPrefix
))
// Block 1 initially covers everything from p to the end of the input ...
114 sal_Unicode
const * pBlock1Begin
= p
;
115 sal_Unicode
const * pBlock1End
= pEnd
;
// ... and block 2 starts out as a pair of null pointers.
117 sal_Unicode
const * pBlock2Begin
= 0;
118 sal_Unicode
const * pBlock2End
= 0;
// Match iff p is at the end of the input or at a '/', '?' or '#'.
// NOTE(review): the case label this belongs to is not visible.
127 bMatches
= p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#';
// Same end-or-delimiter test; the statement it guards is not visible.
133 if (p
== pEnd
|| *p
== '/' || *p
== '?' || *p
== '#')
// Try m_aInfix at the current position using a scratch pointer q; it only
// counts if the infix is followed by the end of input or a delimiter.
139 sal_Unicode
const * q
= p
;
140 if (matchStringIgnoreCase(&q
, pEnd
, m_aInfix
)
141 && (q
== pEnd
|| *q
== '/' || *q
== '?' || *q
== '#'))
// Consume one character and test it for a delimiter.
// NOTE(review): the action taken on a delimiter is not visible.
153 sal_Unicode c
= *p
++;
154 if (c
== '/' || c
== '?' || c
== '#')
// Translated result: reverse prefix, block 1, infix, block 2.
166 rtl::OUStringBuffer
aBuffer(m_aReversePrefix
);
167 aBuffer
.append(pBlock1Begin
, pBlock1End
- pBlock1Begin
);
168 aBuffer
.append(m_aInfix
);
169 aBuffer
.append(pBlock2Begin
, pBlock2End
- pBlock2Begin
);
170 *pTranslation
= aBuffer
.makeStringAndClear();
// Untranslated result: hand the input back unchanged and flag it as such.
178 *pTranslation
= rString
;
180 *pTranslated
= false;
188 //============================================================================
189 namespace unnamed_ucb_regexp
{
191 inline bool isAlpha(sal_Unicode c
)
193 return (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z');
196 inline bool isDigit(sal_Unicode c
)
198 return c
>= '0' && c
<= '9';
201 bool isScheme(rtl::OUString
const & rString
, bool bColon
)
203 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
204 // is true) from RFC 2396:
205 sal_Unicode
const * p
= rString
.getStr();
206 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
207 if (p
!= pEnd
&& isAlpha(*p
))
212 sal_Unicode c
= *p
++;
213 if (!(isAlpha(c
) || isDigit(c
)
214 || c
== '+' || c
== '-' || c
== '.'))
215 return bColon
&& c
== ':' && p
== pEnd
;
220 void appendStringLiteral(rtl::OUStringBuffer
* pBuffer
,
221 rtl::OUString
const & rString
)
225 pBuffer
->append(sal_Unicode('"'));
226 sal_Unicode
const * p
= rString
.getStr();
227 sal_Unicode
const * pEnd
= p
+ rString
.getLength();
230 sal_Unicode c
= *p
++;
231 if (c
== '"' || c
== '\\')
232 pBuffer
->append(sal_Unicode('\\'));
235 pBuffer
->append(sal_Unicode('"'));
// Renders this expression as text and returns it.
//
// What the visible statements establish:
//  - non-empty m_aPrefix / m_aReversePrefix / m_aInfix are written through
//    appendStringLiteral, the fixed pattern pieces through appendAscii;
//  - one output shape ends in "->", a string literal and the two characters
//    \1, and wraps its pattern piece in an extra pair of parentheses:
//    "(.*)", "(([/?#].*)?)" or "([^/?#]" + '*'/'+' + infix + "([/?#].*)?)";
//  - a KIND_PREFIX expression whose prefix satisfies isScheme(prefix, true)
//    is rendered as that prefix without its last character;
//  - the remaining shape uses the unparenthesised pieces ".*", "([/?#].*)?"
//    or "[^/?#]" + '*'/'+' + infix + "([/?#].*)?";
//  - '*' is emitted when m_bEmptyDomain is set, '+' otherwise.
//
// NOTE(review): this listing has lost many of the original lines (see the
// gaps in the leading numbers). The leading if that the "else if" below
// belongs to, the tests on bReverse, the per-kind case labels and all braces
// are not visible. Presumably bReverse swaps the roles of m_aPrefix and
// m_aReversePrefix, since both orders appear below; confirm against a
// pristine copy.
240 rtl::OUString
Regexp::getRegexp(bool bReverse
) const
244 rtl::OUStringBuffer aBuffer
;
// Leading literal: one of the two prefixes, skipped when empty.
247 if (m_aReversePrefix
.getLength() != 0)
248 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
252 if (m_aPrefix
.getLength() != 0)
253 appendStringLiteral(&aBuffer
, m_aPrefix
);
// Pattern piece, parenthesised so that \1 can refer to it.
258 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
// NOTE(review): the receiver of this call (original line 262) is missing.
263 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
267 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
268 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
269 if (m_aInfix
.getLength() != 0)
270 appendStringLiteral(&aBuffer
, m_aInfix
);
// NOTE(review): the receiver of this call (original line 271) is missing.
272 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
// Translation part: "->", the other prefix as a literal, then \1.
275 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
278 if (m_aPrefix
.getLength() != 0)
279 appendStringLiteral(&aBuffer
, m_aPrefix
);
283 if (m_aReversePrefix
.getLength() != 0)
284 appendStringLiteral(&aBuffer
, m_aReversePrefix
);
286 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
287 return aBuffer
.makeStringAndClear();
// Short form: a prefix of the shape "<scheme>:" is returned without the ':'.
289 else if (m_eKind
== KIND_PREFIX
&& isScheme(m_aPrefix
, true))
290 return m_aPrefix
.copy(0, m_aPrefix
.getLength() - 1);
// Remaining form: optional prefix literal followed by the bare pattern piece.
293 rtl::OUStringBuffer aBuffer
;
294 if (m_aPrefix
.getLength() != 0)
295 appendStringLiteral(&aBuffer
, m_aPrefix
);
299 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
303 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
307 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
308 aBuffer
.append(sal_Unicode(m_bEmptyDomain
? '*' : '+'));
309 if (m_aInfix
.getLength() != 0)
310 appendStringLiteral(&aBuffer
, m_aInfix
);
311 aBuffer
.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
314 return aBuffer
.makeStringAndClear();
318 //============================================================================
319 namespace unnamed_ucb_regexp
{
321 bool matchString(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
322 sal_Char
const * pString
, size_t nStringLength
)
324 sal_Unicode
const * p
= *pBegin
;
326 sal_uChar
const * q
= reinterpret_cast< sal_uChar
const * >(pString
);
327 sal_uChar
const * qEnd
= q
+ nStringLength
;
329 if (pEnd
- p
< qEnd
- q
)
334 sal_Unicode c1
= *p
++;
335 sal_Unicode c2
= *q
++;
344 bool scanStringLiteral(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
345 rtl::OUString
* pString
)
347 sal_Unicode
const * p
= *pBegin
;
349 if (p
== pEnd
|| *p
++ != '"')
352 rtl::OUStringBuffer aBuffer
;
357 sal_Unicode c
= *p
++;
365 if (c
!= '"' && c
!= '\\')
372 *pString
= aBuffer
.makeStringAndClear();
// Parses the textual form rRegexp into a Regexp object; malformed input is
// reported by throwing lang::IllegalArgumentException.
//
// Input shapes recognisable from the literals matched below ("lit" stands for
// a quoted literal read by scanStringLiteral, \1 for the two characters
// backslash and '1'):
//   <scheme>                              KIND_PREFIX, built with a ":" appended
//   lit .*                                KIND_PREFIX
//   lit (.*)-> lit \1                     KIND_PREFIX with translation
//   lit ([/?#].*)?                        KIND_AUTHORITY
//   lit (([/?#].*)?)-> lit \1             KIND_AUTHORITY with translation
//   lit [^/?#] *|+ lit ([/?#].*)?         KIND_DOMAIN ('*' sets bEmptyDomain)
//   the same with a leading "(" and a trailing ")-> lit \1"
//                                         presumably KIND_DOMAIN with translation
// The result of the scanStringLiteral calls for the leading prefix, the
// KIND_PREFIX reverse prefix and the infix is not checked, so those literals
// may be absent.
//
// NOTE(review): this listing has lost many of the original lines (see the
// gaps in the leading numbers). The conditions guarding several of the throw
// statements, most arguments of the first Regexp(...) call, the declaration
// and assignment of bOpen, and all braces are not visible. Restore them from
// a pristine copy before building or changing this.
378 Regexp
Regexp::parse(rtl::OUString
const & rRegexp
)
380 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
381 // where <scheme> is as defined in RFC 2396:
382 if (isScheme(rRegexp
, false))
383 return Regexp(Regexp::KIND_PREFIX
,
385 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
// General case: scan the input left to right with p.
391 sal_Unicode
const * p
= rRegexp
.getStr();
392 sal_Unicode
const * pEnd
= p
+ rRegexp
.getLength();
// Leading prefix literal; the return value is deliberately not tested here.
394 rtl::OUString aPrefix
;
395 scanStringLiteral(&p
, pEnd
, &aPrefix
);
// NOTE(review): the condition guarding this throw is not visible.
398 throw lang::IllegalArgumentException();
// Shape: lit .*
400 if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(".*")))
// NOTE(review): the condition guarding this throw is not visible.
403 throw lang::IllegalArgumentException();
405 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
406 false, rtl::OUString());
// Shape: lit (.*)-> lit \1
408 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
410 rtl::OUString aReversePrefix
;
411 scanStringLiteral(&p
, pEnd
, &aReversePrefix
);
// NOTE(review): the rest of this condition (original line 414) is missing.
413 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
415 throw lang::IllegalArgumentException();
417 return Regexp(Regexp::KIND_PREFIX
, aPrefix
, false, rtl::OUString(),
418 true, aReversePrefix
);
// Shape: lit ([/?#].*)?
420 else if (matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
// NOTE(review): the condition guarding this throw is not visible.
423 throw lang::IllegalArgumentException();
425 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
426 false, rtl::OUString());
// Shape: lit (([/?#].*)?)-> lit \1
428 else if (matchString(&p
, pEnd
,
429 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
431 rtl::OUString aReversePrefix
;
// Here the reverse prefix literal is mandatory (its result is tested).
// NOTE(review): the rest of this condition (original line 434) is missing.
432 if (!(scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
433 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))
435 throw lang::IllegalArgumentException();
437 return Regexp(Regexp::KIND_AUTHORITY
, aPrefix
, false, rtl::OUString(),
438 true, aReversePrefix
);
// Remaining shape: the domain form, optionally opened by '('.
// NOTE(review): the statements run when '(' is found are not visible.
443 if (p
!= pEnd
&& *p
== '(')
449 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
450 throw lang::IllegalArgumentException();
// The character class has to be followed by '*' or '+'.
452 if (p
== pEnd
|| (*p
!= '*' && *p
!= '+'))
453 throw lang::IllegalArgumentException();
454 bool bEmptyDomain
= *p
++ == '*';
// Infix literal; the return value is not tested here.
456 rtl::OUString aInfix
;
457 scanStringLiteral(&p
, pEnd
, &aInfix
);
459 if (!matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
460 throw lang::IllegalArgumentException();
462 rtl::OUString aReversePrefix
;
// Tail of the translating domain form: ")->", a literal, then \1.
// NOTE(review): the start of this condition (original line 463) is missing.
464 && !(matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM(")->"))
465 && scanStringLiteral(&p
, pEnd
, &aReversePrefix
)
466 && matchString(&p
, pEnd
, RTL_CONSTASCII_STRINGPARAM("\\1"))))
467 throw lang::IllegalArgumentException();
// NOTE(review): the condition guarding this throw is not visible.
470 throw lang::IllegalArgumentException();
// bOpen is passed in the translation-flag position of the constructor call.
472 return Regexp(Regexp::KIND_DOMAIN
, aPrefix
, bEmptyDomain
, aInfix
,
473 bOpen
, aReversePrefix
);