1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <svl/urihelper.hxx>
21 #include <com/sun/star/ucb/Command.hpp>
22 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
23 #include <com/sun/star/ucb/UniversalContentBroker.hpp>
24 #include <com/sun/star/ucb/UnsupportedCommandException.hpp>
25 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
26 #include <com/sun/star/ucb/XCommandProcessor.hpp>
27 #include <com/sun/star/ucb/XContent.hpp>
28 #include <com/sun/star/ucb/XUniversalContentBroker.hpp>
29 #include <com/sun/star/uno/Any.hxx>
30 #include <com/sun/star/uno/Exception.hpp>
31 #include <com/sun/star/uno/Reference.hxx>
32 #include <com/sun/star/uno/RuntimeException.hpp>
33 #include <com/sun/star/uno/XComponentContext.hpp>
34 #include <com/sun/star/uri/UriReferenceFactory.hpp>
35 #include <com/sun/star/uri/XUriReference.hpp>
36 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
37 #include <comphelper/processfactory.hxx>
38 #include <osl/diagnose.h>
39 #include <rtl/character.hxx>
40 #include <rtl/instance.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <rtl/ustring.h>
43 #include <rtl/ustring.hxx>
44 #include <sal/types.h>
45 #include <tools/inetmime.hxx>
46 #include <unotools/charclass.hxx>
48 using namespace com::sun::star
;
// Turns rTheRelURIRef into an absolute URI reference, using rTheBaseURIRef as
// the base, and returns the main URL of the result rendered with
// eDecodeMechanism and eCharset.
// NOTE(review): several lines of this function (braces, part of the
// smartRel2Abs argument list, some guards) are not visible in this listing, so
// the comments below describe only the steps that can be seen.
50 OUString
URIHelper::SmartRel2Abs(INetURLObject
const & rTheBaseURIRef
,
51 OUString
const & rTheRelURIRef
,
52 Link
<OUString
*, bool> const & rMaybeFileHdl
,
53 bool bCheckFileExists
,
55 INetURLObject::EncodeMechanism eEncodeMechanism
,
56 INetURLObject::DecodeMechanism eDecodeMechanism
,
57 rtl_TextEncoding eCharset
,
58 bool bRelativeNonURIs
,
59 INetURLObject::FSysStyle eStyle
)
61 // Backwards compatibility:
// References that begin with a hash sign are tested first; the statement
// guarded by this test is on a line not shown here.
62 if( rTheRelURIRef
.startsWith("#") )
65 INetURLObject aAbsURIRef
;
// When the base URI is itself in error, the reference is parsed on its own
// with SetSmartURL.
66 if (rTheBaseURIRef
.HasError())
67 aAbsURIRef
. SetSmartURL(rTheRelURIRef
, eEncodeMechanism
, eCharset
, eStyle
);
// Resolution against the base. NOTE(review): presumably the else branch of the
// test above; the intervening lines are not visible.
71 aAbsURIRef
= rTheBaseURIRef
.smartRel2Abs(rTheRelURIRef
,
// Tail of a condition whose start is not visible: the resolved reference has
// the file protocol.
80 && (aAbsURIRef
.GetProtocol() == INetProtocol::File
))
// Parse the reference a second time, standalone, to see whether it is really
// a URI of some other scheme.
82 INetURLObject aNonFileURIRef
;
83 aNonFileURIRef
.SetSmartURL(rTheRelURIRef
,
// The standalone parse succeeded and produced a non-file protocol.
87 if (!aNonFileURIRef
.HasError()
88 && aNonFileURIRef
.GetProtocol() != INetProtocol::File
)
// Ask the caller-supplied handler, when one is set, about a copy of the
// reference; its answer is stored in bMaybeFile.
90 bool bMaybeFile
= false;
91 if (rMaybeFileHdl
.IsSet())
93 OUString
aFilePath(rTheRelURIRef
);
94 bMaybeFile
= rMaybeFileHdl
.Call(&aFilePath
);
// Adopt the non-file interpretation. NOTE(review): presumably only when
// bMaybeFile is false; the guard is not visible here.
97 aAbsURIRef
= aNonFileURIRef
;
101 return aAbsURIRef
.GetMainURL(eDecodeMechanism
, eCharset
);
// File-local holder type for the maybe-file handler link. It derives from
// rtl::Static, and the accessors below reach the stored Link through
// MaybeFileHdl::get().
104 namespace { struct MaybeFileHdl
: public rtl::Static
< Link
<OUString
*, bool>, MaybeFileHdl
> {}; }
// Replaces the stored maybe-file handler: assigns rTheMaybeFileHdl to the
// Link object obtained from MaybeFileHdl::get().
106 void URIHelper::SetMaybeFileHdl(Link
<OUString
*, bool> const & rTheMaybeFileHdl
)
108 MaybeFileHdl::get() = rTheMaybeFileHdl
;
// Returns, by value, the Link currently held by MaybeFileHdl::get().
111 Link
<OUString
*, bool> URIHelper::GetMaybeFileHdl()
113 return MaybeFileHdl::get();
// Returns true only when uriReference is a non-empty reference whose target
// reports isAbsolute and isHierarchical and does not report hasRelativePath.
// The is() check comes first, so an empty reference is never dereferenced.
118 bool isAbsoluteHierarchicalUriReference(
119 css::uno::Reference
< css::uri::XUriReference
> const & uriReference
)
121 return uriReference
.is() && uriReference
->isAbsolute()
122 && uriReference
->isHierarchical() && !uriReference
->hasRelativePath();
125 // To improve performance, assume that if for any prefix URL of a given
126 // hierarchical URL either a UCB content cannot be created, or the UCB content
127 // does not support the getCasePreservingURL command, then this will hold for
128 // any other prefix URL of the given URL, too:
// Result codes returned by normalizePrefix. In that function GeneralFailure is
// returned after the content lookup and when the command is unsupported, and
// SpecificFailure when any other css::uno::Exception is caught.
129 enum Result
{ Success
, GeneralFailure
, SpecificFailure
};
// Asks the UCB for the case-preserving form of uri.
// A content object for uri is requested from broker and is then asked to
// execute the getCasePreservingURL command (second Command argument -1, empty
// Any as argument, empty command environment reference).
// Returns GeneralFailure from the early return after the content lookup and
// when the command is unsupported, and SpecificFailure for any other
// css::uno::Exception.
// NOTE(review): the try and if lines, the body of the RuntimeException
// handler, and the success path (including how the result reaches
// *normalized) are on lines not shown in this listing.
131 Result
normalizePrefix( css::uno::Reference
< css::ucb::XUniversalContentBroker
> const & broker
,
132 OUString
const & uri
, OUString
* normalized
)
// Both the broker and the output pointer are required.
134 OSL_ASSERT(broker
.is() && normalized
!= 0);
135 css::uno::Reference
< css::ucb::XContent
> content
;
// Look up the content for uri; an IllegalIdentifierException is swallowed,
// which leaves content empty.
137 content
= broker
->queryContent(broker
->createContentIdentifier(uri
));
138 } catch (css::ucb::IllegalIdentifierException
&) {}
// NOTE(review): presumably guarded by a test for an empty content reference;
// the guard line is not visible.
140 return GeneralFailure
;
143 #if OSL_DEBUG_LEVEL > 0
// Query the content for its command processor (throwing query) and execute
// the command on it.
146 (css::uno::Reference
< css::ucb::XCommandProcessor
>(
147 content
, css::uno::UNO_QUERY_THROW
)->execute(
148 css::ucb::Command("getCasePreservingURL",
149 -1, css::uno::Any()),
151 css::uno::Reference
< css::ucb::XCommandEnvironment
>())
// RuntimeException is caught ahead of the generic Exception handler; its body
// is not visible here.
154 } catch (css::uno::RuntimeException
&) {
156 } catch (css::ucb::UnsupportedCommandException
&) {
157 return GeneralFailure
;
158 } catch (css::uno::Exception
&) {
159 return SpecificFailure
;
165 css::uno::Reference
< css::ucb::XUniversalContentBroker
> const & broker
,
166 css::uno::Reference
< css::uri::XUriReferenceFactory
> const & uriFactory
,
167 OUString
const & uriReference
)
169 // normalizePrefix can potentially fail (a typical example being a file
170 // URL that denotes a non-existing resource); in such a case, try to
171 // normalize as long a prefix of the given URL as possible (i.e., normalize
172 // all the existing directories within the path):
174 sal_Int32 n
= uriReference
.indexOf('#');
175 normalized
= n
== -1 ? uriReference
: uriReference
.copy(0, n
);
176 switch (normalizePrefix(broker
, normalized
, &normalized
)) {
178 return n
== -1 ? normalized
: normalized
+ uriReference
.copy(n
);
181 case SpecificFailure
:
185 css::uno::Reference
< css::uri::XUriReference
> ref(
186 uriFactory
->parse(uriReference
));
187 if (!isAbsoluteHierarchicalUriReference(ref
)) {
190 sal_Int32 count
= ref
->getPathSegmentCount();
194 OUStringBuffer
head(ref
->getScheme());
196 if (ref
->hasAuthority()) {
198 head
.append(ref
->getAuthority());
200 for (sal_Int32 i
= count
- 1; i
> 0; --i
) {
201 OUStringBuffer
buf(head
);
202 for (sal_Int32 j
= 0; j
< i
; ++j
) {
204 buf
.append(ref
->getPathSegment(j
));
206 normalized
= buf
.makeStringAndClear();
207 if (normalizePrefix(broker
, normalized
, &normalized
) != SpecificFailure
)
209 buf
.append(normalized
);
210 css::uno::Reference
< css::uri::XUriReference
> preRef(
211 uriFactory
->parse(normalized
));
212 if (!isAbsoluteHierarchicalUriReference(preRef
)) {
213 // This could only happen if something is inconsistent:
216 sal_Int32 preCount
= preRef
->getPathSegmentCount();
217 // normalizePrefix may have added or removed a final slash:
219 if (preCount
== i
- 1) {
221 } else if (preCount
- 1 == i
&& !buf
.isEmpty()
222 && buf
[buf
.getLength() - 1] == '/')
224 buf
.setLength(buf
.getLength() - 1);
226 // This could only happen if something is inconsistent:
230 for (sal_Int32 j
= i
; j
< count
; ++j
) {
232 buf
.append(ref
->getPathSegment(j
));
234 if (ref
->hasQuery()) {
236 buf
.append(ref
->getQuery());
238 if (ref
->hasFragment()) {
240 buf
.append(ref
->getFragment());
242 return buf
.makeStringAndClear();
// Builds a relative URI reference from uriReference with respect to
// baseUriReference. Both inputs are first passed through normalize and then
// parsed by the URI reference factory; the two parsed references are handed
// to the factory makeRelative call, whose result is returned.
// context must be a non-empty component context (asserted); it is used to
// create both the universal content broker and the URI reference factory.
// NOTE(review): the makeRelative arguments after the first true are on a line
// not shown here.
250 css::uno::Reference
< css::uri::XUriReference
>
251 URIHelper::normalizedMakeRelative(
252 css::uno::Reference
< css::uno::XComponentContext
> const & context
,
253 OUString
const & baseUriReference
, OUString
const & uriReference
)
255 OSL_ASSERT(context
.is());
256 css::uno::Reference
< css::ucb::XUniversalContentBroker
> broker(
257 css::ucb::UniversalContentBroker::create(context
));
258 css::uno::Reference
< css::uri::XUriReferenceFactory
> uriFactory(
259 css::uri::UriReferenceFactory::create(context
));
// Normalize and parse the base first, then the reference to be relativized.
260 return uriFactory
->makeRelative(
261 uriFactory
->parse(normalize(broker
, uriFactory
, baseUriReference
)),
262 uriFactory
->parse(normalize(broker
, uriFactory
, uriReference
)), true,
// String-returning convenience wrapper around normalizedMakeRelative that
// uses the process component context.
// Returns the textual form of the relative reference when one was produced,
// and the unchanged uriReference when the result is an empty reference.
266 OUString
URIHelper::simpleNormalizedMakeRelative(
267 OUString
const & baseUriReference
, OUString
const & uriReference
)
269 com::sun::star::uno::Reference
< com::sun::star::uri::XUriReference
> rel(
270 URIHelper::normalizedMakeRelative(
271 comphelper::getProcessComponentContext(), baseUriReference
,
// Fall back to the input when no relative reference could be made.
273 return rel
.is() ? rel
->getUriReference() : uriReference
;
277 // FindFirstURLInText
// Computes the index that follows the character starting at nPos in rStr.
// The visible condition tests for a complete surrogate pair at nPos: a high
// surrogate, at least two code units remaining, and a low surrogate next.
// NOTE(review): the two result operands of the conditional are on lines not
// shown here; presumably nPos + 2 for a pair and nPos + 1 otherwise.
282 inline sal_Int32
nextChar(OUString
const & rStr
, sal_Int32 nPos
)
284 return rtl::isHighSurrogate(rStr
[nPos
])
285 && rStr
.getLength() - nPos
>= 2
286 && rtl::isLowSurrogate(rStr
[nPos
+ 1]) ?
// Boundary test of kind 1 for position nPos of rStr, with nEnd as the end of
// the examined range. The only visible step consults
// rCharClass.isLetterNumeric for the character at nPos.
// NOTE(review): the rest of the body is not shown in this listing; presumably
// it implements the B1 boundary rule described in the FindFirstURLInText
// comments. Confirm against the full source.
290 bool isBoundary1(CharClass
const & rCharClass
, OUString
const & rStr
,
291 sal_Int32 nPos
, sal_Int32 nEnd
)
295 if (rCharClass
.isLetterNumeric(rStr
, nPos
))
// Boundary test of kind 2 for position nPos of rStr, with nEnd as the end of
// the examined range. The only visible step consults
// rCharClass.isLetterNumeric for the character at nPos.
// NOTE(review): the rest of the body is not shown in this listing; presumably
// it implements the B2 boundary rule described in the FindFirstURLInText
// comments. Confirm against the full source.
312 bool isBoundary2(CharClass
const & rCharClass
, OUString
const & rStr
,
313 sal_Int32 nPos
, sal_Int32 nEnd
)
317 if (rCharClass
.isLetterNumeric(rStr
, nPos
))
// Examines the character of rStr at *pPos and, through the in-out parameters
// pPos and pEnd, records how far a scan may advance.
// NOTE(review): the final parameter, the ASCII test, and most of the switch
// over the table codes are on lines not shown here, so only the visible parts
// are described.
347 bool checkWChar(CharClass
const & rCharClass
, OUString
const & rStr
,
348 sal_Int32
* pPos
, sal_Int32
* pEnd
, bool bBackslash
= false,
351 sal_Unicode c
= rStr
[*pPos
];
// Classification table with one entry per 7-bit character code (128 entries),
// using codes 0 to 4. The trailing comment on each row lists the eight
// characters that row covers. Code 2 occurs only at the backslash position
// and code 3 only at the vertical bar position.
354 static sal_uInt8
const aMap
[128]
355 = { 0, 0, 0, 0, 0, 0, 0, 0,
356 0, 0, 0, 0, 0, 0, 0, 0,
357 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 0, 0, 0, 0, 0, 0, 0,
359 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
360 1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./
361 4, 4, 4, 4, 4, 4, 4, 4, // 01234567
362 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
363 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
364 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO
365 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW
366 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_
367 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg
368 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno
369 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw
370 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
398 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
// A character that rCharClass reports as letter or digit moves both *pPos and
// *pEnd to the position after it.
404 else if (rCharClass
.isLetterNumeric(rStr
, *pPos
))
406 *pEnd
= *pPos
= nextChar(rStr
, *pPos
);
// Index-based wrapper around INetURLObject::scanDomain.
// Scanning starts at index *pPos of rStr and is limited by index nEnd; on
// return *pPos holds the index at which the underlying scan stopped.
// NOTE(review): the nEnd parameter declaration and the return statement are
// on lines not shown here; presumably nLabels is returned.
413 sal_uInt32
scanDomain(OUString
const & rStr
, sal_Int32
* pPos
,
416 sal_Unicode
const * pBuffer
= rStr
.getStr();
417 sal_Unicode
const * p
= pBuffer
+ *pPos
;
// p is passed by name to the scan and afterwards converted back to an index.
418 sal_uInt32 nLabels
= INetURLObject::scanDomain(p
, pBuffer
+ nEnd
, false);
419 *pPos
= sal::static_int_cast
< sal_Int32
>(p
- pBuffer
);
425 OUString
URIHelper::FindFirstURLInText(OUString
const & rText
,
428 CharClass
const & rCharClass
,
429 INetURLObject::EncodeMechanism eMechanism
,
430 rtl_TextEncoding eCharset
,
431 INetURLObject::FSysStyle eStyle
)
433 if (!(rBegin
<= rEnd
&& rEnd
<= rText
.getLength()))
436 // Search for the first substring of [rBegin..rEnd[ that matches any of the
437 // following productions (for which the appropriate style bit is set in
438 // eStyle, if applicable).
440 // 1st Production (known scheme):
441 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
444 // 2nd Production (file):
445 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
447 // 3rd Production (ftp):
448 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
450 // 4th Production (http):
451 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
453 // 5th Production (mailto):
454 // \B2 local-part "@" domain \B1
456 // 6th Production (UNC file):
457 // \B1 "\\" domain "\" *(wchar / "\") \B1
459 // 7th Production (DOS file):
460 // \B1 ALPHA ":\" *(wchar / "\") \B1
462 // 8th Production (Unix-like DOS file):
463 // \B1 ALPHA ":/" *(wchar / "\") \B1
465 // The productions use the following auxiliary rules.
467 // local-part = atom *("." atom)
468 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
469 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
471 // domain = label *("." label)
472 // label = alphanum [*(alphanum / "-") alphanum]
473 // alphanum = ALPHA / DIGIT
474 // wchar = <any uric character (ignoring the escaped rule), or "%", or
475 // a letter or digit (according to rCharClass)>
477 // "\B1" (boundary 1) stands for the beginning or end of the block of text,
478 // or a character that is neither (a) a letter or digit (according to
479 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
480 // (FIXME: What was the rationale for this set of punctuation characters?)
482 // "\B2" (boundary 2) stands for the beginning or end of the block of text,
483 // or a character that is neither (a) a letter or digit (according to
484 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
485 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
486 // 822 <atom> character, or "@" from \B1's set above).
488 // Productions 1--4, and 6--8 try to find a maximum-length match, but they
489 // stop at the first <wchar> character that is a "\B1" character which is
490 // only followed by "\B1" characters (taking "\" and "|" characters into
491 // account appropriately). Production 5 simply tries to find a maximum-
494 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--8
497 // Productions 6--8 are only applicable if the FSYS_DOS bit is set in
500 bool bBoundary1
= true;
501 bool bBoundary2
= true;
502 for (sal_Int32 nPos
= rBegin
; nPos
!= rEnd
; nPos
= nextChar(rText
, nPos
))
504 sal_Unicode c
= rText
[nPos
];
507 if (rtl::isAsciiAlpha(c
))
510 INetProtocol eScheme
= INetURLObject::CompareProtocolScheme(rText
.copy(i
, rEnd
- i
));
511 if (eScheme
== INetProtocol::File
) // 2nd
513 while (rText
[i
++] != ':') ;
514 sal_Int32 nPrefixEnd
= i
;
515 sal_Int32 nUriEnd
= i
;
517 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
, true,
519 if (i
!= nPrefixEnd
&& i
!= rEnd
&& rText
[i
] == '#')
523 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
525 if (nUriEnd
!= nPrefixEnd
526 && isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
528 INetURLObject
aUri(rText
.copy(nPos
, nUriEnd
- nPos
),
529 INetProtocol::File
, eMechanism
, eCharset
,
531 if (!aUri
.HasError())
536 aUri
.GetMainURL(INetURLObject::DECODE_TO_IURI
);
540 else if (eScheme
!= INetProtocol::NotValid
) // 1st
542 while (rText
[i
++] != ':') ;
543 sal_Int32 nPrefixEnd
= i
;
544 sal_Int32 nUriEnd
= i
;
546 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
547 if (i
!= nPrefixEnd
&& i
!= rEnd
&& rText
[i
] == '#')
551 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
553 if (nUriEnd
!= nPrefixEnd
554 && (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
)
555 || rText
[nUriEnd
] == '\\'))
557 INetURLObject
aUri(rText
.copy(nPos
, nUriEnd
- nPos
),
558 INetProtocol::Http
, eMechanism
,
560 if (!aUri
.HasError())
565 aUri
.GetMainURL(INetURLObject::DECODE_TO_IURI
);
572 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
574 && rText
[nPos
+ 3] == '.'
575 && (((rText
[nPos
] == 'w'
576 || rText
[nPos
] == 'W')
577 && (rText
[nPos
+ 1] == 'w'
578 || rText
[nPos
+ 1] == 'W')
579 && (rText
[nPos
+ 2] == 'w'
580 || rText
[nPos
+ 2] == 'W'))
581 || ((rText
[nPos
] == 'f'
582 || rText
[nPos
] == 'F')
583 && (rText
[nPos
+ 1] == 't'
584 || rText
[nPos
+ 1] == 'T')
585 && (rText
[nPos
+ 2] == 'p'
586 || rText
[nPos
+ 2] == 'P'))))
587 // (note that rText[nPos + 3] is guaranteed to be
590 sal_Int32 nUriEnd
= i
;
591 if (i
!= rEnd
&& rText
[i
] == '/')
595 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
597 if (i
!= rEnd
&& rText
[i
] == '#')
601 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
603 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
)
604 || rText
[nUriEnd
] == '\\')
606 INetURLObject
aUri(rText
.copy(nPos
, nUriEnd
- nPos
),
607 INetProtocol::Http
, eMechanism
,
609 if (!aUri
.HasError())
614 aUri
.GetMainURL(INetURLObject::DECODE_TO_IURI
);
619 if ((eStyle
& INetURLObject::FSYS_DOS
) != 0 && rEnd
- nPos
>= 3
620 && rText
[nPos
+ 1] == ':'
621 && (rText
[nPos
+ 2] == '/'
622 || rText
[nPos
+ 2] == '\\')) // 7th, 8th
625 sal_Int32 nUriEnd
= i
;
627 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
628 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
630 INetURLObject
aUri(rText
.copy(nPos
, nUriEnd
- nPos
),
632 INetURLObject::ENCODE_ALL
,
633 RTL_TEXTENCODING_UTF8
,
634 INetURLObject::FSYS_DOS
);
635 if (!aUri
.HasError())
640 aUri
.GetMainURL(INetURLObject::DECODE_TO_IURI
);
645 else if ((eStyle
& INetURLObject::FSYS_DOS
) != 0 && rEnd
- nPos
>= 2
646 && rText
[nPos
] == '\\'
647 && rText
[nPos
+ 1] == '\\') // 6th
649 sal_Int32 i
= nPos
+ 2;
650 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
651 if (nLabels
>= 1 && i
!= rEnd
&& rText
[i
] == '\\')
653 sal_Int32 nUriEnd
= ++i
;
655 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
,
657 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
659 INetURLObject
aUri(rText
.copy(nPos
, nUriEnd
- nPos
),
661 INetURLObject::ENCODE_ALL
,
662 RTL_TEXTENCODING_UTF8
,
663 INetURLObject::FSYS_DOS
);
664 if (!aUri
.HasError())
669 aUri
.GetMainURL(INetURLObject::DECODE_TO_IURI
);
675 if (bBoundary2
&& INetMIME::isAtomChar(c
)) // 5th
678 for (sal_Int32 i
= nPos
+ 1; i
!= rEnd
; ++i
)
680 sal_Unicode c2
= rText
[i
];
681 if (INetMIME::isAtomChar(c2
))
692 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
694 && isBoundary1(rCharClass
, rText
, i
, rEnd
))
696 INetURLObject
aUri(rText
.copy(nPos
, i
- nPos
),
697 INetProtocol::Mailto
,
698 INetURLObject::ENCODE_ALL
);
699 if (!aUri
.HasError())
703 return aUri
.GetMainURL(
704 INetURLObject::DECODE_TO_IURI
);
712 bBoundary1
= isBoundary1(rCharClass
, rText
, nPos
, rEnd
);
713 bBoundary2
= isBoundary2(rCharClass
, rText
, nPos
, rEnd
);
// Parses rURI with eEncodeMechanism and eCharset and, when parsing succeeds,
// returns the URL without its password part (GetURLNoPass, rendered with
// eDecodeMechanism and eCharset).
// NOTE(review): the value returned when parsing fails is on a line not shown
// in this listing.
719 OUString
URIHelper::removePassword(OUString
const & rURI
,
720 INetURLObject::EncodeMechanism eEncodeMechanism
,
721 INetURLObject::DecodeMechanism eDecodeMechanism
,
722 rtl_TextEncoding eCharset
)
724 INetURLObject
aObj(rURI
, eEncodeMechanism
, eCharset
);
725 return aObj
.HasError() ?
727 aObj
.GetURLNoPass(eDecodeMechanism
, eCharset
);
730 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */