1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <string_view>
23 #include <sal/config.h>
25 #include <unicode/idna.h>
27 #include <svl/urihelper.hxx>
28 #include <com/sun/star/ucb/Command.hpp>
29 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
30 #include <com/sun/star/ucb/UniversalContentBroker.hpp>
31 #include <com/sun/star/ucb/UnsupportedCommandException.hpp>
32 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/ucb/XContent.hpp>
35 #include <com/sun/star/ucb/XUniversalContentBroker.hpp>
36 #include <com/sun/star/uno/Any.hxx>
37 #include <com/sun/star/uno/Exception.hpp>
38 #include <com/sun/star/uno/Reference.hxx>
39 #include <com/sun/star/uno/RuntimeException.hpp>
40 #include <com/sun/star/uno/XComponentContext.hpp>
41 #include <com/sun/star/uri/UriReferenceFactory.hpp>
42 #include <com/sun/star/uri/XUriReference.hpp>
43 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
44 #include <comphelper/processfactory.hxx>
45 #include <osl/diagnose.h>
46 #include <rtl/character.hxx>
47 #include <rtl/ustrbuf.hxx>
48 #include <rtl/ustring.hxx>
49 #include <sal/types.h>
50 #include <sal/log.hxx>
51 #include <tools/inetmime.hxx>
52 #include <unotools/charclass.hxx>
54 using namespace com::sun::star
;
56 OUString
URIHelper::SmartRel2Abs(INetURLObject
const & rTheBaseURIRef
,
57 OUString
const & rTheRelURIRef
,
58 Link
<OUString
*, bool> const & rMaybeFileHdl
,
59 bool bCheckFileExists
,
61 INetURLObject::EncodeMechanism eEncodeMechanism
,
62 INetURLObject::DecodeMechanism eDecodeMechanism
,
63 rtl_TextEncoding eCharset
,
66 // Backwards compatibility:
67 if( rTheRelURIRef
.startsWith("#") )
70 INetURLObject aAbsURIRef
;
71 if (rTheBaseURIRef
.HasError())
72 aAbsURIRef
. SetSmartURL(rTheRelURIRef
, eEncodeMechanism
, eCharset
, eStyle
);
76 aAbsURIRef
= rTheBaseURIRef
.smartRel2Abs(rTheRelURIRef
,
81 false/*bRelativeNonURIs*/,
85 && (aAbsURIRef
.GetProtocol() == INetProtocol::File
))
87 INetURLObject aNonFileURIRef
;
88 aNonFileURIRef
.SetSmartURL(rTheRelURIRef
,
92 if (!aNonFileURIRef
.HasError()
93 && aNonFileURIRef
.GetProtocol() != INetProtocol::File
)
95 bool bMaybeFile
= false;
96 if (rMaybeFileHdl
.IsSet())
98 OUString
aFilePath(rTheRelURIRef
);
99 bMaybeFile
= rMaybeFileHdl
.Call(&aFilePath
);
102 aAbsURIRef
= std::move(aNonFileURIRef
);
106 return aAbsURIRef
.GetMainURL(eDecodeMechanism
, eCharset
);
// File-local storage for the "maybe file" callback: it is assigned by
// URIHelper::SetMaybeFileHdl and handed out by const reference from
// URIHelper::GetMaybeFileHdl.
109 namespace { Link
<OUString
*, bool> gMaybeFileHdl
; }
111 void URIHelper::SetMaybeFileHdl(Link
<OUString
*, bool> const & rTheMaybeFileHdl
)
113 gMaybeFileHdl
= rTheMaybeFileHdl
;
116 Link
<OUString
*, bool> const & URIHelper::GetMaybeFileHdl()
118 return gMaybeFileHdl
;
123 bool isAbsoluteHierarchicalUriReference(
124 css::uno::Reference
< css::uri::XUriReference
> const & uriReference
)
126 return uriReference
.is() && uriReference
->isAbsolute()
127 && !uriReference
->hasRelativePath();
130 // To improve performance, assume that if for any prefix URL of a given
131 // hierarchical URL either a UCB content cannot be created, or the UCB content
132 // does not support the getCasePreservingURL command, then this will hold for
133 // any other prefix URL of the given URL, too:
// Outcome codes returned by normalizePrefix.
// - GeneralFailure: returned after the UCB content lookup (presumably when no
//   content could be obtained -- confirm against the full source) and when the
//   getCasePreservingURL command raises UnsupportedCommandException.
// - SpecificFailure: returned when any other css::uno::Exception is caught.
// - Success: NOTE(review): presumably the normalized URL was stored in the
//   out-parameter; the return site is not visible here -- confirm.
134 enum Result
{ Success
, GeneralFailure
, SpecificFailure
};
136 Result
normalizePrefix( css::uno::Reference
< css::ucb::XUniversalContentBroker
> const & broker
,
137 OUString
const & uri
, OUString
* normalized
)
139 assert(broker
.is() && normalized
!= nullptr);
140 css::uno::Reference
< css::ucb::XContent
> content
;
142 content
= broker
->queryContent(broker
->createContentIdentifier(uri
));
143 } catch (css::ucb::IllegalIdentifierException
&) {}
145 return GeneralFailure
;
149 (css::uno::Reference
< css::ucb::XCommandProcessor
>(
150 content
, css::uno::UNO_QUERY_THROW
)->execute(
151 css::ucb::Command(u
"getCasePreservingURL"_ustr
,
152 -1, css::uno::Any()),
154 css::uno::Reference
< css::ucb::XCommandEnvironment
>())
157 } catch (css::uno::RuntimeException
&) {
159 } catch (css::ucb::UnsupportedCommandException
&) {
160 return GeneralFailure
;
161 } catch (css::uno::Exception
&) {
162 return SpecificFailure
;
168 css::uno::Reference
< css::ucb::XUniversalContentBroker
> const & broker
,
169 css::uno::Reference
< css::uri::XUriReferenceFactory
> const & uriFactory
,
170 OUString
const & uriReference
)
172 // normalizePrefix can potentially fail (a typical example being a file
173 // URL that denotes a non-existing resource); in such a case, try to
174 // normalize as long a prefix of the given URL as possible (i.e., normalize
175 // all the existing directories within the path):
177 sal_Int32 n
= uriReference
.indexOf('#');
178 normalized
= n
== -1 ? uriReference
: uriReference
.copy(0, n
);
179 switch (normalizePrefix(broker
, normalized
, &normalized
)) {
181 return n
== -1 ? normalized
: normalized
+ uriReference
.subView(n
);
184 case SpecificFailure
:
188 css::uno::Reference
< css::uri::XUriReference
> ref(
189 uriFactory
->parse(uriReference
));
190 if (!isAbsoluteHierarchicalUriReference(ref
)) {
193 sal_Int32 count
= ref
->getPathSegmentCount();
197 OUStringBuffer
head(ref
->getScheme());
199 if (ref
->hasAuthority()) {
200 head
.append("//" + ref
->getAuthority());
202 for (sal_Int32 i
= count
- 1; i
> 0; --i
) {
203 OUStringBuffer
buf(head
);
204 for (sal_Int32 j
= 0; j
< i
; ++j
) {
206 buf
.append(ref
->getPathSegment(j
));
208 normalized
= buf
.makeStringAndClear();
209 if (normalizePrefix(broker
, normalized
, &normalized
) != SpecificFailure
)
211 buf
.append(normalized
);
212 css::uno::Reference
< css::uri::XUriReference
> preRef(
213 uriFactory
->parse(normalized
));
214 if (!isAbsoluteHierarchicalUriReference(preRef
)) {
215 // This could only happen if something is inconsistent:
218 sal_Int32 preCount
= preRef
->getPathSegmentCount();
219 // normalizePrefix may have added or removed a final slash:
221 if (preCount
== i
- 1) {
223 } else if (preCount
- 1 == i
&& !buf
.isEmpty()
224 && buf
[buf
.getLength() - 1] == '/')
226 buf
.setLength(buf
.getLength() - 1);
228 // This could only happen if something is inconsistent:
232 for (sal_Int32 j
= i
; j
< count
; ++j
) {
234 buf
.append(ref
->getPathSegment(j
));
236 if (ref
->hasQuery()) {
238 buf
.append(ref
->getQuery());
240 if (ref
->hasFragment()) {
242 buf
.append(ref
->getFragment());
244 return buf
.makeStringAndClear();
252 css::uno::Reference
< css::uri::XUriReference
>
253 URIHelper::normalizedMakeRelative(
254 css::uno::Reference
< css::uno::XComponentContext
> const & context
,
255 OUString
const & baseUriReference
, OUString
const & uriReference
)
257 OSL_ASSERT(context
.is());
258 css::uno::Reference
< css::ucb::XUniversalContentBroker
> broker(
259 css::ucb::UniversalContentBroker::create(context
));
260 css::uno::Reference
< css::uri::XUriReferenceFactory
> uriFactory(
261 css::uri::UriReferenceFactory::create(context
));
262 return uriFactory
->makeRelative(
263 uriFactory
->parse(normalize(broker
, uriFactory
, baseUriReference
)),
264 uriFactory
->parse(normalize(broker
, uriFactory
, uriReference
)), true,
268 OUString
URIHelper::simpleNormalizedMakeRelative(
269 OUString
const & baseUriReference
, OUString
const & uriReference
)
271 css::uno::Reference
< css::uri::XUriReference
> rel(
272 URIHelper::normalizedMakeRelative(
273 comphelper::getProcessComponentContext(), baseUriReference
,
275 return rel
.is() ? rel
->getUriReference() : uriReference
;
279 // FindFirstURLInText
284 sal_Int32
nextChar(std::u16string_view rStr
, sal_Int32 nPos
)
286 return rtl::isHighSurrogate(rStr
[nPos
])
287 && rStr
.size() - nPos
>= 2
288 && rtl::isLowSurrogate(rStr
[nPos
+ 1]) ?
292 bool isBoundary1(CharClass
const & rCharClass
, OUString
const & rStr
,
293 sal_Int32 nPos
, sal_Int32 nEnd
)
297 if (rCharClass
.isLetterNumeric(rStr
, nPos
))
314 bool isBoundary2(CharClass
const & rCharClass
, OUString
const & rStr
,
315 sal_Int32 nPos
, sal_Int32 nEnd
)
319 if (rCharClass
.isLetterNumeric(rStr
, nPos
))
349 // tdf#145381 Added MatchingBracketDepth counter to detect matching closing
350 // brackets that are part of the uri
351 bool checkWChar(CharClass
const & rCharClass
, OUString
const & rStr
,
352 sal_Int32
* pPos
, sal_Int32
* pEnd
,
353 sal_Int32
* pMatchingBracketDepth
= nullptr,
354 bool bBackslash
= false, bool bPipe
= false)
356 sal_Unicode c
= rStr
[*pPos
];
359 static sal_uInt8
const aMap
[128]
360 = { 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 0, 0, 0, 0, 0, 0, 0,
364 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
365 5, 6, 1, 1, 1, 4, 1, 4, // ()*+,-./
366 4, 4, 4, 4, 4, 4, 4, 4, // 01234567
367 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
368 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
369 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO
370 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW
371 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_
372 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg
373 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno
374 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw
375 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
403 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
408 case 5: // opening bracket
410 if(nullptr != pMatchingBracketDepth
)
411 ++(*pMatchingBracketDepth
);
414 case 6: // closing bracket
416 if(nullptr != pMatchingBracketDepth
&& *pMatchingBracketDepth
> 0)
418 --(*pMatchingBracketDepth
);
419 // tdf#145381 When there was an opening bracket, detect this closing bracket
420 // as part of the uri
427 else if (rCharClass
.isLetterNumeric(rStr
, *pPos
))
429 *pEnd
= *pPos
= nextChar(rStr
, *pPos
);
436 sal_uInt32
scanDomain(OUString
const & rStr
, sal_Int32
* pPos
,
439 sal_Unicode
const * pBuffer
= rStr
.getStr();
440 sal_Unicode
const * p
= pBuffer
+ *pPos
;
441 sal_uInt32 nLabels
= INetURLObject::scanDomain(p
, pBuffer
+ nEnd
, false);
442 *pPos
= sal::static_int_cast
< sal_Int32
>(p
- pBuffer
);
448 OUString
URIHelper::FindFirstURLInText(OUString
const & rText
,
451 CharClass
const & rCharClass
,
452 INetURLObject::EncodeMechanism eMechanism
,
453 rtl_TextEncoding eCharset
)
455 if (rBegin
> rEnd
|| rEnd
> rText
.getLength())
458 // Search for the first substring of [rBegin..rEnd[ that matches any of the
459 // following productions (for which the appropriate style bit is set in
460 // eStyle, if applicable).
462 // 1st Production (known scheme):
463 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
466 // 2nd Production (file):
467 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
469 // 3rd Production (ftp):
470 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
472 // 4th Production (http):
473 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
475 // 5th Production (mailto):
476 // \B2 local-part "@" domain \B1
478 // 6th Production (UNC file):
479 // \B1 "\\" domain "\" *(wchar / "\") \B1
481 // 7th Production (DOS file):
482 // \B1 ALPHA ":\" *(wchar / "\") \B1
484 // 8th Production (Unix-like DOS file):
485 // \B1 ALPHA ":/" *(wchar / "\") \B1
487 // The productions use the following auxiliary rules.
489 // local-part = atom *("." atom)
490 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
491 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
493 // domain = label *("." label)
494 // label = alphanum [*(alphanum / "-") alphanum]
495 // alphanum = ALPHA / DIGIT
496 // wchar = <any uric character (ignoring the escaped rule), or "%", or
497 // a letter or digit (according to rCharClass)>
499 // "\B1" (boundary 1) stands for the beginning or end of the block of text,
500 // or a character that is neither (a) a letter or digit (according to
501 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
502 // (FIXME: What was the rationale for this set of punctuation characters?)
504 // "\B2" (boundary 2) stands for the beginning or end of the block of text,
505 // or a character that is neither (a) a letter or digit (according to
506 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
507 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
508 // 822 <atom> character, or "@" from \B1's set above).
510 // Productions 1--4, and 6--8 try to find a maximum-length match, but they
511 // stop at the first <wchar> character that is a "\B1" character which is
512 // only followed by "\B1" characters (taking "\" and "|" characters into
513 // account appropriately). Production 5 simply tries to find a maximum-
516 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--8
517 // use EncodeMechanism::All.
519 // Productions 6--8 are only applicable if the FSysStyle::Dos bit is set in
522 // tdf#145381: In addition to the productions I added a mechanism to detect
523 // matching brackets. The task presents the case of a URL that ends on a
524 // closing bracket. This needs to be detected as part of the uri in the case
525 // that a matching opening bracket exists.
527 bool bBoundary1
= true;
528 bool bBoundary2
= true;
529 for (sal_Int32 nPos
= rBegin
; nPos
!= rEnd
; nPos
= nextChar(rText
, nPos
))
531 sal_Unicode c
= rText
[nPos
];
534 if (rtl::isAsciiAlpha(c
))
537 INetProtocol eScheme
= INetURLObject::CompareProtocolScheme(rText
.subView(i
, rEnd
- i
));
538 if (eScheme
== INetProtocol::File
) // 2nd
540 while (rText
[i
++] != ':') ;
541 sal_Int32 nPrefixEnd
= i
;
542 sal_Int32 nUriEnd
= i
;
544 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
, nullptr, true,
546 if (i
!= nPrefixEnd
&& i
!= rEnd
&& rText
[i
] == '#')
550 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
552 if (nUriEnd
!= nPrefixEnd
553 && isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
555 INetURLObject
aUri(rText
.subView(nPos
, nUriEnd
- nPos
),
556 INetProtocol::File
, eMechanism
, eCharset
,
558 if (!aUri
.HasError())
563 aUri
.GetMainURL(INetURLObject::DecodeMechanism::ToIUri
);
567 else if (eScheme
!= INetProtocol::NotValid
) // 1st
569 while (rText
[i
++] != ':') ;
570 sal_Int32 nPrefixEnd
= i
;
571 sal_Int32 nUriEnd
= i
;
572 sal_Int32 nMatchingBracketDepth
= 0;
574 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
,
575 &nMatchingBracketDepth
)) ;
576 if (i
!= nPrefixEnd
&& i
!= rEnd
&& rText
[i
] == '#')
580 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
582 if (nUriEnd
!= nPrefixEnd
583 && (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
)
584 || rText
[nUriEnd
] == '\\'))
586 INetURLObject
aUri(rText
.subView(nPos
, nUriEnd
- nPos
),
587 INetProtocol::Http
, eMechanism
,
589 if (!aUri
.HasError())
594 aUri
.GetMainURL(INetURLObject::DecodeMechanism::ToIUri
);
601 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
603 && rText
[nPos
+ 3] == '.'
604 && (((rText
[nPos
] == 'w'
605 || rText
[nPos
] == 'W')
606 && (rText
[nPos
+ 1] == 'w'
607 || rText
[nPos
+ 1] == 'W')
608 && (rText
[nPos
+ 2] == 'w'
609 || rText
[nPos
+ 2] == 'W'))
610 || ((rText
[nPos
] == 'f'
611 || rText
[nPos
] == 'F')
612 && (rText
[nPos
+ 1] == 't'
613 || rText
[nPos
+ 1] == 'T')
614 && (rText
[nPos
+ 2] == 'p'
615 || rText
[nPos
+ 2] == 'P'))))
616 // (note that rText[nPos + 3] is guaranteed to be
619 sal_Int32 nUriEnd
= i
;
620 if (i
!= rEnd
&& rText
[i
] == '/')
624 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
626 if (i
!= rEnd
&& rText
[i
] == '#')
630 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
632 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
)
633 || rText
[nUriEnd
] == '\\')
635 INetURLObject
aUri(rText
.subView(nPos
, nUriEnd
- nPos
),
636 INetProtocol::Http
, eMechanism
,
638 if (!aUri
.HasError())
643 aUri
.GetMainURL(INetURLObject::DecodeMechanism::ToIUri
);
649 && rText
[nPos
+ 1] == ':'
650 && (rText
[nPos
+ 2] == '/'
651 || rText
[nPos
+ 2] == '\\')) // 7th, 8th
654 sal_Int32 nUriEnd
= i
;
656 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
)) ;
657 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
659 INetURLObject
aUri(rText
.subView(nPos
, nUriEnd
- nPos
),
661 INetURLObject::EncodeMechanism::All
,
662 RTL_TEXTENCODING_UTF8
,
664 if (!aUri
.HasError())
669 aUri
.GetMainURL(INetURLObject::DecodeMechanism::ToIUri
);
674 else if (rEnd
- nPos
>= 2
675 && rText
[nPos
] == '\\'
676 && rText
[nPos
+ 1] == '\\') // 6th
678 sal_Int32 i
= nPos
+ 2;
679 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
680 if (nLabels
>= 1 && i
!= rEnd
&& rText
[i
] == '\\')
682 sal_Int32 nUriEnd
= ++i
;
684 && checkWChar(rCharClass
, rText
, &i
, &nUriEnd
,
686 if (isBoundary1(rCharClass
, rText
, nUriEnd
, rEnd
))
688 INetURLObject
aUri(rText
.subView(nPos
, nUriEnd
- nPos
),
690 INetURLObject::EncodeMechanism::All
,
691 RTL_TEXTENCODING_UTF8
,
693 if (!aUri
.HasError())
698 aUri
.GetMainURL(INetURLObject::DecodeMechanism::ToIUri
);
704 if (bBoundary2
&& INetMIME::isAtomChar(c
)) // 5th
707 for (sal_Int32 i
= nPos
+ 1; i
!= rEnd
; ++i
)
709 sal_Unicode c2
= rText
[i
];
710 if (INetMIME::isAtomChar(c2
))
721 sal_uInt32 nLabels
= scanDomain(rText
, &i
, rEnd
);
723 && isBoundary1(rCharClass
, rText
, i
, rEnd
))
725 INetURLObject
aUri(rText
.subView(nPos
, i
- nPos
),
726 INetProtocol::Mailto
,
727 INetURLObject::EncodeMechanism::All
);
728 if (!aUri
.HasError())
732 return aUri
.GetMainURL(
733 INetURLObject::DecodeMechanism::ToIUri
);
741 bBoundary1
= isBoundary1(rCharClass
, rText
, nPos
, rEnd
);
742 bBoundary2
= isBoundary2(rCharClass
, rText
, nPos
, rEnd
);
748 OUString
URIHelper::FindFirstDOIInText(std::u16string_view rText
,
751 CharClass
const & rCharClass
)
753 if (rBegin
> rEnd
|| rEnd
> static_cast<sal_Int32
>(rText
.size()))
757 sal_Int32 count
= rEnd
-rBegin
;
758 OUString
candidate(rText
.substr(rBegin
, count
));
759 // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
760 if (candidate
.startsWithIgnoreAsciiCase("doi:10."))
764 for (sal_Int32 i
=start
; i
<count
; i
++)
766 sal_Unicode c
= candidate
[i
];
767 // Match 4 to 9 digits before slash
776 if ( rCharClass
.isDigit(candidate
,i
) )
780 else if (c
=='/' && digit
>=4 && i
<count
-1)
790 // Match [-._;()\/:a-zA-Z0-9] after slash
791 else if (!( rCharClass
.isAlphaNumeric(candidate
, i
) || c
== '.' || c
== '-' || c
=='_' ||
792 c
==';' || c
=='(' || c
==')' || c
=='\\' || (c
=='/' && i
<count
-1) || c
==':'))
798 if (flag
&& digit
==-1)
800 return OUString::Concat("https://doi.org/")+candidate
.subView(4);
807 OUString
URIHelper::removePassword(OUString
const & rURI
,
808 INetURLObject::EncodeMechanism eEncodeMechanism
,
809 INetURLObject::DecodeMechanism eDecodeMechanism
,
810 rtl_TextEncoding eCharset
)
812 INetURLObject
aObj(rURI
, eEncodeMechanism
, eCharset
);
813 return aObj
.HasError() ?
815 aObj
.GetURLNoPass(eDecodeMechanism
, eCharset
);
818 OUString
URIHelper::resolveIdnaHost(OUString
const & url
) {
819 css::uno::Reference
<css::uri::XUriReference
> uri(
820 css::uri::UriReferenceFactory::create(
821 comphelper::getProcessComponentContext())
823 if (!(uri
.is() && uri
->hasAuthority())) {
826 auto auth(uri
->getAuthority());
829 sal_Int32 hostStart
= auth
.indexOf('@') + 1;
830 sal_Int32 hostEnd
= auth
.getLength();
831 while (hostEnd
> hostStart
&& rtl::isAsciiDigit(auth
[hostEnd
- 1])) {
834 if (hostEnd
> hostStart
&& auth
[hostEnd
- 1] == ':') {
837 hostEnd
= auth
.getLength();
839 auto asciiOnly
= true;
840 for (auto i
= hostStart
; i
!= hostEnd
; ++i
) {
841 if (!rtl::isAscii(auth
[i
])) {
847 // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
850 UErrorCode e
= U_ZERO_ERROR
;
851 std::unique_ptr
<icu::IDNA
> idna(
852 icu::IDNA::createUTS46Instance(
853 (UIDNA_USE_STD3_RULES
| UIDNA_CHECK_BIDI
| UIDNA_CHECK_CONTEXTJ
| UIDNA_CHECK_CONTEXTO
),
856 SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e
);
859 icu::UnicodeString ascii
;
863 reinterpret_cast<UChar
const *>(auth
.getStr() + hostStart
),
864 hostEnd
- hostStart
),
866 if (U_FAILURE(e
) || info
.hasErrors()) {
869 OUStringBuffer
buf(uri
->getScheme());
870 buf
.append(OUString::Concat("://") + auth
.subView(0, hostStart
));
872 reinterpret_cast<sal_Unicode
const *>(ascii
.getBuffer()),
874 buf
.append(auth
.subView(hostEnd
) + uri
->getPath());
875 if (uri
->hasQuery()) {
876 buf
.append("?" + uri
->getQuery());
878 if (uri
->hasFragment()) {
879 buf
.append("#" + uri
->getFragment());
881 return buf
.makeStringAndClear();
884 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */