OInterfaceContainerHelper3 needs to be thread-safe
[LibreOffice.git] / svl / source / misc / urihelper.cxx
blob04d168c4d9764b3be07dd562ef3bad8f69c324fd
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <memory>
21 #include <string_view>
23 #include <sal/config.h>
25 #include <unicode/idna.h>
27 #include <svl/urihelper.hxx>
28 #include <com/sun/star/ucb/Command.hpp>
29 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
30 #include <com/sun/star/ucb/UniversalContentBroker.hpp>
31 #include <com/sun/star/ucb/UnsupportedCommandException.hpp>
32 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/ucb/XContent.hpp>
35 #include <com/sun/star/ucb/XUniversalContentBroker.hpp>
36 #include <com/sun/star/uno/Any.hxx>
37 #include <com/sun/star/uno/Exception.hpp>
38 #include <com/sun/star/uno/Reference.hxx>
39 #include <com/sun/star/uno/RuntimeException.hpp>
40 #include <com/sun/star/uno/XComponentContext.hpp>
41 #include <com/sun/star/uri/UriReferenceFactory.hpp>
42 #include <com/sun/star/uri/XUriReference.hpp>
43 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
44 #include <comphelper/processfactory.hxx>
45 #include <osl/diagnose.h>
46 #include <rtl/character.hxx>
47 #include <rtl/instance.hxx>
48 #include <rtl/ustrbuf.hxx>
49 #include <rtl/ustring.hxx>
50 #include <sal/types.h>
51 #include <sal/log.hxx>
52 #include <tools/inetmime.hxx>
53 #include <unotools/charclass.hxx>
55 using namespace com::sun::star;
57 OUString URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
58 OUString const & rTheRelURIRef,
59 Link<OUString *, bool> const & rMaybeFileHdl,
60 bool bCheckFileExists,
61 bool bIgnoreFragment,
62 INetURLObject::EncodeMechanism eEncodeMechanism,
63 INetURLObject::DecodeMechanism eDecodeMechanism,
64 rtl_TextEncoding eCharset,
65 FSysStyle eStyle)
67 // Backwards compatibility:
68 if( rTheRelURIRef.startsWith("#") )
69 return rTheRelURIRef;
71 INetURLObject aAbsURIRef;
72 if (rTheBaseURIRef.HasError())
73 aAbsURIRef. SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
74 else
76 bool bWasAbsolute;
77 aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
78 bWasAbsolute,
79 bIgnoreFragment,
80 eEncodeMechanism,
81 eCharset,
82 false/*bRelativeNonURIs*/,
83 eStyle);
84 if (bCheckFileExists
85 && !bWasAbsolute
86 && (aAbsURIRef.GetProtocol() == INetProtocol::File))
88 INetURLObject aNonFileURIRef;
89 aNonFileURIRef.SetSmartURL(rTheRelURIRef,
90 eEncodeMechanism,
91 eCharset,
92 eStyle);
93 if (!aNonFileURIRef.HasError()
94 && aNonFileURIRef.GetProtocol() != INetProtocol::File)
96 bool bMaybeFile = false;
97 if (rMaybeFileHdl.IsSet())
99 OUString aFilePath(rTheRelURIRef);
100 bMaybeFile = rMaybeFileHdl.Call(&aFilePath);
102 if (!bMaybeFile)
103 aAbsURIRef = aNonFileURIRef;
107 return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
110 namespace { struct MaybeFileHdl : public rtl::Static< Link<OUString *, bool>, MaybeFileHdl > {}; }
112 void URIHelper::SetMaybeFileHdl(Link<OUString *, bool> const & rTheMaybeFileHdl)
114 MaybeFileHdl::get() = rTheMaybeFileHdl;
117 Link<OUString *, bool> const & URIHelper::GetMaybeFileHdl()
119 return MaybeFileHdl::get();
122 namespace {
124 bool isAbsoluteHierarchicalUriReference(
125 css::uno::Reference< css::uri::XUriReference > const & uriReference)
127 return uriReference.is() && uriReference->isAbsolute()
128 && !uriReference->hasRelativePath();
// Outcome of trying to normalize one prefix URL.
//
// To improve performance, assume that if for any prefix URL of a given
// hierarchical URL either a UCB content cannot be created, or the UCB content
// does not support the getCasePreservingURL command, then this will hold for
// any other prefix URL of the given URL, too.  That situation is reported as
// GeneralFailure; other failures are reported as SpecificFailure.
enum Result
{
    Success,
    GeneralFailure,
    SpecificFailure
};
137 Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
138 OUString const & uri, OUString * normalized)
140 OSL_ASSERT(broker.is() && normalized != nullptr);
141 css::uno::Reference< css::ucb::XContent > content;
142 try {
143 content = broker->queryContent(broker->createContentIdentifier(uri));
144 } catch (css::ucb::IllegalIdentifierException &) {}
145 if (!content.is()) {
146 return GeneralFailure;
148 try {
149 bool ok =
150 (css::uno::Reference< css::ucb::XCommandProcessor >(
151 content, css::uno::UNO_QUERY_THROW)->execute(
152 css::ucb::Command("getCasePreservingURL",
153 -1, css::uno::Any()),
155 css::uno::Reference< css::ucb::XCommandEnvironment >())
156 >>= *normalized);
157 OSL_ASSERT(ok);
158 } catch (css::uno::RuntimeException &) {
159 throw;
160 } catch (css::ucb::UnsupportedCommandException &) {
161 return GeneralFailure;
162 } catch (css::uno::Exception &) {
163 return SpecificFailure;
165 return Success;
168 OUString normalize(
169 css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
170 css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
171 OUString const & uriReference)
173 // normalizePrefix can potentially fail (a typically example being a file
174 // URL that denotes a non-existing resource); in such a case, try to
175 // normalize as long a prefix of the given URL as possible (i.e., normalize
176 // all the existing directories within the path):
177 OUString normalized;
178 sal_Int32 n = uriReference.indexOf('#');
179 normalized = n == -1 ? uriReference : uriReference.copy(0, n);
180 switch (normalizePrefix(broker, normalized, &normalized)) {
181 case Success:
182 return n == -1 ? normalized : normalized + uriReference.subView(n);
183 case GeneralFailure:
184 return uriReference;
185 case SpecificFailure:
186 default:
187 break;
189 css::uno::Reference< css::uri::XUriReference > ref(
190 uriFactory->parse(uriReference));
191 if (!isAbsoluteHierarchicalUriReference(ref)) {
192 return uriReference;
194 sal_Int32 count = ref->getPathSegmentCount();
195 if (count < 2) {
196 return uriReference;
198 OUStringBuffer head(ref->getScheme());
199 head.append(':');
200 if (ref->hasAuthority()) {
201 head.append("//");
202 head.append(ref->getAuthority());
204 for (sal_Int32 i = count - 1; i > 0; --i) {
205 OUStringBuffer buf(head);
206 for (sal_Int32 j = 0; j < i; ++j) {
207 buf.append('/');
208 buf.append(ref->getPathSegment(j));
210 normalized = buf.makeStringAndClear();
211 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
213 buf.append(normalized);
214 css::uno::Reference< css::uri::XUriReference > preRef(
215 uriFactory->parse(normalized));
216 if (!isAbsoluteHierarchicalUriReference(preRef)) {
217 // This could only happen if something is inconsistent:
218 break;
220 sal_Int32 preCount = preRef->getPathSegmentCount();
221 // normalizePrefix may have added or removed a final slash:
222 if (preCount != i) {
223 if (preCount == i - 1) {
224 buf.append('/');
225 } else if (preCount - 1 == i && !buf.isEmpty()
226 && buf[buf.getLength() - 1] == '/')
228 buf.setLength(buf.getLength() - 1);
229 } else {
230 // This could only happen if something is inconsistent:
231 break;
234 for (sal_Int32 j = i; j < count; ++j) {
235 buf.append('/');
236 buf.append(ref->getPathSegment(j));
238 if (ref->hasQuery()) {
239 buf.append('?');
240 buf.append(ref->getQuery());
242 if (ref->hasFragment()) {
243 buf.append('#');
244 buf.append(ref->getFragment());
246 return buf.makeStringAndClear();
249 return uriReference;
254 css::uno::Reference< css::uri::XUriReference >
255 URIHelper::normalizedMakeRelative(
256 css::uno::Reference< css::uno::XComponentContext > const & context,
257 OUString const & baseUriReference, OUString const & uriReference)
259 OSL_ASSERT(context.is());
260 css::uno::Reference< css::ucb::XUniversalContentBroker > broker(
261 css::ucb::UniversalContentBroker::create(context));
262 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
263 css::uri::UriReferenceFactory::create(context));
264 return uriFactory->makeRelative(
265 uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
266 uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
267 true, false);
270 OUString URIHelper::simpleNormalizedMakeRelative(
271 OUString const & baseUriReference, OUString const & uriReference)
273 css::uno::Reference< css::uri::XUriReference > rel(
274 URIHelper::normalizedMakeRelative(
275 comphelper::getProcessComponentContext(), baseUriReference,
276 uriReference));
277 return rel.is() ? rel->getUriReference() : uriReference;
281 // FindFirstURLInText
284 namespace {
286 sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
288 return rtl::isHighSurrogate(rStr[nPos])
289 && rStr.getLength() - nPos >= 2
290 && rtl::isLowSurrogate(rStr[nPos + 1]) ?
291 nPos + 2 : nPos + 1;
294 bool isBoundary1(CharClass const & rCharClass, OUString const & rStr,
295 sal_Int32 nPos, sal_Int32 nEnd)
297 if (nPos == nEnd)
298 return true;
299 if (rCharClass.isLetterNumeric(rStr, nPos))
300 return false;
301 switch (rStr[nPos])
303 case '$':
304 case '%':
305 case '&':
306 case '-':
307 case '/':
308 case '@':
309 case '\\':
310 return false;
311 default:
312 return true;
316 bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
317 sal_Int32 nPos, sal_Int32 nEnd)
319 if (nPos == nEnd)
320 return true;
321 if (rCharClass.isLetterNumeric(rStr, nPos))
322 return false;
323 switch (rStr[nPos])
325 case '!':
326 case '#':
327 case '$':
328 case '%':
329 case '&':
330 case '\'':
331 case '*':
332 case '+':
333 case '-':
334 case '/':
335 case '=':
336 case '?':
337 case '@':
338 case '^':
339 case '_':
340 case '`':
341 case '{':
342 case '|':
343 case '}':
344 case '~':
345 return false;
346 default:
347 return true;
351 bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
352 sal_Int32 * pPos, sal_Int32 * pEnd, bool bBackslash = false,
353 bool bPipe = false)
355 sal_Unicode c = rStr[*pPos];
356 if (rtl::isAscii(c))
358 static sal_uInt8 const aMap[128]
359 = { 0, 0, 0, 0, 0, 0, 0, 0,
360 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
364 1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./
365 4, 4, 4, 4, 4, 4, 4, 4, // 01234567
366 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
367 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
368 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO
369 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW
370 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_
371 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg
372 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno
373 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw
374 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
375 switch (aMap[c])
377 default: // not uric
378 return false;
380 case 1: // uric
381 ++(*pPos);
382 return true;
384 case 2: // "\"
385 if (bBackslash)
387 *pEnd = ++(*pPos);
388 return true;
390 else
391 return false;
393 case 3: // "|"
394 if (bPipe)
396 *pEnd = ++(*pPos);
397 return true;
399 else
400 return false;
402 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
403 // isBoundary1)
404 *pEnd = ++(*pPos);
405 return true;
408 else if (rCharClass.isLetterNumeric(rStr, *pPos))
410 *pEnd = *pPos = nextChar(rStr, *pPos);
411 return true;
413 else
414 return false;
417 sal_uInt32 scanDomain(OUString const & rStr, sal_Int32 * pPos,
418 sal_Int32 nEnd)
420 sal_Unicode const * pBuffer = rStr.getStr();
421 sal_Unicode const * p = pBuffer + *pPos;
422 sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
423 *pPos = sal::static_int_cast< sal_Int32 >(p - pBuffer);
424 return nLabels;
429 OUString URIHelper::FindFirstURLInText(OUString const & rText,
430 sal_Int32 & rBegin,
431 sal_Int32 & rEnd,
432 CharClass const & rCharClass,
433 INetURLObject::EncodeMechanism eMechanism,
434 rtl_TextEncoding eCharset)
436 if (rBegin > rEnd || rEnd > rText.getLength())
437 return OUString();
439 // Search for the first substring of [rBegin..rEnd[ that matches any of the
440 // following productions (for which the appropriate style bit is set in
441 // eStyle, if applicable).
443 // 1st Production (known scheme):
444 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
445 // \B1
447 // 2nd Production (file):
448 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
450 // 3rd Production (ftp):
451 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
453 // 4th Production (http):
454 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
456 // 5th Production (mailto):
457 // \B2 local-part "@" domain \B1
459 // 6th Production (UNC file):
460 // \B1 "\\" domain "\" *(wchar / "\") \B1
462 // 7th Production (DOS file):
463 // \B1 ALPHA ":\" *(wchar / "\") \B1
465 // 8th Production (Unix-like DOS file):
466 // \B1 ALPHA ":/" *(wchar / "\") \B1
468 // The productions use the following auxiliary rules.
470 // local-part = atom *("." atom)
471 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
472 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
473 // / "~")
474 // domain = label *("." label)
475 // label = alphanum [*(alphanum / "-") alphanum]
476 // alphanum = ALPHA / DIGIT
477 // wchar = <any uric character (ignoring the escaped rule), or "%", or
478 // a letter or digit (according to rCharClass)>
480 // "\B1" (boundary 1) stands for the beginning or end of the block of text,
481 // or a character that is neither (a) a letter or digit (according to
482 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
483 // (FIXME: What was the rationale for this set of punctuation characters?)
485 // "\B2" (boundary 2) stands for the beginning or end of the block of text,
486 // or a character that is neither (a) a letter or digit (according to
487 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
488 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
489 // 822 <atom> character, or "@" from \B1's set above).
491 // Productions 1--4, and 6--8 try to find a maximum-length match, but they
492 // stop at the first <wchar> character that is a "\B1" character which is
493 // only followed by "\B1" characters (taking "\" and "|" characters into
494 // account appropriately). Production 5 simply tries to find a maximum-
495 // length match.
497 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--9
498 // use EncodeMechanism::All.
500 // Productions 6--9 are only applicable if the FSysStyle::Dos bit is set in
501 // eStyle.
503 bool bBoundary1 = true;
504 bool bBoundary2 = true;
505 for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
507 sal_Unicode c = rText[nPos];
508 if (bBoundary1)
510 if (rtl::isAsciiAlpha(c))
512 sal_Int32 i = nPos;
513 INetProtocol eScheme = INetURLObject::CompareProtocolScheme(rText.copy(i, rEnd - i));
514 if (eScheme == INetProtocol::File) // 2nd
516 while (rText[i++] != ':') ;
517 sal_Int32 nPrefixEnd = i;
518 sal_Int32 nUriEnd = i;
519 while (i != rEnd
520 && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
521 true)) ;
522 if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
524 ++i;
525 while (i != rEnd
526 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
528 if (nUriEnd != nPrefixEnd
529 && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
531 INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
532 INetProtocol::File, eMechanism, eCharset,
533 FSysStyle::Detect);
534 if (!aUri.HasError())
536 rBegin = nPos;
537 rEnd = nUriEnd;
538 return
539 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
543 else if (eScheme != INetProtocol::NotValid) // 1st
545 while (rText[i++] != ':') ;
546 sal_Int32 nPrefixEnd = i;
547 sal_Int32 nUriEnd = i;
548 while (i != rEnd
549 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
550 if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
552 ++i;
553 while (i != rEnd
554 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
556 if (nUriEnd != nPrefixEnd
557 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
558 || rText[nUriEnd] == '\\'))
560 INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
561 INetProtocol::Http, eMechanism,
562 eCharset);
563 if (!aUri.HasError())
565 rBegin = nPos;
566 rEnd = nUriEnd;
567 return
568 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
573 // 3rd, 4th:
574 i = nPos;
575 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
576 if (nLabels >= 3
577 && rText[nPos + 3] == '.'
578 && (((rText[nPos] == 'w'
579 || rText[nPos] == 'W')
580 && (rText[nPos + 1] == 'w'
581 || rText[nPos + 1] == 'W')
582 && (rText[nPos + 2] == 'w'
583 || rText[nPos + 2] == 'W'))
584 || ((rText[nPos] == 'f'
585 || rText[nPos] == 'F')
586 && (rText[nPos + 1] == 't'
587 || rText[nPos + 1] == 'T')
588 && (rText[nPos + 2] == 'p'
589 || rText[nPos + 2] == 'P'))))
590 // (note that rText.GetChar(nPos + 3) is guaranteed to be
591 // valid)
593 sal_Int32 nUriEnd = i;
594 if (i != rEnd && rText[i] == '/')
596 nUriEnd = ++i;
597 while (i != rEnd
598 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
600 if (i != rEnd && rText[i] == '#')
602 ++i;
603 while (i != rEnd
604 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
606 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
607 || rText[nUriEnd] == '\\')
609 INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
610 INetProtocol::Http, eMechanism,
611 eCharset);
612 if (!aUri.HasError())
614 rBegin = nPos;
615 rEnd = nUriEnd;
616 return
617 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
622 if (rEnd - nPos >= 3
623 && rText[nPos + 1] == ':'
624 && (rText[nPos + 2] == '/'
625 || rText[nPos + 2] == '\\')) // 7th, 8th
627 i = nPos + 3;
628 sal_Int32 nUriEnd = i;
629 while (i != rEnd
630 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
631 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
633 INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
634 INetProtocol::File,
635 INetURLObject::EncodeMechanism::All,
636 RTL_TEXTENCODING_UTF8,
637 FSysStyle::Dos);
638 if (!aUri.HasError())
640 rBegin = nPos;
641 rEnd = nUriEnd;
642 return
643 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
648 else if (rEnd - nPos >= 2
649 && rText[nPos] == '\\'
650 && rText[nPos + 1] == '\\') // 6th
652 sal_Int32 i = nPos + 2;
653 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
654 if (nLabels >= 1 && i != rEnd && rText[i] == '\\')
656 sal_Int32 nUriEnd = ++i;
657 while (i != rEnd
658 && checkWChar(rCharClass, rText, &i, &nUriEnd,
659 true)) ;
660 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
662 INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
663 INetProtocol::File,
664 INetURLObject::EncodeMechanism::All,
665 RTL_TEXTENCODING_UTF8,
666 FSysStyle::Dos);
667 if (!aUri.HasError())
669 rBegin = nPos;
670 rEnd = nUriEnd;
671 return
672 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
678 if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
680 bool bDot = false;
681 for (sal_Int32 i = nPos + 1; i != rEnd; ++i)
683 sal_Unicode c2 = rText[i];
684 if (INetMIME::isAtomChar(c2))
685 bDot = false;
686 else if (bDot)
687 break;
688 else if (c2 == '.')
689 bDot = true;
690 else
692 if (c2 == '@')
694 ++i;
695 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
696 if (nLabels >= 1
697 && isBoundary1(rCharClass, rText, i, rEnd))
699 INetURLObject aUri(rText.copy(nPos, i - nPos),
700 INetProtocol::Mailto,
701 INetURLObject::EncodeMechanism::All);
702 if (!aUri.HasError())
704 rBegin = nPos;
705 rEnd = i;
706 return aUri.GetMainURL(
707 INetURLObject::DecodeMechanism::ToIUri);
711 break;
715 bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
716 bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
718 rBegin = rEnd;
719 return OUString();
722 OUString URIHelper::removePassword(OUString const & rURI,
723 INetURLObject::EncodeMechanism eEncodeMechanism,
724 INetURLObject::DecodeMechanism eDecodeMechanism,
725 rtl_TextEncoding eCharset)
727 INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
728 return aObj.HasError() ?
729 rURI :
730 aObj.GetURLNoPass(eDecodeMechanism, eCharset);
733 OUString URIHelper::resolveIdnaHost(OUString const & url) {
734 css::uno::Reference<css::uri::XUriReference> uri(
735 css::uri::UriReferenceFactory::create(
736 comphelper::getProcessComponentContext())
737 ->parse(url));
738 if (!(uri.is() && uri->hasAuthority())) {
739 return url;
741 auto auth(uri->getAuthority());
742 if (auth.isEmpty())
743 return url;
744 sal_Int32 hostStart = auth.indexOf('@') + 1;
745 sal_Int32 hostEnd = auth.getLength();
746 while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
747 --hostEnd;
749 if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
750 --hostEnd;
751 } else {
752 hostEnd = auth.getLength();
754 auto asciiOnly = true;
755 for (auto i = hostStart; i != hostEnd; ++i) {
756 if (!rtl::isAscii(auth[i])) {
757 asciiOnly = false;
758 break;
761 if (asciiOnly) {
762 // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
763 return url;
765 UErrorCode e = U_ZERO_ERROR;
766 std::unique_ptr<icu::IDNA> idna(
767 icu::IDNA::createUTS46Instance(
768 (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ
769 #if U_ICU_VERSION_MAJOR_NUM >= 49
770 | UIDNA_CHECK_CONTEXTO
771 #endif
773 e));
774 if (U_FAILURE(e)) {
775 SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
776 return url;
778 icu::UnicodeString ascii;
779 icu::IDNAInfo info;
780 idna->nameToASCII(
781 icu::UnicodeString(
782 reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
783 hostEnd - hostStart),
784 ascii, info, e);
785 if (U_FAILURE(e) || info.hasErrors()) {
786 return url;
788 OUStringBuffer buf(uri->getScheme());
789 buf.append("://").append(std::u16string_view(auth).substr(0, hostStart));
790 buf.append(
791 reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
792 ascii.length());
793 buf.append(std::u16string_view(auth).substr(hostEnd)).append(uri->getPath());
794 if (uri->hasQuery()) {
795 buf.append('?').append(uri->getQuery());
797 if (uri->hasFragment()) {
798 buf.append('#').append(uri->getFragment());
800 return buf.makeStringAndClear();
803 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */