tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / svl / source / misc / urihelper.cxx
blobf64390d47d495cbfabf17d8ba0d296074afbe1f1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <memory>
21 #include <string_view>
23 #include <sal/config.h>
25 #include <unicode/idna.h>
27 #include <svl/urihelper.hxx>
28 #include <com/sun/star/ucb/Command.hpp>
29 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
30 #include <com/sun/star/ucb/UniversalContentBroker.hpp>
31 #include <com/sun/star/ucb/UnsupportedCommandException.hpp>
32 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/ucb/XContent.hpp>
35 #include <com/sun/star/ucb/XUniversalContentBroker.hpp>
36 #include <com/sun/star/uno/Any.hxx>
37 #include <com/sun/star/uno/Exception.hpp>
38 #include <com/sun/star/uno/Reference.hxx>
39 #include <com/sun/star/uno/RuntimeException.hpp>
40 #include <com/sun/star/uno/XComponentContext.hpp>
41 #include <com/sun/star/uri/UriReferenceFactory.hpp>
42 #include <com/sun/star/uri/XUriReference.hpp>
43 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
44 #include <comphelper/processfactory.hxx>
45 #include <osl/diagnose.h>
46 #include <rtl/character.hxx>
47 #include <rtl/ustrbuf.hxx>
48 #include <rtl/ustring.hxx>
49 #include <sal/types.h>
50 #include <sal/log.hxx>
51 #include <tools/inetmime.hxx>
52 #include <unotools/charclass.hxx>
54 using namespace com::sun::star;
56 OUString URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
57 OUString const & rTheRelURIRef,
58 Link<OUString *, bool> const & rMaybeFileHdl,
59 bool bCheckFileExists,
60 bool bIgnoreFragment,
61 INetURLObject::EncodeMechanism eEncodeMechanism,
62 INetURLObject::DecodeMechanism eDecodeMechanism,
63 rtl_TextEncoding eCharset,
64 FSysStyle eStyle)
66 // Backwards compatibility:
67 if( rTheRelURIRef.startsWith("#") )
68 return rTheRelURIRef;
70 INetURLObject aAbsURIRef;
71 if (rTheBaseURIRef.HasError())
72 aAbsURIRef. SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
73 else
75 bool bWasAbsolute;
76 aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
77 bWasAbsolute,
78 bIgnoreFragment,
79 eEncodeMechanism,
80 eCharset,
81 false/*bRelativeNonURIs*/,
82 eStyle);
83 if (bCheckFileExists
84 && !bWasAbsolute
85 && (aAbsURIRef.GetProtocol() == INetProtocol::File))
87 INetURLObject aNonFileURIRef;
88 aNonFileURIRef.SetSmartURL(rTheRelURIRef,
89 eEncodeMechanism,
90 eCharset,
91 eStyle);
92 if (!aNonFileURIRef.HasError()
93 && aNonFileURIRef.GetProtocol() != INetProtocol::File)
95 bool bMaybeFile = false;
96 if (rMaybeFileHdl.IsSet())
98 OUString aFilePath(rTheRelURIRef);
99 bMaybeFile = rMaybeFileHdl.Call(&aFilePath);
101 if (!bMaybeFile)
102 aAbsURIRef = std::move(aNonFileURIRef);
106 return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
109 namespace { Link<OUString *, bool> gMaybeFileHdl; }
111 void URIHelper::SetMaybeFileHdl(Link<OUString *, bool> const & rTheMaybeFileHdl)
113 gMaybeFileHdl = rTheMaybeFileHdl;
116 Link<OUString *, bool> const & URIHelper::GetMaybeFileHdl()
118 return gMaybeFileHdl;
121 namespace {
123 bool isAbsoluteHierarchicalUriReference(
124 css::uno::Reference< css::uri::XUriReference > const & uriReference)
126 return uriReference.is() && uriReference->isAbsolute()
127 && !uriReference->hasRelativePath();
130 // To improve performance, assume that if for any prefix URL of a given
131 // hierarchical URL either a UCB content cannot be created, or the UCB content
132 // does not support the getCasePreservingURL command, then this will hold for
133 // any other prefix URL of the given URL, too:
134 enum Result { Success, GeneralFailure, SpecificFailure };
136 Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
137 OUString const & uri, OUString * normalized)
139 assert(broker.is() && normalized != nullptr);
140 css::uno::Reference< css::ucb::XContent > content;
141 try {
142 content = broker->queryContent(broker->createContentIdentifier(uri));
143 } catch (css::ucb::IllegalIdentifierException &) {}
144 if (!content.is()) {
145 return GeneralFailure;
147 try {
148 bool ok =
149 (css::uno::Reference< css::ucb::XCommandProcessor >(
150 content, css::uno::UNO_QUERY_THROW)->execute(
151 css::ucb::Command(u"getCasePreservingURL"_ustr,
152 -1, css::uno::Any()),
154 css::uno::Reference< css::ucb::XCommandEnvironment >())
155 >>= *normalized);
156 OSL_ASSERT(ok);
157 } catch (css::uno::RuntimeException &) {
158 throw;
159 } catch (css::ucb::UnsupportedCommandException &) {
160 return GeneralFailure;
161 } catch (css::uno::Exception &) {
162 return SpecificFailure;
164 return Success;
167 OUString normalize(
168 css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
169 css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
170 OUString const & uriReference)
172 // normalizePrefix can potentially fail (a typically example being a file
173 // URL that denotes a non-existing resource); in such a case, try to
174 // normalize as long a prefix of the given URL as possible (i.e., normalize
175 // all the existing directories within the path):
176 OUString normalized;
177 sal_Int32 n = uriReference.indexOf('#');
178 normalized = n == -1 ? uriReference : uriReference.copy(0, n);
179 switch (normalizePrefix(broker, normalized, &normalized)) {
180 case Success:
181 return n == -1 ? normalized : normalized + uriReference.subView(n);
182 case GeneralFailure:
183 return uriReference;
184 case SpecificFailure:
185 default:
186 break;
188 css::uno::Reference< css::uri::XUriReference > ref(
189 uriFactory->parse(uriReference));
190 if (!isAbsoluteHierarchicalUriReference(ref)) {
191 return uriReference;
193 sal_Int32 count = ref->getPathSegmentCount();
194 if (count < 2) {
195 return uriReference;
197 OUStringBuffer head(ref->getScheme());
198 head.append(':');
199 if (ref->hasAuthority()) {
200 head.append("//" + ref->getAuthority());
202 for (sal_Int32 i = count - 1; i > 0; --i) {
203 OUStringBuffer buf(head);
204 for (sal_Int32 j = 0; j < i; ++j) {
205 buf.append('/');
206 buf.append(ref->getPathSegment(j));
208 normalized = buf.makeStringAndClear();
209 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
211 buf.append(normalized);
212 css::uno::Reference< css::uri::XUriReference > preRef(
213 uriFactory->parse(normalized));
214 if (!isAbsoluteHierarchicalUriReference(preRef)) {
215 // This could only happen if something is inconsistent:
216 break;
218 sal_Int32 preCount = preRef->getPathSegmentCount();
219 // normalizePrefix may have added or removed a final slash:
220 if (preCount != i) {
221 if (preCount == i - 1) {
222 buf.append('/');
223 } else if (preCount - 1 == i && !buf.isEmpty()
224 && buf[buf.getLength() - 1] == '/')
226 buf.setLength(buf.getLength() - 1);
227 } else {
228 // This could only happen if something is inconsistent:
229 break;
232 for (sal_Int32 j = i; j < count; ++j) {
233 buf.append('/');
234 buf.append(ref->getPathSegment(j));
236 if (ref->hasQuery()) {
237 buf.append('?');
238 buf.append(ref->getQuery());
240 if (ref->hasFragment()) {
241 buf.append('#');
242 buf.append(ref->getFragment());
244 return buf.makeStringAndClear();
247 return uriReference;
252 css::uno::Reference< css::uri::XUriReference >
253 URIHelper::normalizedMakeRelative(
254 css::uno::Reference< css::uno::XComponentContext > const & context,
255 OUString const & baseUriReference, OUString const & uriReference)
257 OSL_ASSERT(context.is());
258 css::uno::Reference< css::ucb::XUniversalContentBroker > broker(
259 css::ucb::UniversalContentBroker::create(context));
260 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
261 css::uri::UriReferenceFactory::create(context));
262 return uriFactory->makeRelative(
263 uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
264 uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
265 true, false);
268 OUString URIHelper::simpleNormalizedMakeRelative(
269 OUString const & baseUriReference, OUString const & uriReference)
271 css::uno::Reference< css::uri::XUriReference > rel(
272 URIHelper::normalizedMakeRelative(
273 comphelper::getProcessComponentContext(), baseUriReference,
274 uriReference));
275 return rel.is() ? rel->getUriReference() : uriReference;
279 // FindFirstURLInText
282 namespace {
284 sal_Int32 nextChar(std::u16string_view rStr, sal_Int32 nPos)
286 return rtl::isHighSurrogate(rStr[nPos])
287 && rStr.size() - nPos >= 2
288 && rtl::isLowSurrogate(rStr[nPos + 1]) ?
289 nPos + 2 : nPos + 1;
292 bool isBoundary1(CharClass const & rCharClass, OUString const & rStr,
293 sal_Int32 nPos, sal_Int32 nEnd)
295 if (nPos == nEnd)
296 return true;
297 if (rCharClass.isLetterNumeric(rStr, nPos))
298 return false;
299 switch (rStr[nPos])
301 case '$':
302 case '%':
303 case '&':
304 case '-':
305 case '/':
306 case '@':
307 case '\\':
308 return false;
309 default:
310 return true;
314 bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
315 sal_Int32 nPos, sal_Int32 nEnd)
317 if (nPos == nEnd)
318 return true;
319 if (rCharClass.isLetterNumeric(rStr, nPos))
320 return false;
321 switch (rStr[nPos])
323 case '!':
324 case '#':
325 case '$':
326 case '%':
327 case '&':
328 case '\'':
329 case '*':
330 case '+':
331 case '-':
332 case '/':
333 case '=':
334 case '?':
335 case '@':
336 case '^':
337 case '_':
338 case '`':
339 case '{':
340 case '|':
341 case '}':
342 case '~':
343 return false;
344 default:
345 return true;
349 // tdf#145381 Added MatchingBracketDepth counter to detect matching closing
350 // brackets that are part of the uri
351 bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
352 sal_Int32 * pPos, sal_Int32 * pEnd,
353 sal_Int32 * pMatchingBracketDepth = nullptr,
354 bool bBackslash = false, bool bPipe = false)
356 sal_Unicode c = rStr[*pPos];
357 if (rtl::isAscii(c))
359 static sal_uInt8 const aMap[128]
360 = { 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 0, 0, 0, 0, 0, 0, 0,
364 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
365 5, 6, 1, 1, 1, 4, 1, 4, // ()*+,-./
366 4, 4, 4, 4, 4, 4, 4, 4, // 01234567
367 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
368 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
369 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO
370 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW
371 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_
372 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg
373 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno
374 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw
375 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
376 switch (aMap[c])
378 default: // not uric
379 return false;
381 case 1: // uric
382 ++(*pPos);
383 return true;
385 case 2: // "\"
386 if (bBackslash)
388 *pEnd = ++(*pPos);
389 return true;
391 else
392 return false;
394 case 3: // "|"
395 if (bPipe)
397 *pEnd = ++(*pPos);
398 return true;
400 else
401 return false;
403 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
404 // isBoundary1)
405 *pEnd = ++(*pPos);
406 return true;
408 case 5: // opening bracket
409 ++(*pPos);
410 if(nullptr != pMatchingBracketDepth)
411 ++(*pMatchingBracketDepth);
412 return true;
414 case 6: // closing bracket
415 ++(*pPos);
416 if(nullptr != pMatchingBracketDepth && *pMatchingBracketDepth > 0)
418 --(*pMatchingBracketDepth);
419 // tdf#145381 When there was an opening bracket, detect this closing bracket
420 // as part of the uri
421 *pEnd = *pPos;
423 return true;
427 else if (rCharClass.isLetterNumeric(rStr, *pPos))
429 *pEnd = *pPos = nextChar(rStr, *pPos);
430 return true;
432 else
433 return false;
436 sal_uInt32 scanDomain(OUString const & rStr, sal_Int32 * pPos,
437 sal_Int32 nEnd)
439 sal_Unicode const * pBuffer = rStr.getStr();
440 sal_Unicode const * p = pBuffer + *pPos;
441 sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
442 *pPos = sal::static_int_cast< sal_Int32 >(p - pBuffer);
443 return nLabels;
448 OUString URIHelper::FindFirstURLInText(OUString const & rText,
449 sal_Int32 & rBegin,
450 sal_Int32 & rEnd,
451 CharClass const & rCharClass,
452 INetURLObject::EncodeMechanism eMechanism,
453 rtl_TextEncoding eCharset)
455 if (rBegin > rEnd || rEnd > rText.getLength())
456 return OUString();
458 // Search for the first substring of [rBegin..rEnd[ that matches any of the
459 // following productions (for which the appropriate style bit is set in
460 // eStyle, if applicable).
462 // 1st Production (known scheme):
463 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
464 // \B1
466 // 2nd Production (file):
467 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
469 // 3rd Production (ftp):
470 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
472 // 4th Production (http):
473 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
475 // 5th Production (mailto):
476 // \B2 local-part "@" domain \B1
478 // 6th Production (UNC file):
479 // \B1 "\\" domain "\" *(wchar / "\") \B1
481 // 7th Production (DOS file):
482 // \B1 ALPHA ":\" *(wchar / "\") \B1
484 // 8th Production (Unix-like DOS file):
485 // \B1 ALPHA ":/" *(wchar / "\") \B1
487 // The productions use the following auxiliary rules.
489 // local-part = atom *("." atom)
490 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
491 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
492 // / "~")
493 // domain = label *("." label)
494 // label = alphanum [*(alphanum / "-") alphanum]
495 // alphanum = ALPHA / DIGIT
496 // wchar = <any uric character (ignoring the escaped rule), or "%", or
497 // a letter or digit (according to rCharClass)>
499 // "\B1" (boundary 1) stands for the beginning or end of the block of text,
500 // or a character that is neither (a) a letter or digit (according to
501 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
502 // (FIXME: What was the rationale for this set of punctuation characters?)
504 // "\B2" (boundary 2) stands for the beginning or end of the block of text,
505 // or a character that is neither (a) a letter or digit (according to
506 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
507 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
508 // 822 <atom> character, or "@" from \B1's set above).
510 // Productions 1--4, and 6--8 try to find a maximum-length match, but they
511 // stop at the first <wchar> character that is a "\B1" character which is
512 // only followed by "\B1" characters (taking "\" and "|" characters into
513 // account appropriately). Production 5 simply tries to find a maximum-
514 // length match.
516 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--9
517 // use EncodeMechanism::All.
519 // Productions 6--9 are only applicable if the FSysStyle::Dos bit is set in
520 // eStyle.
522 // tdf#145381: In addition to the productions I added a mechanism to detect
523 // matching brackets. The task presents the case of an url that ends on a
524 // closing bracket. This needs to be detected as part of the uri in the case
525 // that a matching opening bracket exists.
527 bool bBoundary1 = true;
528 bool bBoundary2 = true;
529 for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
531 sal_Unicode c = rText[nPos];
532 if (bBoundary1)
534 if (rtl::isAsciiAlpha(c))
536 sal_Int32 i = nPos;
537 INetProtocol eScheme = INetURLObject::CompareProtocolScheme(rText.subView(i, rEnd - i));
538 if (eScheme == INetProtocol::File) // 2nd
540 while (rText[i++] != ':') ;
541 sal_Int32 nPrefixEnd = i;
542 sal_Int32 nUriEnd = i;
543 while (i != rEnd
544 && checkWChar(rCharClass, rText, &i, &nUriEnd, nullptr, true,
545 true)) ;
546 if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
548 ++i;
549 while (i != rEnd
550 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
552 if (nUriEnd != nPrefixEnd
553 && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
555 INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
556 INetProtocol::File, eMechanism, eCharset,
557 FSysStyle::Detect);
558 if (!aUri.HasError())
560 rBegin = nPos;
561 rEnd = nUriEnd;
562 return
563 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
567 else if (eScheme != INetProtocol::NotValid) // 1st
569 while (rText[i++] != ':') ;
570 sal_Int32 nPrefixEnd = i;
571 sal_Int32 nUriEnd = i;
572 sal_Int32 nMatchingBracketDepth = 0;
573 while (i != rEnd
574 && checkWChar(rCharClass, rText, &i, &nUriEnd,
575 &nMatchingBracketDepth)) ;
576 if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
578 ++i;
579 while (i != rEnd
580 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
582 if (nUriEnd != nPrefixEnd
583 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
584 || rText[nUriEnd] == '\\'))
586 INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
587 INetProtocol::Http, eMechanism,
588 eCharset);
589 if (!aUri.HasError())
591 rBegin = nPos;
592 rEnd = nUriEnd;
593 return
594 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
599 // 3rd, 4th:
600 i = nPos;
601 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
602 if (nLabels >= 3
603 && rText[nPos + 3] == '.'
604 && (((rText[nPos] == 'w'
605 || rText[nPos] == 'W')
606 && (rText[nPos + 1] == 'w'
607 || rText[nPos + 1] == 'W')
608 && (rText[nPos + 2] == 'w'
609 || rText[nPos + 2] == 'W'))
610 || ((rText[nPos] == 'f'
611 || rText[nPos] == 'F')
612 && (rText[nPos + 1] == 't'
613 || rText[nPos + 1] == 'T')
614 && (rText[nPos + 2] == 'p'
615 || rText[nPos + 2] == 'P'))))
616 // (note that rText.GetChar(nPos + 3) is guaranteed to be
617 // valid)
619 sal_Int32 nUriEnd = i;
620 if (i != rEnd && rText[i] == '/')
622 nUriEnd = ++i;
623 while (i != rEnd
624 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
626 if (i != rEnd && rText[i] == '#')
628 ++i;
629 while (i != rEnd
630 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
632 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
633 || rText[nUriEnd] == '\\')
635 INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
636 INetProtocol::Http, eMechanism,
637 eCharset);
638 if (!aUri.HasError())
640 rBegin = nPos;
641 rEnd = nUriEnd;
642 return
643 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
648 if (rEnd - nPos >= 3
649 && rText[nPos + 1] == ':'
650 && (rText[nPos + 2] == '/'
651 || rText[nPos + 2] == '\\')) // 7th, 8th
653 i = nPos + 3;
654 sal_Int32 nUriEnd = i;
655 while (i != rEnd
656 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
657 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
659 INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
660 INetProtocol::File,
661 INetURLObject::EncodeMechanism::All,
662 RTL_TEXTENCODING_UTF8,
663 FSysStyle::Dos);
664 if (!aUri.HasError())
666 rBegin = nPos;
667 rEnd = nUriEnd;
668 return
669 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
674 else if (rEnd - nPos >= 2
675 && rText[nPos] == '\\'
676 && rText[nPos + 1] == '\\') // 6th
678 sal_Int32 i = nPos + 2;
679 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
680 if (nLabels >= 1 && i != rEnd && rText[i] == '\\')
682 sal_Int32 nUriEnd = ++i;
683 while (i != rEnd
684 && checkWChar(rCharClass, rText, &i, &nUriEnd,
685 nullptr, true)) ;
686 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
688 INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
689 INetProtocol::File,
690 INetURLObject::EncodeMechanism::All,
691 RTL_TEXTENCODING_UTF8,
692 FSysStyle::Dos);
693 if (!aUri.HasError())
695 rBegin = nPos;
696 rEnd = nUriEnd;
697 return
698 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
704 if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
706 bool bDot = false;
707 for (sal_Int32 i = nPos + 1; i != rEnd; ++i)
709 sal_Unicode c2 = rText[i];
710 if (INetMIME::isAtomChar(c2))
711 bDot = false;
712 else if (bDot)
713 break;
714 else if (c2 == '.')
715 bDot = true;
716 else
718 if (c2 == '@')
720 ++i;
721 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
722 if (nLabels >= 1
723 && isBoundary1(rCharClass, rText, i, rEnd))
725 INetURLObject aUri(rText.subView(nPos, i - nPos),
726 INetProtocol::Mailto,
727 INetURLObject::EncodeMechanism::All);
728 if (!aUri.HasError())
730 rBegin = nPos;
731 rEnd = i;
732 return aUri.GetMainURL(
733 INetURLObject::DecodeMechanism::ToIUri);
737 break;
741 bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
742 bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
744 rBegin = rEnd;
745 return OUString();
748 OUString URIHelper::FindFirstDOIInText(std::u16string_view rText,
749 sal_Int32 & rBegin,
750 sal_Int32 & rEnd,
751 CharClass const & rCharClass)
753 if (rBegin > rEnd || rEnd > static_cast<sal_Int32>(rText.size()))
754 return OUString();
756 sal_Int32 start = 7;
757 sal_Int32 count = rEnd-rBegin;
758 OUString candidate(rText.substr(rBegin, count));
759 // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
760 if (candidate.startsWithIgnoreAsciiCase("doi:10."))
762 bool flag = true;
763 sal_Int32 digit = 0;
764 for (sal_Int32 i=start; i<count; i++)
766 sal_Unicode c = candidate[i];
767 // Match 4 to 9 digits before slash
768 if (digit >= 0)
770 if (digit>9)
772 flag = false;
773 break;
776 if ( rCharClass.isDigit(candidate,i) )
778 digit++;
780 else if (c=='/' && digit>=4 && i<count-1)
782 digit=-1;
784 else
786 flag = false;
787 break;
790 // Match [-._;()\/:a-zA-Z0-9] after slash
791 else if (!( rCharClass.isAlphaNumeric(candidate, i) || c == '.' || c == '-' || c=='_' ||
792 c==';' || c=='(' || c==')' || c=='\\' || (c=='/' && i<count-1) || c==':'))
794 flag = false;
795 break;
798 if (flag && digit==-1)
800 return OUString::Concat("https://doi.org/")+candidate.subView(4);
803 rBegin = rEnd;
804 return OUString();
807 OUString URIHelper::removePassword(OUString const & rURI,
808 INetURLObject::EncodeMechanism eEncodeMechanism,
809 INetURLObject::DecodeMechanism eDecodeMechanism,
810 rtl_TextEncoding eCharset)
812 INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
813 return aObj.HasError() ?
814 rURI :
815 aObj.GetURLNoPass(eDecodeMechanism, eCharset);
818 OUString URIHelper::resolveIdnaHost(OUString const & url) {
819 css::uno::Reference<css::uri::XUriReference> uri(
820 css::uri::UriReferenceFactory::create(
821 comphelper::getProcessComponentContext())
822 ->parse(url));
823 if (!(uri.is() && uri->hasAuthority())) {
824 return url;
826 auto auth(uri->getAuthority());
827 if (auth.isEmpty())
828 return url;
829 sal_Int32 hostStart = auth.indexOf('@') + 1;
830 sal_Int32 hostEnd = auth.getLength();
831 while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
832 --hostEnd;
834 if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
835 --hostEnd;
836 } else {
837 hostEnd = auth.getLength();
839 auto asciiOnly = true;
840 for (auto i = hostStart; i != hostEnd; ++i) {
841 if (!rtl::isAscii(auth[i])) {
842 asciiOnly = false;
843 break;
846 if (asciiOnly) {
847 // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
848 return url;
850 UErrorCode e = U_ZERO_ERROR;
851 std::unique_ptr<icu::IDNA> idna(
852 icu::IDNA::createUTS46Instance(
853 (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_CHECK_CONTEXTO),
854 e));
855 if (U_FAILURE(e)) {
856 SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
857 return url;
859 icu::UnicodeString ascii;
860 icu::IDNAInfo info;
861 idna->nameToASCII(
862 icu::UnicodeString(
863 reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
864 hostEnd - hostStart),
865 ascii, info, e);
866 if (U_FAILURE(e) || info.hasErrors()) {
867 return url;
869 OUStringBuffer buf(uri->getScheme());
870 buf.append(OUString::Concat("://") + auth.subView(0, hostStart));
871 buf.append(
872 reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
873 ascii.length());
874 buf.append(auth.subView(hostEnd) + uri->getPath());
875 if (uri->hasQuery()) {
876 buf.append("?" + uri->getQuery());
878 if (uri->hasFragment()) {
879 buf.append("#" + uri->getFragment());
881 return buf.makeStringAndClear();
884 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */