1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/textenc.h>
25 #include <sal/types.h>
26 #include <o3tl/typed_flags_set.hxx>
29 #include <string_view>
33 namespace com::sun::star::util
{
37 namespace com::sun::star::uno
{ template <typename
> class Reference
; }
39 // Common URL prefixes for various schemes:
40 inline constexpr OUStringLiteral INET_FTP_SCHEME
= u
"ftp://";
41 inline constexpr OUStringLiteral INET_HTTP_SCHEME
= u
"http://";
42 inline constexpr OUStringLiteral INET_HTTPS_SCHEME
= u
"https://";
43 inline constexpr OUStringLiteral INET_FILE_SCHEME
= u
"file://";
44 inline constexpr OUStringLiteral INET_MAILTO_SCHEME
= u
"mailto:";
45 inline constexpr OUStringLiteral INET_HID_SCHEME
= u
"hid:";
// Narrow-string prefix "private:"; unlike the INET_*_SCHEME constants this is a plain macro.
47 #define URL_PREFIX_PRIV_SOFFICE "private:"
50 enum class INetProtocol
84 /** The supported notations for file system paths.
88 /** VOS notation (e.g., "//server/dir/file").
92 /** Unix notation (e.g., "/dir/file").
96 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
100 /** Detect the used notation.
102 @descr For the following descriptions, please note that
103 whereas FSYS_DEFAULT includes all style bits, combinations of only
104 a few style bits are also possible, and are also described.
106 @descr When used to translate a file system path to a file URL,
107 the subset of the following productions for which the appropriate
108 style bit is set are checked in order (using the conventions of
109 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
112 Production T1 (VOS local; FSysStyle::Vos only):
117 Production T2 (VOS host; FSysStyle::Vos only):
118 "//" [host] ["/" *UCS4]
120 "file://" host "/" *UCS4
122 Production T3 (UNC; FSysStyle::Dos only):
123 "\\" [host] ["\" *UCS4]
125 "file://" host "/" *UCS4
126 replacing "\" by "/" within <*UCS4>
128 Production T4 (Unix-like DOS; FSysStyle::Dos only):
129 ALPHA ":" ["/" *UCS4]
131 "file:///" ALPHA ":/" *UCS4
132 replacing "\" by "/" within <*UCS4>
134 Production T5 (DOS; FSysStyle::Dos only):
135 ALPHA ":" ["\" *UCS4]
137 "file:///" ALPHA ":/" *UCS4
138 replacing "\" by "/" within <*UCS4>
144 replacing the delimiter by "/" within <*UCS4>. The delimiter is
145 that character from the set { "/", "\" } which appears most
146 often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
147 is removed from the set; if FSysStyle::Dos is not among the style
148 bits, "\" is removed from the set). If two or more
149 characters appear the same number of times, the character
150 mentioned first in that set is chosen. If the first character
151 of <*UCS4> is the delimiter, that character is not copied.
153 @descr When used to translate a file URL to a file system path,
154 the following productions are checked in order (using the
155 conventions of RFC 2234, RFC 2396, and RFC 2732):
157 Production F1 (VOS; FSysStyle::Vos):
158 "file://" host "/" fpath ["#" fragment]
162 Production F2 (DOS; FSysStyle::Dos):
163 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
165 ALPHA ":" ["\" fpath]
166 replacing "/" by "\" in <fpath>
168 Production F3 (Unix; FSysStyle::Unix):
169 "file:///" fpath ["#" fragment]
173 Detect
= Vos
| Unix
| Dos
176 template<> struct typed_flags
<FSysStyle
> : is_typed_flags
<FSysStyle
, 0x07> {};
179 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
182 // Get- and Set-Methods:
184 /** The way input strings that represent (parts of) URIs are interpreted
187 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
188 sequences of escape sequences, representing the UTF-8 coded characters.
190 @descr Along with an EncodeMechanism parameter, the set-methods all
191 take an rtl_TextEncoding parameter, which is ignored unless the
192 EncodeMechanism is EncodeMechanism::WasEncoded.
194 enum class EncodeMechanism
196 /** All escape sequences that are already present are ignored, and are
197 interpreted as literal sequences of three characters.
201 /** Sequences of escape sequences, that represent characters from the
202 specified character set and that can be converted to UTF-32
203 characters, are first decoded. If they have to be encoded, they
204 are converted to UTF-8 characters and are then translated into
205 (sequences of) escape sequences. Other escape sequences are
206 copied verbatim (but using upper case hex digits).
210 /** All escape sequences that are already present are copied verbatim
211 (but using upper case hex digits).
216 /** The way strings that represent (parts of) URIs are returned from get-
219 @descr Along with a DecodeMechanism parameter, the get-methods all
220 take an rtl_TextEncoding parameter, which is ignored unless the
221 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
223 enum class DecodeMechanism
225 /** The (part of the) URI is returned unchanged. Since URIs are
226 written using a subset of US-ASCII, the returned string is
227 guaranteed to contain only US-ASCII characters.
231 /** All sequences of escape sequences that represent UTF-8 coded
232 UTF-32 characters with a numerical value greater than 0x7F, are
233 replaced by the respective UTF-16 characters. All other escape
234 sequences are not decoded.
238 /** All (sequences of) escape sequences that represent characters from
239 the specified character set, and that can be converted to UTF-32,
240 are replaced by the respective UTF-16 characters. All other
241 escape sequences are not decoded.
245 /** All (sequences of) escape sequences that represent characters from
246 the specified character set, that can be converted to UTF-32, and
247 that (in the case of ASCII characters) can safely be decoded
248 without altering the meaning of the (part of the) URI, are
249 replaced by the respective UTF-16 characters. All other escape
250 sequences are not decoded.
255 // General Structure:
258 m_aAbsURIRef(256), m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
) {}
260 bool HasError() const { return m_eScheme
== INetProtocol::NotValid
; }
262 OUString
GetMainURL(DecodeMechanism eMechanism
,
263 rtl_TextEncoding eCharset
264 = RTL_TEXTENCODING_UTF8
) const
265 { return decode(m_aAbsURIRef
, eMechanism
, eCharset
); }
267 OUString
GetURLNoPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
268 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
271 OUString
GetURLNoMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
272 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
276 getAbbreviated(css::uno::Reference
< css::util::XStringWidth
> const & rStringWidth
,
278 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
279 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
282 bool operator ==(INetURLObject
const & rObject
) const;
284 bool operator !=(INetURLObject
const & rObject
) const
285 { return !(*this == rObject
); }
289 inline explicit INetURLObject(
290 std::u16string_view rTheAbsURIRef
,
291 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
292 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
294 inline bool SetURL(std::u16string_view rTheAbsURIRef
,
295 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
296 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
298 bool ConcatData(INetProtocol eTheScheme
, std::u16string_view rTheUser
,
299 std::u16string_view rThePassword
,
300 std::u16string_view rTheHost
, sal_uInt32 nThePort
,
301 std::u16string_view rThePath
);
305 inline INetURLObject(std::u16string_view rTheAbsURIRef
,
306 INetProtocol eTheSmartScheme
,
307 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
308 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
309 FSysStyle eStyle
= FSysStyle::Detect
);
311 void SetSmartProtocol(INetProtocol eTheSmartScheme
)
312 { m_eSmartScheme
= eTheSmartScheme
; }
315 SetSmartURL(std::u16string_view rTheAbsURIRef
,
316 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
317 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
318 FSysStyle eStyle
= FSysStyle::Detect
);
321 smartRel2Abs(OUString
const & rTheRelURIRef
,
323 bool bIgnoreFragment
= false,
324 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
325 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
326 bool bRelativeNonURIs
= false,
327 FSysStyle eStyle
= FSysStyle::Detect
) const;
332 GetNewAbsURL(OUString
const & rTheRelURIRef
,
333 INetURLObject
* pTheAbsURIRef
)
336 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
337 (because of syntactic reasons), either rTheRelURIRef or an empty
338 string is returned: If all of the parameters eEncodeMechanism,
339 eDecodeMechanism and eCharset have their respective default values,
340 then rTheRelURIRef is returned unmodified; otherwise, an empty string
344 GetAbsURL(std::u16string_view rTheBaseURIRef
,
345 OUString
const & rTheRelURIRef
,
346 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
347 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
348 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
350 static inline OUString
351 GetRelURL(std::u16string_view rTheBaseURIRef
,
352 OUString
const & rTheAbsURIRef
,
353 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
354 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
355 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
356 FSysStyle eStyle
= FSysStyle::Detect
);
360 OUString
getExternalURL() const;
362 static inline bool translateToExternal(std::u16string_view rTheIntURIRef
,
363 OUString
& rTheExtURIRef
,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri
,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8
);
369 static inline bool translateToInternal(std::u16string_view rTheExtURIRef
,
370 OUString
& rTheIntURIRef
,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri
,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8
);
380 INetProtocol
GetProtocol() const { return m_eScheme
; }
382 bool isSchemeEqualTo(INetProtocol scheme
) const { return scheme
== m_eScheme
; }
384 bool isSchemeEqualTo(std::u16string_view scheme
) const;
386 /** Check if the scheme is one of the WebDAV schemes
389 * @return true if the scheme is one of the WebDAV schemes, either a public or a private one.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static OUString
GetScheme(INetProtocol eTheScheme
);
401 /** Return the human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static const OUString
& GetSchemeName(INetProtocol eTheScheme
);
409 static INetProtocol
CompareProtocolScheme(std::u16string_view aTheAbsURIRef
);
413 bool HasUserData() const { return m_aUser
.isPresent(); }
415 OUString
GetUser(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
416 rtl_TextEncoding eCharset
417 = RTL_TEXTENCODING_UTF8
) const
418 { return decode(m_aUser
, eMechanism
, eCharset
); }
420 OUString
GetPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
421 rtl_TextEncoding eCharset
422 = RTL_TEXTENCODING_UTF8
) const
423 { return decode(m_aAuth
, eMechanism
, eCharset
); }
425 bool SetUser(std::u16string_view rTheUser
)
426 { return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
); }
428 inline bool SetPass(std::u16string_view rThePassword
);
430 inline bool SetUserAndPass(std::u16string_view rTheUser
,
431 std::u16string_view rThePassword
);
435 bool HasPort() const { return m_aPort
.isPresent(); }
437 OUString
GetHost(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
438 rtl_TextEncoding eCharset
439 = RTL_TEXTENCODING_UTF8
) const
440 { return decode(m_aHost
, eMechanism
, eCharset
); }
442 OUString
GetHostPort(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
443 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
) const;
445 sal_uInt32
GetPort() const;
447 bool SetHost(std::u16string_view rTheHost
)
448 { return setHost(rTheHost
, RTL_TEXTENCODING_UTF8
); }
450 bool SetPort(sal_uInt32 nThePort
);
454 bool HasURLPath() const { return !m_aPath
.isEmpty(); }
456 OUString
GetURLPath(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
457 rtl_TextEncoding eCharset
458 = RTL_TEXTENCODING_UTF8
) const
459 { return decode(m_aPath
, eMechanism
, eCharset
); }
461 bool SetURLPath(std::u16string_view rThePath
,
462 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
463 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
464 { return setPath(rThePath
, eMechanism
, eCharset
); }
466 // Hierarchical Path:
468 /** A constant to address the last segment in various methods dealing with
471 @descr It is often more efficient to address the last segment using
472 this constant, than to determine its ordinal value using
475 enum { LAST_SEGMENT
= -1 };
477 /** The number of segments in the hierarchical path.
479 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
482 hierarchical-path = 1*("/" segment)
484 segment = name *(";" param)
486 name = [base ["." extension]]
490 extension = *<any pchar except ".">
494 @param bIgnoreFinalSlash If true, a final slash at the end of the
495 hierarchical path does not denote an empty segment, but is ignored.
497 @return The number of segments in the hierarchical path. If the path
498 is not hierarchical, 0 is returned.
500 sal_Int32
getSegmentCount(bool bIgnoreFinalSlash
= true) const;
502 /** Remove a segment from the hierarchical path.
504 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
505 if addressing the last segment.
507 @param bIgnoreFinalSlash If true, a final slash at the end of the
508 hierarchical path does not denote an empty segment, but is ignored.
510 @return True if the segment has successfully been removed (and the
511 resulting URI is still valid). If the path is not hierarchical, or
512 the specified segment does not exist, false is returned. If false is
513 returned, the object is not modified.
515 bool removeSegment(sal_Int32 nIndex
= LAST_SEGMENT
,
516 bool bIgnoreFinalSlash
= true);
518 /** Insert a new segment into the hierarchical path.
519 A final slash at the end of the
520 hierarchical path does not denote an empty segment, but is ignored.
522 @param rTheName The name part of the new segment. The new segment
523 will contain no parameters.
525 @param bAppendFinalSlash If the new segment is appended at the end of
526 the hierarchical path, this parameter specifies whether to add a final
527 slash after it or not.
529 @param nIndex The non-negative index of the segment before which
530 to insert the new segment. LAST_SEGMENT or an nIndex that equals
531 getSegmentCount() inserts the new segment at the end of the
534 @param eMechanism See the general discussion for set-methods.
536 @param eCharset See the general discussion for set-methods.
538 @return True if the segment has successfully been inserted (and the
539 resulting URI is still valid). If the path is not hierarchical, or
540 the specified place to insert the new segment does not exist, false is
541 returned. If false is returned, the object is not modified.
543 bool insertName(std::u16string_view rTheName
,
544 bool bAppendFinalSlash
= false,
545 sal_Int32 nIndex
= LAST_SEGMENT
,
546 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
547 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
549 /** Get the name of a segment of the hierarchical path.
551 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
552 if addressing the last segment.
554 @param bIgnoreFinalSlash If true, a final slash at the end of the
555 hierarchical path does not denote an empty segment, but is ignored.
557 @param eMechanism See the general discussion for get-methods.
559 @param eCharset See the general discussion for get-methods.
561 @return The name part of the specified segment. If the path is not
562 hierarchical, or the specified segment does not exist, an empty string
565 OUString
getName(sal_Int32 nIndex
= LAST_SEGMENT
,
566 bool bIgnoreFinalSlash
= true,
567 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
568 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
571 /** Set the name of the last segment (preserving any parameters and any query or
574 @param rTheName The new name.
576 @param eMechanism See the general discussion for set-methods.
578 @param eCharset See the general discussion for set-methods.
580 @return True if the name has successfully been modified (and the
581 resulting URI is still valid). If the path is not hierarchical, or
582 a last segment does not exist, false is returned. If false is
583 returned, the object is not modified.
585 bool setName(std::u16string_view rTheName
,
586 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
587 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
589 /** Get the base of the name of a segment.
591 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
592 if addressing the last segment.
594 @param bIgnoreFinalSlash If true, a final slash at the end of the
595 hierarchical path does not denote an empty segment, but is ignored.
597 @param eMechanism See the general discussion for get-methods.
599 @param eCharset See the general discussion for get-methods.
601 @return The base part of the specified segment. If the path is
602 not hierarchical, or the specified segment does not exist, an empty
605 OUString
getBase(sal_Int32 nIndex
= LAST_SEGMENT
,
606 bool bIgnoreFinalSlash
= true,
607 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
608 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
611 /** Set the base of the name of a segment (preserving the extension).
612 A final slash at the end of the
613 hierarchical path does not denote an empty segment, but is ignored.
615 @param rTheBase The new base.
617 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
618 if addressing the last segment.
620 @param eMechanism See the general discussion for set-methods.
622 @param eCharset See the general discussion for set-methods.
624 @return True if the base has successfully been modified (and the
625 resulting URI is still valid). If the path is not hierarchical, or
626 the specified segment does not exist, false is returned. If false is
627 returned, the object is not modified.
629 bool setBase(std::u16string_view rTheBase
,
630 sal_Int32 nIndex
= LAST_SEGMENT
,
631 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
632 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
634 /** Determine whether the name of the last segment has an extension.
636 @return True if the name of the specified segment has an extension.
637 If the path is not hierarchical, or the specified segment does not
638 exist, false is returned.
640 bool hasExtension() const;
642 /** Get the extension of the name of a segment.
644 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
645 if addressing the last segment.
647 @param bIgnoreFinalSlash If true, a final slash at the end of the
648 hierarchical path does not denote an empty segment, but is ignored.
650 @param eMechanism See the general discussion for get-methods.
652 @param eCharset See the general discussion for get-methods.
654 @return The extension part of the specified segment. If the path is
655 not hierarchical, or the specified segment does not exist, an empty
658 OUString
getExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
659 bool bIgnoreFinalSlash
= true,
660 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
661 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
664 /** Set the extension of the name of a segment (replacing an already
667 @param rTheExtension The new extension.
669 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
670 if addressing the last segment.
672 @param bIgnoreFinalSlash If true, a final slash at the end of the
673 hierarchical path does not denote an empty segment, but is ignored.
675 @param eCharset See the general discussion for set-methods.
677 @return True if the extension has successfully been modified (and the
678 resulting URI is still valid). If the path is not hierarchical, or
679 the specified segment does not exist, false is returned. If false is
680 returned, the object is not modified.
682 bool setExtension(std::u16string_view rTheExtension
,
683 sal_Int32 nIndex
= LAST_SEGMENT
,
684 bool bIgnoreFinalSlash
= true,
685 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
687 /** Remove the extension of the name of a segment.
689 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
690 if addressing the last segment.
692 @param bIgnoreFinalSlash If true, a final slash at the end of the
693 hierarchical path does not denote an empty segment, but is ignored.
695 @return True if the extension has successfully been removed (and the
696 resulting URI is still valid), or if the name did not have an
697 extension. If the path is not hierarchical, or the specified segment
698 does not exist, false is returned. If false is returned, the object
701 bool removeExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
702 bool bIgnoreFinalSlash
= true);
704 /** Determine whether the hierarchical path ends in a final slash.
706 @return True if the hierarchical path ends in a final slash. If the
707 path is not hierarchical, false is returned.
709 bool hasFinalSlash() const;
711 /** Make the hierarchical path end in a final slash (if it does not
714 @return True if a final slash has successfully been appended (and the
715 resulting URI is still valid), or if the hierarchical path already
716 ended in a final slash. If the path is not hierarchical, false is
717 returned. If false is returned, the object is not modified.
719 bool setFinalSlash();
721 /** Remove a final slash from the hierarchical path.
723 @return True if a final slash has successfully been removed (and the
724 resulting URI is still valid), or if the hierarchical path already did
725 not end in a final slash. If the path is not hierarchical, false is
726 returned. If false is returned, the object is not modified.
728 bool removeFinalSlash();
732 bool HasParam() const { return m_aQuery
.isPresent(); }
734 OUString
GetParam(rtl_TextEncoding eCharset
735 = RTL_TEXTENCODING_UTF8
) const
736 { return decode(m_aQuery
, DecodeMechanism::NONE
, eCharset
); }
738 inline bool SetParam(std::u16string_view rTheQuery
,
739 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
740 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
744 bool HasMark() const { return m_aFragment
.isPresent(); }
746 OUString
GetMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
747 rtl_TextEncoding eCharset
748 = RTL_TEXTENCODING_UTF8
) const
749 { return decode(m_aFragment
, eMechanism
, eCharset
); }
751 inline bool SetMark(std::u16string_view rTheFragment
,
752 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
753 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
757 /** Return the file system path represented by a file URL (ignoring any
760 @param eStyle The notation of the returned file system path.
762 @param pDelimiter Upon successful return, this parameter can return
763 the character that is the 'main' delimiter within the returned file
764 system path (e.g., "/" for Unix, "\" for DOS). This is
765 especially useful for routines that later try to shorten the returned
766 file system path at a 'good' position, e.g. to fit it into some
767 limited display space.
769 @return The file system path represented by this file URL. If this
770 file URL does not represent a file system path according to the
771 specified notation, or if this is not a file URL at all, an empty
774 OUString
getFSysPath(FSysStyle eStyle
, sal_Unicode
* pDelimiter
= nullptr)
778 std::unique_ptr
<SvMemoryStream
> getData() const;
784 PART_USER_PASSWORD
= 0x00001,
785 PART_FPATH
= 0x00008,
786 PART_AUTHORITY
= 0x00010,
787 PART_REL_SEGMENT_EXTRA
= 0x00020,
789 PART_HTTP_PATH
= 0x00080,
790 PART_MESSAGE_ID_PATH
= 0x00100,
791 PART_MAILTO
= 0x00200,
792 PART_PATH_BEFORE_QUERY
= 0x00400,
793 PART_PCHAR
= 0x00800,
794 PART_VISIBLE
= 0x01000,
795 PART_VISIBLE_NONSPECIAL
= 0x02000,
796 PART_UNO_PARAM_VALUE
= 0x04000,
797 PART_UNAMBIGUOUS
= 0x08000,
798 PART_URIC_NO_SLASH
= 0x10000,
799 PART_HTTP_QUERY
= 0x20000, //TODO! unused?
802 enum class EscapeType
809 /** Encode some text as part of a URI.
811 @param rText Some text (for its interpretation, see the general
812 discussion for set-methods).
814 @param ePart The part says which characters are 'forbidden' and must
815 be encoded (replaced by escape sequences). Characters outside the US-
816 ASCII range are always 'forbidden.'
818 @param eMechanism See the general discussion for set-methods.
820 @param eCharset See the general discussion for set-methods.
822 @return The text, encoded according to the given mechanism and
823 charset ('forbidden' characters replaced by escape sequences).
825 static OUString
encode( std::u16string_view rText
, Part ePart
,
826 EncodeMechanism eMechanism
,
827 rtl_TextEncoding eCharset
828 = RTL_TEXTENCODING_UTF8
);
831 /** Decode some text.
833 @param rText Some (encoded) text.
835 @param eMechanism See the general discussion for get-methods.
837 @param eCharset See the general discussion for get-methods.
839 @return The text, decoded according to the given mechanism and
840 charset (escape sequences replaced by 'raw' characters).
842 static inline OUString
decode(std::u16string_view rText
,
843 DecodeMechanism eMechanism
,
844 rtl_TextEncoding eCharset
845 = RTL_TEXTENCODING_UTF8
);
847 static void appendUCS4Escape(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
);
849 static void appendUCS4(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
,
850 EscapeType eEscapeType
, Part ePart
,
851 rtl_TextEncoding eCharset
, bool bKeepVisibleEscapes
);
853 static sal_uInt32
getUTF32(sal_Unicode
const *& rBegin
,
854 sal_Unicode
const * pEnd
,
855 EncodeMechanism eMechanism
,
856 rtl_TextEncoding eCharset
,
857 EscapeType
& rEscapeType
);
859 // Specialized helpers:
861 static sal_uInt32
scanDomain(sal_Unicode
const *& rBegin
,
862 sal_Unicode
const * pEnd
,
865 // OBSOLETE Hierarchical Path:
867 OUString
GetPartBeforeLastName() const;
869 /** Get the last segment in the path.
871 @param eMechanism See the general discussion for get-methods.
873 @param eCharset See the general discussion for get-methods.
875 @return For a hierarchical URL, the last segment (everything after
876 the last unencoded '/'). Note that this last segment may be empty. If
877 the URL is not hierarchical, an empty string is returned.
879 OUString
GetLastName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
880 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
883 /** Get the 'extension' of the last segment in the path.
885 @return For a hierarchical URL, everything after the first unencoded
886 '.' in the last segment of the path. Note that this 'extension' may
887 be empty. If the URL is not hierarchical, or if the last segment does
888 not contain an unencoded '.', an empty string is returned.
890 OUString
GetFileExtension() const;
892 bool Append(std::u16string_view rTheSegment
,
893 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
894 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
898 // OBSOLETE File URLs:
900 OUString
PathToFileName() const;
902 OUString
GetFull() const;
904 OUString
GetPath() const;
906 void SetBase(std::u16string_view rTheBase
);
908 OUString
GetBase() const;
910 void SetExtension(std::u16string_view rTheExtension
);
912 OUString
CutExtension();
/** Case-sensitivity query; unconditionally answers true.

    @return Always true.
 */
static bool IsCaseSensitive()
{
    return true;
}
916 void changeScheme(INetProtocol eTargetScheme
);
918 // INetProtocol::Macro, INetProtocol::Uno, INetProtocol::Slot,
919 // vnd.sun.star.script, etc. All the types of URLs which shouldn't
920 // be accepted from an outside controlled source
921 bool IsExoticProtocol() const;
924 // General Structure:
926 class SAL_DLLPRIVATE SubString
932 explicit SubString(sal_Int32 nTheBegin
= -1,
933 sal_Int32 nTheLength
= 0):
934 m_nBegin(nTheBegin
), m_nLength(nTheLength
) {}
936 bool isPresent() const { return m_nBegin
!= -1; }
938 bool isEmpty() const { return m_nLength
== 0; }
940 sal_Int32
getBegin() const { return m_nBegin
; }
942 sal_Int32
getLength() const { return m_nLength
; }
944 sal_Int32
getEnd() const { return m_nBegin
+ m_nLength
; }
948 sal_Int32
set(OUStringBuffer
& rString
,
949 std::u16string_view rSubString
,
950 sal_Int32 nTheBegin
);
952 sal_Int32
set(OUString
& rString
,
953 std::u16string_view rSubString
);
955 sal_Int32
set(OUStringBuffer
& rString
,
956 std::u16string_view rSubString
);
958 inline void operator +=(sal_Int32 nDelta
);
960 int compare(SubString
const & rOther
,
961 OUStringBuffer
const & rThisString
,
962 OUStringBuffer
const & rOtherString
) const;
965 OUStringBuffer m_aAbsURIRef
;
973 SubString m_aFragment
;
974 INetProtocol m_eScheme
;
975 INetProtocol m_eSmartScheme
;
977 TOOLS_DLLPRIVATE
void setInvalid();
980 std::u16string_view rTheAbsURIRef
,
981 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
, bool bSmart
,
986 bool convertRelToAbs(
987 OUString
const & rTheRelURIRef
,
988 INetURLObject
& rTheAbsURIRef
, bool & rWasAbsolute
,
989 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
990 bool bIgnoreFragment
, bool bSmart
, bool bRelativeNonURIs
,
991 FSysStyle eStyle
) const;
993 bool convertAbsToRel(
994 OUString
const & rTheAbsURIRef
,
995 OUString
& rTheRelURIRef
, EncodeMechanism eEncodeMechanism
,
996 DecodeMechanism eDecodeMechanism
, rtl_TextEncoding eCharset
,
997 FSysStyle eStyle
) const;
1001 static bool convertIntToExt(
1002 std::u16string_view rTheIntURIRef
,
1003 OUString
& rTheExtURIRef
, DecodeMechanism eDecodeMechanism
,
1004 rtl_TextEncoding eCharset
);
1006 static bool convertExtToInt(
1007 std::u16string_view rTheExtURIRef
,
1008 OUString
& rTheIntURIRef
, DecodeMechanism eDecodeMechanism
,
1009 rtl_TextEncoding eCharset
);
1015 TOOLS_DLLPRIVATE
static inline SchemeInfo
const & getSchemeInfo(
1016 INetProtocol eTheScheme
);
1018 TOOLS_DLLPRIVATE
inline SchemeInfo
const & getSchemeInfo() const;
1020 TOOLS_DLLPRIVATE
static PrefixInfo
const * getPrefix(
1021 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1025 TOOLS_DLLPRIVATE sal_Int32
getAuthorityBegin() const;
1027 TOOLS_DLLPRIVATE SubString
getAuthority() const;
1032 std::u16string_view rTheUser
,
1033 rtl_TextEncoding eCharset
);
1035 bool clearPassword();
1038 std::u16string_view rThePassword
,
1039 rtl_TextEncoding eCharset
);
1043 TOOLS_DLLPRIVATE
static bool parseHost(
1044 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
,
1045 OUStringBuffer
* pCanonic
);
1047 TOOLS_DLLPRIVATE
static bool parseHostOrNetBiosName(
1048 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1049 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1050 bool bNetBiosName
, OUStringBuffer
* pCanonic
);
1053 std::u16string_view rTheHost
,
1054 rtl_TextEncoding eCharset
);
// Static parser for the path of a URI with scheme eScheme; reports a bool.
// pBegin is a pointer to the scan pointer (so the callee is able to move
// it), pEnd bounds the input, and rSynPath is a non-const buffer reference.
// The four sal_uInt32 arguments name the delimiter code points for
// segments (primary and alternative), the query and the fragment.
// NOTE(review): how bSkippedInitialSlash and the delimiters are interpreted
// is defined in the implementation file, not in this excerpt.
1058 TOOLS_DLLPRIVATE
static bool parsePath(
1059 INetProtocol eScheme
, sal_Unicode
const ** pBegin
,
1060 sal_Unicode
const * pEnd
, EncodeMechanism eMechanism
,
1061 rtl_TextEncoding eCharset
, bool bSkippedInitialSlash
,
1062 sal_uInt32 nSegmentDelimiter
, sal_uInt32 nAltSegmentDelimiter
,
1063 sal_uInt32 nQueryDelimiter
, sal_uInt32 nFragmentDelimiter
,
1064 OUStringBuffer
&rSynPath
);
// NOTE(review): the opening line of this declaration (return type and name)
// is not present in this excerpt. Judging by the parameter name this is the
// path setter - confirm against the full file.
1067 std::u16string_view rThePath
,
1068 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1070 // Hierarchical Path:
// Const predicate taking no arguments.
// NOTE(review): presumably reports whether the stored URI has a hierarchical
// path; the definition is outside this excerpt - confirm.
1072 TOOLS_DLLPRIVATE
bool checkHierarchical() const;
// Const accessor returning a SubString by value for the path segment
// selected by nIndex.
// NOTE(review): index base, the handling of out-of-range indices and the
// exact effect of bIgnoreFinalSlash are not visible in this excerpt.
1074 TOOLS_DLLPRIVATE SubString
getSegment(
1075 sal_Int32 nIndex
, bool bIgnoreFinalSlash
) const;
// NOTE(review): the opening line of this declaration (return type and name)
// is not present in this excerpt. The parameter list matches the
// setQuery(rTheQuery, eMechanism, eCharset) call made by SetParam() -
// confirm against the full file.
1082 std::u16string_view rTheQuery
,
1083 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
// Takes no arguments and reports a bool.
// NOTE(review): presumably removes the fragment from the stored URI; the
// definition is outside this excerpt - confirm.
1087 bool clearFragment();
// NOTE(review): the opening line of this declaration (return type and name)
// is not present in this excerpt. The parameter list matches the
// setFragment(rTheFragment, eMechanism, eCharset) call made by SetMark() -
// confirm against the full file.
1090 std::u16string_view rTheMark
,
1091 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
// Const predicate parameterised by a file-system notation (FSysStyle).
// NOTE(review): presumably reports whether the path starts with a DOS volume
// such as "a:"; the definition is outside this excerpt - confirm.
1095 TOOLS_DLLPRIVATE
bool hasDosVolume(FSysStyle eStyle
) const;
// Static, inline-declared helper that takes a non-const buffer reference
// and an octet value and returns nothing.
// NOTE(review): presumably appends the escape sequence for nOctet to
// rTheText; the definition is outside this excerpt - confirm.
1099 TOOLS_DLLPRIVATE
static inline void appendEscape(
1100 OUStringBuffer
& rTheText
, sal_uInt32 nOctet
);
// Pointer-range encoder: processes the text between pBegin and pEnd and
// works on rOutputBuffer (non-const reference). The std::u16string_view
// overload of encodeText() forwards to this function.
// NOTE(review): the encoding rules selected by ePart, eMechanism, eCharset
// and bKeepVisibleEscapes are defined in the implementation file.
1102 static void encodeText(
1103 OUStringBuffer
& rOutputBuffer
,
1104 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1105 Part ePart
, EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1106 bool bKeepVisibleEscapes
);
// Convenience overload taking the text as a std::u16string_view. Its inline
// definition later in this header forwards to the pointer-range overload;
// encode() calls it with bKeepVisibleEscapes = false.
1108 static inline void encodeText(
1109 OUStringBuffer
& rOutputBuffer
,
1110 std::u16string_view rTheText
, Part ePart
,
1111 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1112 bool bKeepVisibleEscapes
);
// Pointer-range decoder returning a new OUString for the text between
// pBegin and pEnd. Both decode(SubString const &, ...) and
// decode(std::u16string_view, ...) forward to this function.
// The DecodeMechanism parameter is intentionally unnamed in this declaration.
1114 static OUString
decode(
1115 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1116 DecodeMechanism
, rtl_TextEncoding eCharset
);
// Const member overload that decodes a SubString of the stored URI. Its
// inline definition later in this header checks rSubString.isPresent() and
// forwards the range [getBegin(), getEnd()) of m_aAbsURIRef to the
// pointer-range decode().
1118 inline OUString
decode(
1119 SubString
const & rSubString
,
1120 DecodeMechanism eMechanism
, rtl_TextEncoding eCharset
) const;
1122 // Specialized helpers:
// Static scanner over the text between rBegin and pEnd; reports a bool.
// rBegin is a reference to the scan pointer, so the callee is able to move
// it.
// NOTE(review): the accepted grammar and whether rBegin is advanced on
// failure are not visible in this excerpt - confirm in the implementation.
1124 TOOLS_DLLPRIVATE
static bool scanIPv6reference(
1125 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1129 inline void INetURLObject::encodeText( OUStringBuffer
& rOutputBuffer
,
1130 std::u16string_view rTheText
,
1132 EncodeMechanism eMechanism
,
1133 rtl_TextEncoding eCharset
,
1134 bool bKeepVisibleEscapes
)
1136 encodeText(rOutputBuffer
,
1138 rTheText
.data() + rTheText
.size(), ePart
,
1139 eMechanism
, eCharset
, bKeepVisibleEscapes
);
1142 inline OUString
INetURLObject::decode(SubString
const & rSubString
,
1143 DecodeMechanism eMechanism
,
1144 rtl_TextEncoding eCharset
) const
1146 return rSubString
.isPresent() ?
1147 decode(m_aAbsURIRef
.getStr() + rSubString
.getBegin(),
1148 m_aAbsURIRef
.getStr() + rSubString
.getEnd(),
1149 eMechanism
, eCharset
) :
// Constructor from an absolute URI reference. Constructs m_aAbsURIRef from
// rTheAbsURIRef.size() * 2, starts with m_eScheme = INetProtocol::NotValid
// and m_eSmartScheme = INetProtocol::Http, then calls setAbsURIRef() with
// false in the bSmart position. The result of setAbsURIRef() is not used.
// NOTE(review): the final setAbsURIRef() argument and the body braces are
// not present in this excerpt - confirm against the full file.
1153 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef
,
1154 EncodeMechanism eMechanism
,
1155 rtl_TextEncoding eCharset
):
1156 m_aAbsURIRef(rTheAbsURIRef
.size() * 2), m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1158 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
// Replaces the stored URI: returns whatever setAbsURIRef() reports when
// called with false in the bSmart position.
// NOTE(review): the final setAbsURIRef() argument and the body braces are
// not present in this excerpt - confirm against the full file.
1162 inline bool INetURLObject::SetURL(std::u16string_view rTheAbsURIRef
,
1163 EncodeMechanism eMechanism
,
1164 rtl_TextEncoding eCharset
)
1166 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
1170 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef
,
1171 INetProtocol eTheSmartScheme
,
1172 EncodeMechanism eMechanism
,
1173 rtl_TextEncoding eCharset
,
1175 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(eTheSmartScheme
)
1177 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true, eStyle
);
// Replaces the stored URI using smart parsing: returns whatever
// setAbsURIRef() reports when called with true in the bSmart position.
// NOTE(review): the parameter that follows eCharset, the final
// setAbsURIRef() argument and the body braces are not present in this
// excerpt. The smart-scheme constructor passes an FSysStyle eStyle in the
// same position, so the same is presumably intended here - confirm against
// the full file.
1180 inline bool INetURLObject::SetSmartURL(std::u16string_view rTheAbsURIRef
,
1181 EncodeMechanism eMechanism
,
1182 rtl_TextEncoding eCharset
,
1185 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true,
1189 inline INetURLObject
1190 INetURLObject::smartRel2Abs(OUString
const & rTheRelURIRef
,
1191 bool & rWasAbsolute
,
1192 bool bIgnoreFragment
,
1193 EncodeMechanism eMechanism
,
1194 rtl_TextEncoding eCharset
,
1195 bool bRelativeNonURIs
,
1196 FSysStyle eStyle
) const
1198 INetURLObject aTheAbsURIRef
;
1199 convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, rWasAbsolute
,
1200 eMechanism
, eCharset
, bIgnoreFragment
, true,
1201 bRelativeNonURIs
, eStyle
);
1202 return aTheAbsURIRef
;
// Resolves rTheRelURIRef via convertRelToAbs() into a local object, using
// EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, and false for
// bIgnoreFragment, bSmart and bRelativeNonURIs; the visible code then
// assigns the local object to *pTheAbsURIRef.
// NOTE(review): several lines of this definition are not present in this
// excerpt: the declaration of bWasAbsolute, the last convertRelToAbs()
// argument, the statement executed when convertRelToAbs() fails, any guard
// around the *pTheAbsURIRef assignment, the final return and the braces.
// Confirm against the full file before relying on this summary.
1205 inline bool INetURLObject::GetNewAbsURL(OUString
const & rTheRelURIRef
,
1206 INetURLObject
* pTheAbsURIRef
)
1209 INetURLObject aTheAbsURIRef
;
1211 if (!convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, bWasAbsolute
,
1212 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false/*bIgnoreFragment*/, false, false,
1216 *pTheAbsURIRef
= aTheAbsURIRef
;
1221 inline OUString
INetURLObject::GetRelURL(std::u16string_view rTheBaseURIRef
,
1222 OUString
const & rTheAbsURIRef
,
1223 EncodeMechanism eEncodeMechanism
,
1224 DecodeMechanism eDecodeMechanism
,
1225 rtl_TextEncoding eCharset
,
1228 OUString aTheRelURIRef
;
1229 INetURLObject(rTheBaseURIRef
, eEncodeMechanism
, eCharset
).
1230 convertAbsToRel(rTheAbsURIRef
, aTheRelURIRef
, eEncodeMechanism
,
1231 eDecodeMechanism
, eCharset
, eStyle
);
1232 return aTheRelURIRef
;
1236 inline bool INetURLObject::translateToExternal(std::u16string_view
1238 OUString
& rTheExtURIRef
,
1241 rtl_TextEncoding eCharset
)
1243 return convertIntToExt(rTheIntURIRef
, rTheExtURIRef
,
1244 eDecodeMechanism
, eCharset
);
1248 inline bool INetURLObject::translateToInternal(std::u16string_view
1250 OUString
& rTheIntURIRef
,
1253 rtl_TextEncoding eCharset
)
1255 return convertExtToInt(rTheExtURIRef
, rTheIntURIRef
,
1256 eDecodeMechanism
, eCharset
);
1259 inline bool INetURLObject::SetPass(std::u16string_view rThePassword
)
1261 return rThePassword
.empty() ?
1263 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
);
1266 inline bool INetURLObject::SetUserAndPass(std::u16string_view rTheUser
,
1267 std::u16string_view rThePassword
)
1269 return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
)
1270 && (rThePassword
.empty() ?
1272 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
));
// Sets the query: for a non-empty rTheQuery the visible code returns the
// result of setQuery(rTheQuery, eMechanism, eCharset).
// NOTE(review): the statements executed when rTheQuery is empty, and the
// body braces, are not present in this excerpt - confirm against the full
// file.
1275 inline bool INetURLObject::SetParam(std::u16string_view rTheQuery
,
1276 EncodeMechanism eMechanism
,
1277 rtl_TextEncoding eCharset
)
1279 if (rTheQuery
.empty())
1284 return setQuery(rTheQuery
, eMechanism
, eCharset
);
1287 inline bool INetURLObject::SetMark(std::u16string_view rTheFragment
,
1288 EncodeMechanism eMechanism
,
1289 rtl_TextEncoding eCharset
)
1291 return rTheFragment
.empty() ?
1293 setFragment(rTheFragment
, eMechanism
, eCharset
);
1297 inline OUString
INetURLObject::encode(std::u16string_view rText
, Part ePart
,
1298 EncodeMechanism eMechanism
,
1299 rtl_TextEncoding eCharset
)
1301 OUStringBuffer aBuf
;
1302 encodeText(aBuf
, rText
, ePart
, eMechanism
, eCharset
, false);
1303 return aBuf
.makeStringAndClear();
1307 inline OUString
INetURLObject::decode(std::u16string_view rText
,
1308 DecodeMechanism eMechanism
,
1309 rtl_TextEncoding eCharset
)
1311 return decode(rText
.data(), rText
.data() + rText
.size(),
1312 eMechanism
, eCharset
);
1317 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */