android: Update app-specific/MIME type icons
[LibreOffice.git] / include / tools / urlobj.hxx
blobdfd658722826424271563289b2c8f377b62f6cb7
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/textenc.h>
25 #include <sal/types.h>
26 #include <o3tl/typed_flags_set.hxx>
28 #include <memory>
29 #include <string_view>
31 class SvMemoryStream;
33 namespace com::sun::star::util {
34 class XStringWidth;
37 namespace com::sun::star::uno { template <typename > class Reference; }
39 // Common URL prefixes for various schemes:
// Each constant below is a UTF-16 string literal holding the leading text of
// a URL for one scheme. The ftp, http, https and file prefixes include the
// two slashes after the colon; the mailto and hid prefixes stop at the colon.
40 inline constexpr OUStringLiteral INET_FTP_SCHEME = u"ftp://";
41 inline constexpr OUStringLiteral INET_HTTP_SCHEME = u"http://";
42 inline constexpr OUStringLiteral INET_HTTPS_SCHEME = u"https://";
43 inline constexpr OUStringLiteral INET_FILE_SCHEME = u"file://";
44 inline constexpr OUStringLiteral INET_MAILTO_SCHEME = u"mailto:";
45 inline constexpr OUStringLiteral INET_HID_SCHEME = u"hid:";
// Narrow (char) string-literal macro, unlike the UTF-16 constants above.
// NOTE(review): presumably the URL prefix that belongs to
// INetProtocol::PrivSoffice -- confirm against the scheme table in the
// implementation file.
47 #define URL_PREFIX_PRIV_SOFFICE "private:"
49 // Schemes:
// Identifies the URL scheme held by an INetURLObject. NotValid is the value
// a default-constructed INetURLObject stores and the value that
// INetURLObject::HasError() tests for.
50 enum class INetProtocol
52 NotValid,
53 Ftp,
54 Http,
55 File,
56 Mailto,
57 VndSunStarWebdav,
58 PrivSoffice,
59 VndSunStarHelp,
60 Https,
61 Slot,
62 Macro,
63 Javascript,
64 Data,
65 Cid,
66 VndSunStarHier,
67 Uno,
68 Component,
69 VndSunStarPkg,
70 Ldap,
71 Db,
72 VndSunStarCmd,
73 Telnet,
74 VndSunStarExpand,
75 VndSunStarTdoc,
76 Generic,
77 Smb,
78 Hid,
79 Sftp,
80 Cmis,
// LAST has the same value as Cmis, the final enumerator; keep it in sync
// when a new scheme is appended.
81 LAST = Cmis
84 /** The supported notations for file system paths.
86 enum class FSysStyle
88 /** VOS notation (e.g., "//server/dir/file").
90 Vos = 0x1,
92 /** Unix notation (e.g., "/dir/file").
94 Unix = 0x2,
96 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
98 Dos = 0x4,
100 /** Detect the used notation.
102 @descr For the following descriptions, please note that
103 whereas FSysStyle::Detect includes all style bits, combinations of only
104 a few style bits are also possible, and are also described.
106 @descr When used to translate a file system path to a file URL,
107 the subset of the following productions for which the appropriate
108 style bit is set are checked in order (using the conventions of
109 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
110 character):
112 Production T1 (VOS local; FSysStyle::Vos only):
113 "//." ["/" *UCS4]
114 becomes
115 "file:///" *UCS4
117 Production T2 (VOS host; FSysStyle::Vos only):
118 "//" [host] ["/" *UCS4]
119 becomes
120 "file://" host "/" *UCS4
122 Production T3 (UNC; FSysStyle::Dos only):
123 "\\" [host] ["\" *UCS4]
124 becomes
125 "file://" host "/" *UCS4
126 replacing "\" by "/" within <*UCS4>
128 Production T4 (Unix-like DOS; FSysStyle::Dos only):
129 ALPHA ":" ["/" *UCS4]
130 becomes
131 "file:///" ALPHA ":/" *UCS4
132 replacing "\" by "/" within <*UCS4>
134 Production T5 (DOS; FSysStyle::Dos only):
135 ALPHA ":" ["\" *UCS4]
136 becomes
137 "file:///" ALPHA ":/" *UCS4
138 replacing "\" by "/" within <*UCS4>
140 Production T6 (any):
141 *UCS4
142 becomes
143 "file:///" *UCS4
144 replacing the delimiter by "/" within <*UCS4>. The delimiter is
145 that character from the set { "/", "\" } which appears most
146 often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
147 is removed from the set; if FSysStyle::Dos is not among the style
148 bits, "\" is removed from the set). If two or more
149 characters appear the same number of times, the character
150 mentioned first in that set is chosen. If the first character
151 of <*UCS4> is the delimiter, that character is not copied.
153 @descr When used to translate a file URL to a file system path,
154 the following productions are checked in order (using the
155 conventions of RFC 2234, RFC 2396, and RFC 2732):
157 Production F1 (VOS; FSysStyle::Vos):
158 "file://" host "/" fpath ["#" fragment]
159 becomes
160 "//" host "/" fpath
162 Production F2 (DOS; FSysStyle::Dos):
163 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
164 becomes
165 ALPHA ":" ["\" fpath]
166 replacing "/" by "\" in <fpath>
168 Production F3 (Unix; FSysStyle::Unix):
169 "file:///" fpath ["#" fragment]
170 becomes
171 "/" fpath
173 Detect = Vos | Unix | Dos
// Specializes o3tl::typed_flags for FSysStyle. The mask 0x07 is the union of
// the three style bits declared above (Vos 0x1 | Unix 0x2 | Dos 0x4).
// NOTE(review): presumably this specialization is what enables the bitwise
// flag operators for FSysStyle -- confirm in o3tl/typed_flags_set.hxx.
175 namespace o3tl {
176 template<> struct typed_flags<FSysStyle> : is_typed_flags<FSysStyle, 0x07> {};
179 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
181 public:
182 // Get- and Set-Methods:
184 /** The way input strings that represent (parts of) URIs are interpreted
185 in set-methods.
187 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
188 sequences of escape sequences, representing the UTF-8 coded characters.
190 @descr Along with an EncodeMechanism parameter, the set-methods all
191 take an rtl_TextEncoding parameter, which is ignored unless the
192 EncodeMechanism is EncodeMechanism::WasEncoded.
194 enum class EncodeMechanism
196 /** All escape sequences that are already present are ignored, and are
197 interpreted as literal sequences of three characters.
199 All,
201 /** Sequences of escape sequences, that represent characters from the
202 specified character set and that can be converted to UTF-32
203 characters, are first decoded. If they have to be encoded, they
204 are converted to UTF-8 characters and are then translated into
205 (sequences of) escape sequences. Other escape sequences are
206 copied verbatim (but using upper case hex digits).
208 WasEncoded,
210 /** All escape sequences that are already present are copied verbatim
211 (but using upper case hex digits).
213 NotCanonical
216 /** The way strings that represent (parts of) URIs are returned from get-
217 methods.
219 @descr Along with a DecodeMechanism parameter, the get-methods all
220 take an rtl_TextEncoding parameter, which is ignored unless the
221 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
223 enum class DecodeMechanism
225 /** The (part of the) URI is returned unchanged. Since URIs are
226 written using a subset of US-ASCII, the returned string is
227 guaranteed to contain only US-ASCII characters.
229 NONE,
231 /** All sequences of escape sequences that represent UTF-8 coded
232 UTF-32 characters with a numerical value greater than 0x7F, are
233 replaced by the respective UTF-16 characters. All other escape
234 sequences are not decoded.
236 ToIUri,
238 /** All (sequences of) escape sequences that represent characters from
239 the specified character set, and that can be converted to UTF-32,
240 are replaced by the respective UTF-16 characters. All other
241 escape sequences are not decoded.
243 WithCharset,
245 /** All (sequences of) escape sequences that represent characters from
246 the specified character set, that can be converted to UTF-32, and
247 that (in the case of ASCII characters) can safely be decoded
248 without altering the meaning of the (part of the) URI, are
249 replaced by the respective UTF-16 characters. All other escape
250 sequences are not decoded.
252 Unambiguous
255 // General Structure:
// Default constructor. The scheme is initialised to INetProtocol::NotValid,
// so HasError() returns true for a freshly constructed object, and the
// smart-parsing scheme starts out as INetProtocol::Http.
// NOTE(review): 256 is handed to the OUStringBuffer constructor, presumably
// as an initial capacity -- confirm against rtl/ustrbuf.hxx.
257 INetURLObject():
258 m_aAbsURIRef(256), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {}
// @return True when the stored scheme is INetProtocol::NotValid.
260 bool HasError() const { return m_eScheme == INetProtocol::NotValid; }
// Returns the complete stored URI reference (m_aAbsURIRef) passed through
// decode().
// @param eMechanism How escape sequences are decoded. Unlike most getters of
//                   this class there is no default; the caller must choose.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
262 OUString GetMainURL(DecodeMechanism eMechanism,
263 rtl_TextEncoding eCharset
264 = RTL_TEXTENCODING_UTF8) const
265 { return decode(m_aAbsURIRef, eMechanism, eCharset); }
267 OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
268 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
269 const;
271 OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
272 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
273 const;
275 OUString
276 getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth,
277 sal_Int32 nWidth,
278 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
279 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
280 const;
282 bool operator ==(INetURLObject const & rObject) const;
// Inequality, defined as the logical negation of operator ==.
284 bool operator !=(INetURLObject const & rObject) const
285 { return !(*this == rObject); }
287 // Strict Parsing:
289 inline explicit INetURLObject(
290 std::u16string_view rTheAbsURIRef,
291 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
292 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
294 inline bool SetURL(std::u16string_view rTheAbsURIRef,
295 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
296 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
298 bool ConcatData(INetProtocol eTheScheme, std::u16string_view rTheUser,
299 std::u16string_view rThePassword,
300 std::u16string_view rTheHost, sal_uInt32 nThePort,
301 std::u16string_view rThePath);
303 // Smart Parsing:
305 inline INetURLObject(std::u16string_view rTheAbsURIRef,
306 INetProtocol eTheSmartScheme,
307 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
308 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
309 FSysStyle eStyle = FSysStyle::Detect);
// Stores eTheSmartScheme in m_eSmartScheme; nothing else is modified.
// NOTE(review): presumably this is the scheme the smart-parsing methods fall
// back to when the input names no scheme -- confirm in the implementation.
311 void SetSmartProtocol(INetProtocol eTheSmartScheme)
312 { m_eSmartScheme = eTheSmartScheme; }
314 inline bool
315 SetSmartURL(std::u16string_view rTheAbsURIRef,
316 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
317 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
318 FSysStyle eStyle = FSysStyle::Detect);
320 inline INetURLObject
321 smartRel2Abs(OUString const & rTheRelURIRef,
322 bool & rWasAbsolute,
323 bool bIgnoreFragment = false,
324 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
325 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
326 bool bRelativeNonURIs = false,
327 FSysStyle eStyle = FSysStyle::Detect) const;
329 // Relative URLs:
331 inline bool
332 GetNewAbsURL(OUString const & rTheRelURIRef,
333 INetURLObject * pTheAbsURIRef)
334 const;
336 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
337 (because of syntactic reasons), either rTheRelURIRef or an empty
338 string is returned: If all of the parameters eEncodeMechanism,
339 eDecodeMechanism and eCharset have their respective default values,
340 then rTheRelURIRef is returned unmodified; otherwise, an empty string
341 is returned.
343 static OUString
344 GetAbsURL(std::u16string_view rTheBaseURIRef,
345 OUString const & rTheRelURIRef,
346 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
347 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
348 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
350 static inline OUString
351 GetRelURL(std::u16string_view rTheBaseURIRef,
352 OUString const & rTheAbsURIRef,
353 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
354 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
355 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
356 FSysStyle eStyle = FSysStyle::Detect);
358 // External URLs:
360 OUString getExternalURL() const;
362 static inline bool translateToExternal(std::u16string_view rTheIntURIRef,
363 OUString & rTheExtURIRef,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8);
369 static inline bool translateToInternal(std::u16string_view rTheExtURIRef,
370 OUString & rTheIntURIRef,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8);
376 // Scheme:
378 struct SchemeInfo;
// @return The stored scheme (m_eScheme). This is INetProtocol::NotValid
// exactly when HasError() is true.
380 INetProtocol GetProtocol() const { return m_eScheme; }
// @return True when the given scheme equals the stored scheme (m_eScheme).
382 bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; }
384 bool isSchemeEqualTo(std::u16string_view scheme) const;
386 /** Check if the scheme is one of the WebDAV schemes
387 * we know about.
389 * @return true if it is one of those schemes, either a public scheme or a private scheme.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static OUString GetScheme(INetProtocol eTheScheme);
401 /** Return the human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static const OUString & GetSchemeName(INetProtocol eTheScheme);
409 static INetProtocol CompareProtocolScheme(std::u16string_view aTheAbsURIRef);
411 // User Info:
// @return True when the user component (m_aUser) is present. Only the user
// component is tested; the password component (m_aAuth) is not consulted.
413 bool HasUserData() const { return m_aUser.isPresent(); }
// Returns the user component (m_aUser) passed through decode().
// @param eMechanism How escape sequences are decoded; defaults to ToIUri.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
// NOTE(review): the decode() overload that accepts a SubString is not
// visible in this part of the header.
415 OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
416 rtl_TextEncoding eCharset
417 = RTL_TEXTENCODING_UTF8) const
418 { return decode(m_aUser, eMechanism, eCharset); }
// Returns the password passed through decode(). The password is read from
// the m_aAuth component.
// @param eMechanism How escape sequences are decoded; defaults to ToIUri.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
420 OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
421 rtl_TextEncoding eCharset
422 = RTL_TEXTENCODING_UTF8) const
423 { return decode(m_aAuth, eMechanism, eCharset); }
// Public wrapper around the private setUser(), with the charset fixed to
// UTF-8.
// @param rTheUser The new user text.
// @return Whatever setUser() returns.
425 bool SetUser(std::u16string_view rTheUser)
426 { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); }
428 inline bool SetPass(std::u16string_view rThePassword);
430 inline bool SetUserAndPass(std::u16string_view rTheUser,
431 std::u16string_view rThePassword);
433 // Host and Port:
// @return True when the port component (m_aPort) is present.
435 bool HasPort() const { return m_aPort.isPresent(); }
// Returns the host component (m_aHost) passed through decode().
// @param eMechanism How escape sequences are decoded; defaults to ToIUri.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
437 OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
438 rtl_TextEncoding eCharset
439 = RTL_TEXTENCODING_UTF8) const
440 { return decode(m_aHost, eMechanism, eCharset); }
442 OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
443 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) const;
445 sal_uInt32 GetPort() const;
// Public wrapper around setHost(), with the charset fixed to UTF-8.
// @param rTheHost The new host text.
// @return Whatever setHost() returns.
// NOTE(review): the setHost() declaration is not visible in this part of
// the header.
447 bool SetHost(std::u16string_view rTheHost)
448 { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); }
450 bool SetPort(sal_uInt32 nThePort);
452 // Path:
// @return True when the path component (m_aPath) has a non-zero length.
// Note that this tests isEmpty(), whereas HasUserData(), HasPort(),
// HasParam() and HasMark() test isPresent().
454 bool HasURLPath() const { return !m_aPath.isEmpty(); }
// Returns the path component (m_aPath) passed through decode().
// @param eMechanism How escape sequences are decoded; defaults to ToIUri.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
456 OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
457 rtl_TextEncoding eCharset
458 = RTL_TEXTENCODING_UTF8) const
459 { return decode(m_aPath, eMechanism, eCharset); }
// Public wrapper that forwards all three arguments unchanged to setPath().
// @param rThePath   The new path text.
// @param eMechanism How the input is interpreted; defaults to WasEncoded.
// @param eCharset   Charset forwarded to setPath(); defaults to UTF-8.
// @return Whatever setPath() returns.
// NOTE(review): the setPath() declaration is not visible in this part of
// the header.
461 bool SetURLPath(std::u16string_view rThePath,
462 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
463 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
464 { return setPath(rThePath, eMechanism, eCharset); }
466 // Hierarchical Path:
468 /** A constant to address the last segment in various methods dealing with
469 hierarchical paths.
471 @descr It is often more efficient to address the last segment using
472 this constant, than to determine its ordinal value using
473 getSegmentCount().
475 enum { LAST_SEGMENT = -1 };
477 /** The number of segments in the hierarchical path.
479 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
480 form
482 hierarchical-path = 1*("/" segment)
484 segment = name *(";" param)
486 name = [base ["." extension]]
488 base = 1*pchar
490 extension = *<any pchar except ".">
492 param = *pchar
494 @param bIgnoreFinalSlash If true, a final slash at the end of the
495 hierarchical path does not denote an empty segment, but is ignored.
497 @return The number of segments in the hierarchical path. If the path
498 is not hierarchical, 0 is returned.
500 sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const;
502 /** Remove a segment from the hierarchical path.
504 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
505 if addressing the last segment.
507 @param bIgnoreFinalSlash If true, a final slash at the end of the
508 hierarchical path does not denote an empty segment, but is ignored.
510 @return True if the segment has successfully been removed (and the
511 resulting URI is still valid). If the path is not hierarchical, or
512 the specified segment does not exist, false is returned. If false is
513 returned, the object is not modified.
515 bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT,
516 bool bIgnoreFinalSlash = true);
518 /** Insert a new segment into the hierarchical path.
519 A final slash at the end of the
520 hierarchical path does not denote an empty segment, but is ignored.
522 @param rTheName The name part of the new segment. The new segment
523 will contain no parameters.
525 @param bAppendFinalSlash If the new segment is appended at the end of
526 the hierarchical path, this parameter specifies whether to add a final
527 slash after it or not.
529 @param nIndex The non-negative index of the segment before which
530 to insert the new segment. LAST_SEGMENT or an nIndex that equals
531 getSegmentCount() inserts the new segment at the end of the
532 hierarchical path.
534 @param eMechanism See the general discussion for set-methods.
536 @param eCharset See the general discussion for set-methods.
538 @return True if the segment has successfully been inserted (and the
539 resulting URI is still valid). If the path is not hierarchical, or
540 the specified place to insert the new segment does not exist, false is
541 returned. If false is returned, the object is not modified.
543 bool insertName(std::u16string_view rTheName,
544 bool bAppendFinalSlash = false,
545 sal_Int32 nIndex = LAST_SEGMENT,
546 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
547 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
549 /** Get the name of a segment of the hierarchical path.
551 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
552 if addressing the last segment.
554 @param bIgnoreFinalSlash If true, a final slash at the end of the
555 hierarchical path does not denote an empty segment, but is ignored.
557 @param eMechanism See the general discussion for get-methods.
559 @param eCharset See the general discussion for get-methods.
561 @return The name part of the specified segment. If the path is not
562 hierarchical, or the specified segment does not exist, an empty string
563 is returned.
565 OUString getName(sal_Int32 nIndex = LAST_SEGMENT,
566 bool bIgnoreFinalSlash = true,
567 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
568 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
569 const;
571 /** Set the name of the last segment (preserving any parameters and any query or
572 fragment part).
574 @param rTheName The new name.
576 @param eMechanism See the general discussion for set-methods.
578 @param eCharset See the general discussion for set-methods.
580 @return True if the name has successfully been modified (and the
581 resulting URI is still valid). If the path is not hierarchical, or
582 a last segment does not exist, false is returned. If false is
583 returned, the object is not modified.
585 bool setName(std::u16string_view rTheName,
586 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
587 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
589 /** Get the base of the name of a segment.
591 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
592 if addressing the last segment.
594 @param bIgnoreFinalSlash If true, a final slash at the end of the
595 hierarchical path does not denote an empty segment, but is ignored.
597 @param eMechanism See the general discussion for get-methods.
599 @param eCharset See the general discussion for get-methods.
601 @return The base part of the specified segment. If the path is
602 not hierarchical, or the specified segment does not exist, an empty
603 string is returned.
605 OUString getBase(sal_Int32 nIndex = LAST_SEGMENT,
606 bool bIgnoreFinalSlash = true,
607 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
608 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
609 const;
611 /** Set the base of the name of a segment (preserving the extension).
612 A final slash at the end of the
613 hierarchical path does not denote an empty segment, but is ignored.
615 @param rTheBase The new base.
617 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
618 if addressing the last segment.
620 @param eMechanism See the general discussion for set-methods.
622 @param eCharset See the general discussion for set-methods.
624 @return True if the base has successfully been modified (and the
625 resulting URI is still valid). If the path is not hierarchical, or
626 the specified segment does not exist, false is returned. If false is
627 returned, the object is not modified.
629 bool setBase(std::u16string_view rTheBase,
630 sal_Int32 nIndex = LAST_SEGMENT,
631 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
632 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
634 /** Determine whether the name of the last segment has an extension.
636 @return True if the name of the last segment has an extension.
637 If the path is not hierarchical, or a last segment does not
638 exist, false is returned.
640 bool hasExtension() const;
642 /** Get the extension of the name of a segment.
644 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
645 if addressing the last segment.
647 @param bIgnoreFinalSlash If true, a final slash at the end of the
648 hierarchical path does not denote an empty segment, but is ignored.
650 @param eMechanism See the general discussion for get-methods.
652 @param eCharset See the general discussion for get-methods.
654 @return The extension part of the specified segment. If the path is
655 not hierarchical, or the specified segment does not exist, an empty
656 string is returned.
658 OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT,
659 bool bIgnoreFinalSlash = true,
660 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
661 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
662 const;
664 /** Set the extension of the name of a segment (replacing an already
665 existing extension).
667 @param rTheExtension The new extension.
669 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
670 if addressing the last segment.
672 @param bIgnoreFinalSlash If true, a final slash at the end of the
673 hierarchical path does not denote an empty segment, but is ignored.
675 @param eCharset See the general discussion for set-methods.
677 @return True if the extension has successfully been modified (and the
678 resulting URI is still valid). If the path is not hierarchical, or
679 the specified segment does not exist, false is returned. If false is
680 returned, the object is not modified.
682 bool setExtension(std::u16string_view rTheExtension,
683 sal_Int32 nIndex = LAST_SEGMENT,
684 bool bIgnoreFinalSlash = true,
685 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
687 /** Remove the extension of the name of a segment.
689 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
690 if addressing the last segment.
692 @param bIgnoreFinalSlash If true, a final slash at the end of the
693 hierarchical path does not denote an empty segment, but is ignored.
695 @return True if the extension has successfully been removed (and the
696 resulting URI is still valid), or if the name did not have an
697 extension. If the path is not hierarchical, or the specified segment
698 does not exist, false is returned. If false is returned, the object
699 is not modified.
701 bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT,
702 bool bIgnoreFinalSlash = true);
704 /** Determine whether the hierarchical path ends in a final slash.
706 @return True if the hierarchical path ends in a final slash. If the
707 path is not hierarchical, false is returned.
709 bool hasFinalSlash() const;
711 /** Make the hierarchical path end in a final slash (if it does not
712 already do so).
714 @return True if a final slash has successfully been appended (and the
715 resulting URI is still valid), or if the hierarchical path already
716 ended in a final slash. If the path is not hierarchical, false is
717 returned. If false is returned, the object is not modified.
719 bool setFinalSlash();
721 /** Remove a final slash from the hierarchical path.
723 @return True if a final slash has successfully been removed (and the
724 resulting URI is still valid), or if the hierarchical path already did
725 not end in a final slash. If the path is not hierarchical, false is
726 returned. If false is returned, the object is not modified.
728 bool removeFinalSlash();
730 // Query:
// @return True when the query component (m_aQuery) is present.
732 bool HasParam() const { return m_aQuery.isPresent(); }
// Returns the query component (m_aQuery) passed through decode() with the
// mechanism fixed to DecodeMechanism::NONE; the caller cannot choose one.
// @param eCharset Charset forwarded to decode(); defaults to UTF-8.
// NOTE(review): the DecodeMechanism documentation in this class says the
// charset is ignored unless the mechanism is WithCharset or Unambiguous, so
// eCharset presumably has no effect here -- confirm.
734 OUString GetParam(rtl_TextEncoding eCharset
735 = RTL_TEXTENCODING_UTF8) const
736 { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); }
738 inline bool SetParam(std::u16string_view rTheQuery,
739 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
740 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
742 // Fragment:
// @return True when the fragment component (m_aFragment) is present.
744 bool HasMark() const { return m_aFragment.isPresent(); }
// Returns the fragment component (m_aFragment) passed through decode().
// @param eMechanism How escape sequences are decoded; defaults to ToIUri.
// @param eCharset   Charset forwarded to decode(); defaults to UTF-8.
746 OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
747 rtl_TextEncoding eCharset
748 = RTL_TEXTENCODING_UTF8) const
749 { return decode(m_aFragment, eMechanism, eCharset); }
751 inline bool SetMark(std::u16string_view rTheFragment,
752 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
753 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
755 // File URLs:
757 /** Return the file system path represented by a file URL (ignoring any
758 fragment part).
760 @param eStyle The notation of the returned file system path.
762 @param pDelimiter Upon successful return, this parameter can return
763 the character that is the 'main' delimiter within the returned file
764 system path (e.g., "/" for Unix, "\" for DOS). This is
765 especially useful for routines that later try to shorten the returned
766 file system path at a 'good' position, e.g. to fit it into some
767 limited display space.
769 @return The file system path represented by this file URL. If this
770 file URL does not represent a file system path according to the
771 specified notation, or if this is not a file URL at all, an empty
772 string is returned.
774 OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr)
775 const;
777 // Data URLs:
778 std::unique_ptr<SvMemoryStream> getData() const;
780 // Coding:
// Names the part of a URI that a piece of text belongs to. It is the ePart
// argument of encode() and appendUCS4(); the documentation of encode() says
// the part decides which characters are 'forbidden' and must be escaped.
// Every enumerator is a distinct single-bit value.
// NOTE(review): 0x00002 and 0x00004 are not assigned in this listing --
// presumably retired values; confirm before reusing them.
782 enum Part
784 PART_USER_PASSWORD = 0x00001,
785 PART_FPATH = 0x00008,
786 PART_AUTHORITY = 0x00010,
787 PART_REL_SEGMENT_EXTRA = 0x00020,
788 PART_URIC = 0x00040,
789 PART_HTTP_PATH = 0x00080,
790 PART_MESSAGE_ID_PATH = 0x00100,
791 PART_MAILTO = 0x00200,
792 PART_PATH_BEFORE_QUERY = 0x00400,
793 PART_PCHAR = 0x00800,
794 PART_VISIBLE = 0x01000,
795 PART_VISIBLE_NONSPECIAL = 0x02000,
796 PART_UNO_PARAM_VALUE = 0x04000,
797 PART_UNAMBIGUOUS = 0x08000,
798 PART_URIC_NO_SLASH = 0x10000,
799 PART_HTTP_QUERY = 0x20000, //TODO! unused?
// Reported by getUTF32() through its rEscapeType out-parameter and consumed
// by appendUCS4() as eEscapeType.
// NOTE(review): presumably tells whether a character was read unescaped
// (NONE), from a single escaped octet (Octet), or from an escape sequence
// decoded to a full UTF-32 character (Utf32) -- confirm in the implementation.
802 enum class EscapeType
804 NONE,
805 Octet,
806 Utf32
809 /** Encode some text as part of a URI.
811 @param rText Some text (for its interpretation, see the general
812 discussion for set-methods).
814 @param ePart The part says which characters are 'forbidden' and must
815 be encoded (replaced by escape sequences). Characters outside the US-
816 ASCII range are always 'forbidden.'
818 @param eMechanism See the general discussion for set-methods.
820 @param eCharset See the general discussion for set-methods.
822 @return The text, encoded according to the given mechanism and
823 charset ('forbidden' characters replaced by escape sequences).
825 static OUString encode( std::u16string_view rText, Part ePart,
826 EncodeMechanism eMechanism,
827 rtl_TextEncoding eCharset
828 = RTL_TEXTENCODING_UTF8);
831 /** Decode some text.
833 @param rText Some (encoded) text.
835 @param eMechanism See the general discussion for get-methods.
837 @param eCharset See the general discussion for get-methods.
839 @return The text, decoded according to the given mechanism and
840 charset (escape sequences replaced by 'raw' characters).
842 static inline OUString decode(std::u16string_view rText,
843 DecodeMechanism eMechanism,
844 rtl_TextEncoding eCharset
845 = RTL_TEXTENCODING_UTF8);
847 static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4);
849 static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4,
850 EscapeType eEscapeType, Part ePart,
851 rtl_TextEncoding eCharset, bool bKeepVisibleEscapes);
853 static sal_uInt32 getUTF32(sal_Unicode const *& rBegin,
854 sal_Unicode const * pEnd,
855 EncodeMechanism eMechanism,
856 rtl_TextEncoding eCharset,
857 EscapeType & rEscapeType);
859 // Specialized helpers:
861 static sal_uInt32 scanDomain(sal_Unicode const *& rBegin,
862 sal_Unicode const * pEnd,
863 bool bEager = true);
865 // OBSOLETE Hierarchical Path:
867 OUString GetPartBeforeLastName() const;
869 /** Get the last segment in the path.
871 @param eMechanism See the general discussion for get-methods.
873 @param eCharset See the general discussion for get-methods.
875 @return For a hierarchical URL, the last segment (everything after
876 the last unencoded '/'). Note that this last segment may be empty. If
877 the URL is not hierarchical, an empty string is returned.
879 OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
880 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
881 const;
883 /** Get the 'extension' of the last segment in the path.
885 @return For a hierarchical URL, everything after the first unencoded
886 '.' in the last segment of the path. Note that this 'extension' may
887 be empty. If the URL is not hierarchical, or if the last segment does
888 not contain an unencoded '.', an empty string is returned.
890 OUString GetFileExtension() const;
892 bool Append(std::u16string_view rTheSegment,
893 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
894 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
896 void CutLastName();
898 // OBSOLETE File URLs:
900 OUString PathToFileName() const;
902 OUString GetFull() const;
904 OUString GetPath() const;
906 void SetBase(std::u16string_view rTheBase);
908 OUString GetBase() const;
910 void SetExtension(std::u16string_view rTheExtension);
912 OUString CutExtension();
// Always returns true; no object state or platform check is involved.
914 static bool IsCaseSensitive() { return true; }
916 void changeScheme(INetProtocol eTargetScheme);
918 // INetProtocol::Macro, INetProtocol::Uno, INetProtocol::Slot,
919 // vnd.sun.star.script, etc. All the types of URLs which shouldn't
920 // be accepted from an outside controlled source
921 bool IsExoticProtocol() const;
923 private:
924 // General Structure:
// Describes one URI component as a (begin, length) pair of sal_Int32 values.
// A begin of -1 marks the component as absent.
// NOTE(review): presumably begin and length are offsets into the owning
// object's m_aAbsURIRef buffer -- confirm in the implementation.
926 class SAL_DLLPRIVATE SubString
928 sal_Int32 m_nBegin;
929 sal_Int32 m_nLength;
931 public:
// With the default arguments the result is absent (begin -1) and empty
// (length 0).
932 explicit SubString(sal_Int32 nTheBegin = -1,
933 sal_Int32 nTheLength = 0):
934 m_nBegin(nTheBegin), m_nLength(nTheLength) {}
// Presence depends only on the begin value, not on the length.
936 bool isPresent() const { return m_nBegin != -1; }
// Emptiness depends only on the length, so a component can be present and
// empty at the same time.
938 bool isEmpty() const { return m_nLength == 0; }
940 sal_Int32 getBegin() const { return m_nBegin; }
942 sal_Int32 getLength() const { return m_nLength; }
// One past the last position: begin + length. For an absent, empty
// component this evaluates to -1.
944 sal_Int32 getEnd() const { return m_nBegin + m_nLength; }
// The members below are only declared here; their definitions are not
// visible in this part of the header.
946 sal_Int32 clear();
948 sal_Int32 set(OUStringBuffer & rString,
949 std::u16string_view rSubString,
950 sal_Int32 nTheBegin);
952 sal_Int32 set(OUString & rString,
953 std::u16string_view rSubString);
955 sal_Int32 set(OUStringBuffer & rString,
956 std::u16string_view rSubString);
958 inline void operator +=(sal_Int32 nDelta);
960 int compare(SubString const & rOther,
961 OUStringBuffer const & rThisString,
962 OUStringBuffer const & rOtherString) const;
// The complete URI reference text; GetMainURL() returns this after decoding.
965 OUStringBuffer m_aAbsURIRef;
// One SubString per URI component. The public getters read m_aUser
// (GetUser), m_aAuth (GetPass), m_aHost (GetHost), m_aPath (GetURLPath),
// m_aQuery (GetParam) and m_aFragment (GetMark).
// NOTE(review): presumably each holds offsets into m_aAbsURIRef -- confirm.
966 SubString m_aScheme;
967 SubString m_aUser;
968 SubString m_aAuth;
969 SubString m_aHost;
970 SubString m_aPort;
971 SubString m_aPath;
972 SubString m_aQuery;
973 SubString m_aFragment;
// The parsed scheme; INetProtocol::NotValid means HasError() is true.
974 INetProtocol m_eScheme;
// Set by the default constructor (to Http) and by SetSmartProtocol().
975 INetProtocol m_eSmartScheme;
// Declared here, defined out of line.
// NOTE(review): presumably puts the object into its invalid state -- confirm.
977 TOOLS_DLLPRIVATE void setInvalid();
// Common entry point behind the inline constructors and SetURL() /
// SetSmartURL(): the plain variants pass bSmart = false together with
// FSysStyle(0), the smart variants pass bSmart = true together with the
// caller's eStyle. SetURL()/SetSmartURL() return its bool result; the
// constructors discard it.
979 bool setAbsURIRef(
980 std::u16string_view rTheAbsURIRef,
981 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart,
982 FSysStyle eStyle);
984 // Relative URLs:
// Worker behind smartRel2Abs() (which passes bSmart = true) and
// GetNewAbsURL() (which passes EncodeMechanism::WasEncoded,
// RTL_TEXTENCODING_UTF8, bIgnoreFragment = false, bSmart = false,
// bRelativeNonURIs = false and FSysStyle::Detect). Results are delivered
// through rTheAbsURIRef and rWasAbsolute; GetNewAbsURL() treats a false
// return as failure, smartRel2Abs() does not look at the return value.
986 bool convertRelToAbs(
987 OUString const & rTheRelURIRef,
988 INetURLObject & rTheAbsURIRef, bool & rWasAbsolute,
989 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
990 bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs,
991 FSysStyle eStyle) const;
// Worker behind the static GetRelURL(), which invokes it on a temporary
// object built from the base URI and returns whatever was written to
// rTheRelURIRef; the bool result is not inspected there.
993 bool convertAbsToRel(
994 OUString const & rTheAbsURIRef,
995 OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism,
996 DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset,
997 FSysStyle eStyle) const;
999 // External URLs:
// Static worker behind translateToExternal(), which forwards its four
// arguments unchanged and returns this function's bool result. The converted
// text is delivered through rTheExtURIRef.
1001 static bool convertIntToExt(
1002 std::u16string_view rTheIntURIRef,
1003 OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism,
1004 rtl_TextEncoding eCharset);
// Static worker behind translateToInternal(), which forwards its four
// arguments unchanged and returns this function's bool result. The converted
// text is delivered through rTheIntURIRef.
1006 static bool convertExtToInt(
1007 std::u16string_view rTheExtURIRef,
1008 OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism,
1009 rtl_TextEncoding eCharset);
1011 // Scheme:
// Forward declaration only; the struct is defined elsewhere.
1013 struct PrefixInfo;
// Static lookup keyed by a protocol value; declared inline, with the
// definition not visible in this header.
1015 TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo(
1016 INetProtocol eTheScheme);
// Argument-less member overload.
// NOTE(review): presumably looks up the info for this object's own scheme
// -- confirm against the definition.
1018 TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const;
// rBegin is a reference to the caller's pointer, so the callee is able to
// move it; pEnd bounds the input.
// NOTE(review): presumably returns null when no known prefix matches and
// advances rBegin past a matched prefix -- confirm.
1020 TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix(
1021 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1023 // Authority:
// Const accessor returning a sal_Int32; declared here, defined out of line.
// NOTE(review): presumably an index into m_aAbsURIRef -- confirm.
1025 TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const;
// Const accessor returning a SubString by value; defined out of line.
1027 TOOLS_DLLPRIVATE SubString getAuthority() const;
1029 // User Info:
// Called by SetUserAndPass() with RTL_TEXTENCODING_UTF8; when it returns
// false, SetUserAndPass() returns false without touching the password.
1031 bool setUser(
1032 std::u16string_view rTheUser,
1033 rtl_TextEncoding eCharset);
// Called by SetPass() and SetUserAndPass() when the supplied password is
// empty; its bool result becomes their result.
1035 bool clearPassword();
// Called by SetPass() and SetUserAndPass() with RTL_TEXTENCODING_UTF8 when
// the supplied password is non-empty; its bool result becomes their result.
1037 bool setPassword(
1038 std::u16string_view rThePassword,
1039 rtl_TextEncoding eCharset);
1041 // Host and Port:
// Static parser over the range [rBegin, pEnd). rBegin is a reference to the
// caller's pointer, so the callee is able to move it; pCanonic is passed as
// a pointer to an output buffer.
// NOTE(review): presumably pCanonic may be null when no canonic form is
// wanted -- confirm against the definition.
1043 TOOLS_DLLPRIVATE static bool parseHost(
1044 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
1045 OUStringBuffer* pCanonic);
// Static parser over [pBegin, pEnd); unlike parseHost() the begin pointer is
// taken by value. bNetBiosName selects between the two name kinds named in
// the function's identifier -- NOTE(review): confirm exact semantics.
1047 TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName(
1048 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1049 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1050 bool bNetBiosName, OUStringBuffer* pCanonic);
// Declared here, defined out of line; returns bool.
1052 bool setHost(
1053 std::u16string_view rTheHost,
1054 rtl_TextEncoding eCharset);
1056 // Path:
// Static parser. pBegin is a pointer to the caller's cursor pointer and
// rSynPath is a non-const buffer reference, so both can be written by the
// callee; the four sal_uInt32 parameters carry the delimiter characters to
// use for segments, the alternative segment delimiter, query and fragment.
// NOTE(review): exact delimiter conventions are not visible here -- confirm.
1058 TOOLS_DLLPRIVATE static bool parsePath(
1059 INetProtocol eScheme, sal_Unicode const ** pBegin,
1060 sal_Unicode const * pEnd, EncodeMechanism eMechanism,
1061 rtl_TextEncoding eCharset, bool bSkippedInitialSlash,
1062 sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter,
1063 sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter,
1064 OUStringBuffer &rSynPath);
// Declared here, defined out of line; returns bool.
1066 bool setPath(
1067 std::u16string_view rThePath,
1068 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1070 // Hierarchical Path:
// Const check returning bool; defined out of line.
1072 TOOLS_DLLPRIVATE bool checkHierarchical() const;
// Returns a SubString by value for the segment selected by nIndex.
// NOTE(review): presumably the result is "not present" when no such segment
// exists, and bIgnoreFinalSlash controls treatment of a trailing slash --
// confirm against the definition.
1074 TOOLS_DLLPRIVATE SubString getSegment(
1075 sal_Int32 nIndex, bool bIgnoreFinalSlash) const;
1077 // Query:
// Returns nothing. SetParam() calls this for an empty query string and then
// reports false itself.
1079 void clearQuery();
// SetParam() forwards here (and returns the result) for a non-empty query.
1081 bool setQuery(
1082 std::u16string_view rTheQuery,
1083 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1085 // Fragment:
// SetMark() calls this (and returns the result) for an empty fragment.
1087 bool clearFragment();
// SetMark() forwards here (and returns the result) for a non-empty fragment.
1089 bool setFragment(
1090 std::u16string_view rTheMark,
1091 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1093 // FILE URLs:
// Const check returning bool; declared here, defined out of line.
1095 TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const;
1097 // Coding:
// Static, declared inline; writes to rTheText.
// NOTE(review): presumably appends the escape sequence for nOctet -- confirm.
1099 TOOLS_DLLPRIVATE static inline void appendEscape(
1100 OUStringBuffer & rTheText, sal_uInt32 nOctet);
// Pointer-range overload that does the actual work; output goes to
// rOutputBuffer. The string-view overload below and the static encode()
// both end up here.
1102 static void encodeText(
1103 OUStringBuffer& rOutputBuffer,
1104 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1105 Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1106 bool bKeepVisibleEscapes);
// String-view convenience overload; its inline definition forwards
// data() / data() + size() to the pointer-range overload above.
1108 static inline void encodeText(
1109 OUStringBuffer& rOutputBuffer,
1110 std::u16string_view rTheText, Part ePart,
1111 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1112 bool bKeepVisibleEscapes);
// Static pointer-range decoder; the DecodeMechanism parameter is unnamed in
// this declaration. Both inline decode() variants delegate to it.
1114 static OUString decode(
1115 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1116 DecodeMechanism, rtl_TextEncoding eCharset);
// Member overload for a component window: its inline definition yields an
// empty OUString when rSubString is not present and otherwise decodes the
// matching range of m_aAbsURIRef.
1118 inline OUString decode(
1119 SubString const & rSubString,
1120 DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const;
1122 // Specialized helpers:
// Static scanner over [rBegin, pEnd); rBegin is a reference to the caller's
// pointer, so the callee is able to move it.
// NOTE(review): presumably returns true and advances rBegin when an IPv6
// reference was recognised -- confirm against the definition.
1124 TOOLS_DLLPRIVATE static bool scanIPv6reference(
1125 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1128 // static
1129 inline void INetURLObject::encodeText( OUStringBuffer& rOutputBuffer,
1130 std::u16string_view rTheText,
1131 Part ePart,
1132 EncodeMechanism eMechanism,
1133 rtl_TextEncoding eCharset,
1134 bool bKeepVisibleEscapes)
1136 encodeText(rOutputBuffer,
1137 rTheText.data(),
1138 rTheText.data() + rTheText.size(), ePart,
1139 eMechanism, eCharset, bKeepVisibleEscapes);
1142 inline OUString INetURLObject::decode(SubString const & rSubString,
1143 DecodeMechanism eMechanism,
1144 rtl_TextEncoding eCharset) const
1146 return rSubString.isPresent() ?
1147 decode(m_aAbsURIRef.getStr() + rSubString.getBegin(),
1148 m_aAbsURIRef.getStr() + rSubString.getEnd(),
1149 eMechanism, eCharset) :
1150 OUString();
1153 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef,
1154 EncodeMechanism eMechanism,
1155 rtl_TextEncoding eCharset):
1156 m_aAbsURIRef(rTheAbsURIRef.size() * 2), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1158 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1159 FSysStyle(0));
1162 inline bool INetURLObject::SetURL(std::u16string_view rTheAbsURIRef,
1163 EncodeMechanism eMechanism,
1164 rtl_TextEncoding eCharset)
1166 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1167 FSysStyle(0));
1170 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef,
1171 INetProtocol eTheSmartScheme,
1172 EncodeMechanism eMechanism,
1173 rtl_TextEncoding eCharset,
1174 FSysStyle eStyle):
1175 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme)
1177 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle);
1180 inline bool INetURLObject::SetSmartURL(std::u16string_view rTheAbsURIRef,
1181 EncodeMechanism eMechanism,
1182 rtl_TextEncoding eCharset,
1183 FSysStyle eStyle)
1185 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true,
1186 eStyle);
1189 inline INetURLObject
1190 INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef,
1191 bool & rWasAbsolute,
1192 bool bIgnoreFragment,
1193 EncodeMechanism eMechanism,
1194 rtl_TextEncoding eCharset,
1195 bool bRelativeNonURIs,
1196 FSysStyle eStyle) const
1198 INetURLObject aTheAbsURIRef;
1199 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute,
1200 eMechanism, eCharset, bIgnoreFragment, true,
1201 bRelativeNonURIs, eStyle);
1202 return aTheAbsURIRef;
1205 inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef,
1206 INetURLObject * pTheAbsURIRef)
1207 const
1209 INetURLObject aTheAbsURIRef;
1210 bool bWasAbsolute;
1211 if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute,
1212 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false,
1213 FSysStyle::Detect))
1214 return false;
1215 if (pTheAbsURIRef)
1216 *pTheAbsURIRef = aTheAbsURIRef;
1217 return true;
1220 // static
1221 inline OUString INetURLObject::GetRelURL(std::u16string_view rTheBaseURIRef,
1222 OUString const & rTheAbsURIRef,
1223 EncodeMechanism eEncodeMechanism,
1224 DecodeMechanism eDecodeMechanism,
1225 rtl_TextEncoding eCharset,
1226 FSysStyle eStyle)
1228 OUString aTheRelURIRef;
1229 INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
1230 convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism,
1231 eDecodeMechanism, eCharset, eStyle);
1232 return aTheRelURIRef;
1235 // static
1236 inline bool INetURLObject::translateToExternal(std::u16string_view
1237 rTheIntURIRef,
1238 OUString & rTheExtURIRef,
1239 DecodeMechanism
1240 eDecodeMechanism,
1241 rtl_TextEncoding eCharset)
1243 return convertIntToExt(rTheIntURIRef, rTheExtURIRef,
1244 eDecodeMechanism, eCharset);
1247 // static
1248 inline bool INetURLObject::translateToInternal(std::u16string_view
1249 rTheExtURIRef,
1250 OUString & rTheIntURIRef,
1251 DecodeMechanism
1252 eDecodeMechanism,
1253 rtl_TextEncoding eCharset)
1255 return convertExtToInt(rTheExtURIRef, rTheIntURIRef,
1256 eDecodeMechanism, eCharset);
1259 inline bool INetURLObject::SetPass(std::u16string_view rThePassword)
1261 return rThePassword.empty() ?
1262 clearPassword() :
1263 setPassword(rThePassword, RTL_TEXTENCODING_UTF8);
1266 inline bool INetURLObject::SetUserAndPass(std::u16string_view rTheUser,
1267 std::u16string_view rThePassword)
1269 return setUser(rTheUser, RTL_TEXTENCODING_UTF8)
1270 && (rThePassword.empty() ?
1271 clearPassword() :
1272 setPassword(rThePassword, RTL_TEXTENCODING_UTF8));
1275 inline bool INetURLObject::SetParam(std::u16string_view rTheQuery,
1276 EncodeMechanism eMechanism,
1277 rtl_TextEncoding eCharset)
1279 if (rTheQuery.empty())
1281 clearQuery();
1282 return false;
1284 return setQuery(rTheQuery, eMechanism, eCharset);
1287 inline bool INetURLObject::SetMark(std::u16string_view rTheFragment,
1288 EncodeMechanism eMechanism,
1289 rtl_TextEncoding eCharset)
1291 return rTheFragment.empty() ?
1292 clearFragment() :
1293 setFragment(rTheFragment, eMechanism, eCharset);
1296 // static
1297 inline OUString INetURLObject::encode(std::u16string_view rText, Part ePart,
1298 EncodeMechanism eMechanism,
1299 rtl_TextEncoding eCharset)
1301 OUStringBuffer aBuf;
1302 encodeText(aBuf, rText, ePart, eMechanism, eCharset, false);
1303 return aBuf.makeStringAndClear();
1306 // static
1307 inline OUString INetURLObject::decode(std::u16string_view rText,
1308 DecodeMechanism eMechanism,
1309 rtl_TextEncoding eCharset)
1311 return decode(rText.data(), rText.data() + rText.size(),
1312 eMechanism, eCharset);
1315 #endif
1317 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */