cid#1636693 COPY_INSTEAD_OF_MOVE
[LibreOffice.git] / include / tools / urlobj.hxx
blob33af8104dc0f23c49423142708f02e2213155cb0
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/textenc.h>
25 #include <sal/types.h>
26 #include <o3tl/typed_flags_set.hxx>
28 #include <memory>
29 #include <string_view>
// Forward declarations: only incomplete types are needed by the
// declarations in this header.
class SvMemoryStream;

namespace com::sun::star::util {
    class XStringWidth;
}

namespace com::sun::star::uno { template <typename > class Reference; }
39 // Common URL prefixes for various schemes:
40 inline constexpr OUString INET_FTP_SCHEME = u"ftp://"_ustr;
41 inline constexpr OUString INET_HTTP_SCHEME = u"http://"_ustr;
42 inline constexpr OUString INET_HTTPS_SCHEME = u"https://"_ustr;
43 inline constexpr OUString INET_FILE_SCHEME = u"file://"_ustr;
44 inline constexpr OUString INET_MAILTO_SCHEME = u"mailto:"_ustr;
45 inline constexpr OUString INET_HID_SCHEME = u"hid:"_ustr;
// Prefix of "private:" URLs, as a narrow string literal.
#define URL_PREFIX_PRIV_SOFFICE "private:"
// Schemes:
// Enumerators are numbered consecutively from NotValid (0); LAST aliases the
// final real scheme so that code can refer to the upper bound of the range.
enum class INetProtocol
{
    NotValid,
    Ftp,
    Http,
    File,
    Mailto,
    VndSunStarWebdav,
    PrivSoffice,
    VndSunStarHelp,
    Https,
    Slot,
    Macro,
    Javascript,
    Data,
    Cid,
    VndSunStarHier,
    Uno,
    Component,
    VndSunStarPkg,
    Ldap,
    Db,
    VndSunStarCmd,
    Telnet,
    VndSunStarExpand,
    VndSunStarTdoc,
    Generic,
    Smb,
    Hid,
    Sftp,
    Cmis,
    LAST = Cmis
};
/** The supported notations for file system paths.

    @descr  The enumerators are single bits (Detect is the union of all of
    them), so several notations can be combined into one value.
 */
enum class FSysStyle
{
    /** VOS notation (e.g., "//server/dir/file").
     */
    Vos = 0x1,

    /** Unix notation (e.g., "/dir/file").
     */
    Unix = 0x2,

    /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
     */
    Dos = 0x4,

    /** Detect the used notation.

        @descr  For the following descriptions, please note that
        whereas FSysStyle::Detect includes all style bits, combinations of
        only a few style bits are also possible, and are also described.

        @descr  When used to translate a file system path to a file URL,
        the subset of the following productions for which the appropriate
        style bit is set are checked in order (using the conventions of
        RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
        character):

         Production T1 (VOS local; FSysStyle::Vos only):
            "//." ["/" *UCS4]
          becomes
            "file:///" *UCS4

         Production T2 (VOS host; FSysStyle::Vos only):
            "//" [host] ["/" *UCS4]
          becomes
            "file://" host "/" *UCS4

         Production T3 (UNC; FSysStyle::Dos only):
            "\\" [host] ["\" *UCS4]
          becomes
            "file://" host "/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T4 (Unix-like DOS; FSysStyle::Dos only):
            ALPHA ":" ["/" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T5 (DOS; FSysStyle::Dos only):
            ALPHA ":" ["\" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T6 (any):
            *UCS4
          becomes
            "file:///" *UCS4
          replacing the delimiter by "/" within <*UCS4>.  The delimiter is
          that character from the set { "/", "\" } which appears most
          often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
          is removed from the set; if FSysStyle::Dos is not among the style
          bits, "\" is removed from the set).  If two or more
          characters appear the same number of times, the character
          mentioned first in that set is chosen.  If the first character
          of <*UCS4> is the delimiter, that character is not copied.

        @descr  When used to translate a file URL to a file system path,
        the following productions are checked in order (using the
        conventions of RFC 2234, RFC 2396, and RFC 2732):

         Production F1 (VOS; FSysStyle::Vos):
            "file://" host "/" fpath ["#" fragment]
          becomes
            "//" host "/" fpath

         Production F2 (DOS; FSysStyle::Dos):
            "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
          becomes
            ALPHA ":" ["\" fpath]
          replacing "/" by "\" in <fpath>

         Production F3 (Unix; FSysStyle::Unix):
            "file:///" fpath ["#" fragment]
          becomes
            "/" fpath
     */
    Detect = Vos | Unix | Dos
};
175 namespace o3tl {
176 template<> struct typed_flags<FSysStyle> : is_typed_flags<FSysStyle, 0x07> {};
179 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
181 public:
182 // Get- and Set-Methods:
/** The way input strings that represent (parts of) URIs are interpreted
    in set-methods.

    @descr  UTF-32 characters in the range 0x80--0x10FFFF are replaced by
    sequences of escape sequences, representing the UTF-8 coded characters.

    @descr  Along with an EncodeMechanism parameter, the set-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    EncodeMechanism is EncodeMechanism::WasEncoded.
 */
enum class EncodeMechanism
{
    /** All escape sequences that are already present are ignored, and are
        interpreted as literal sequences of three characters.
     */
    All,

    /** Sequences of escape sequences, that represent characters from the
        specified character set and that can be converted to UTF-32
        characters, are first decoded.  If they have to be encoded, they
        are converted to UTF-8 characters and are then translated into
        (sequences of) escape sequences.  Other escape sequences are
        copied verbatim (but using upper case hex digits).
     */
    WasEncoded,

    /** All escape sequences that are already present are copied verbatim
        (but using upper case hex digits).
     */
    NotCanonical
};
/** The way strings that represent (parts of) URIs are returned from get-
    methods.

    @descr  Along with a DecodeMechanism parameter, the get-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    DecodeMechanism is DecodeMechanism::WithCharset or
    DecodeMechanism::Unambiguous.
 */
enum class DecodeMechanism
{
    /** The (part of the) URI is returned unchanged.  Since URIs are
        written using a subset of US-ASCII, the returned string is
        guaranteed to contain only US-ASCII characters.
     */
    NONE,

    /** All sequences of escape sequences that represent UTF-8 coded
        UTF-32 characters with a numerical value greater than 0x7F, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    ToIUri,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, and that can be converted to UTF-32,
        are replaced by the respective UTF-16 characters.  All other
        escape sequences are not decoded.
     */
    WithCharset,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, that can be converted to UTF-32, and
        that (in the case of ASCII characters) can safely be decoded
        without altering the meaning of the (part of the) URI, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    Unambiguous
};
255 // General Structure:
257 INetURLObject():
258 m_aAbsURIRef(256), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {}
260 bool HasError() const { return m_eScheme == INetProtocol::NotValid; }
262 OUString GetMainURL(DecodeMechanism eMechanism,
263 rtl_TextEncoding eCharset
264 = RTL_TEXTENCODING_UTF8) const
265 { return decode(m_aAbsURIRef, eMechanism, eCharset); }
267 OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
268 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
269 const;
271 OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
272 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
273 const;
275 OUString
276 getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth,
277 sal_Int32 nWidth,
278 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
279 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
280 const;
282 bool operator ==(INetURLObject const & rObject) const;
284 bool operator !=(INetURLObject const & rObject) const
285 { return !(*this == rObject); }
287 // Strict Parsing:
289 inline explicit INetURLObject(
290 std::u16string_view rTheAbsURIRef,
291 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
292 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
294 inline bool SetURL(std::u16string_view rTheAbsURIRef,
295 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
296 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
298 bool ConcatData(INetProtocol eTheScheme, std::u16string_view rTheUser,
299 std::u16string_view rThePassword,
300 std::u16string_view rTheHost, sal_uInt32 nThePort,
301 std::u16string_view rThePath);
303 // Smart Parsing:
305 inline INetURLObject(std::u16string_view rTheAbsURIRef,
306 INetProtocol eTheSmartScheme,
307 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
308 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
309 FSysStyle eStyle = FSysStyle::Detect);
311 void SetSmartProtocol(INetProtocol eTheSmartScheme)
312 { m_eSmartScheme = eTheSmartScheme; }
314 inline bool
315 SetSmartURL(std::u16string_view rTheAbsURIRef,
316 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
317 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
318 FSysStyle eStyle = FSysStyle::Detect);
320 inline INetURLObject
321 smartRel2Abs(OUString const & rTheRelURIRef,
322 bool & rWasAbsolute,
323 bool bIgnoreFragment = false,
324 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
325 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
326 bool bRelativeNonURIs = false,
327 FSysStyle eStyle = FSysStyle::Detect) const;
329 // Relative URLs:
331 inline bool
332 GetNewAbsURL(OUString const & rTheRelURIRef,
333 INetURLObject * pTheAbsURIRef)
334 const;
336 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
337 (because of syntactic reasons), either rTheRelURIRef or an empty
338 string is returned: If all of the parameters eEncodeMechanism,
339 eDecodeMechanism and eCharset have their respective default values,
340 then rTheRelURIRef is returned unmodified; otherwise, an empty string
341 is returned.
343 static OUString
344 GetAbsURL(std::u16string_view rTheBaseURIRef,
345 OUString const & rTheRelURIRef,
346 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
347 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
348 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
350 static inline OUString
351 GetRelURL(std::u16string_view rTheBaseURIRef,
352 OUString const & rTheAbsURIRef,
353 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
354 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
355 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
356 FSysStyle eStyle = FSysStyle::Detect);
358 // External URLs:
360 OUString getExternalURL() const;
362 static inline bool translateToExternal(std::u16string_view rTheIntURIRef,
363 OUString & rTheExtURIRef,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8);
369 static inline bool translateToInternal(std::u16string_view rTheExtURIRef,
370 OUString & rTheIntURIRef,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8);
376 // Scheme:
378 struct SchemeInfo;
380 INetProtocol GetProtocol() const { return m_eScheme; }
382 bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; }
384 bool isSchemeEqualTo(std::u16string_view scheme) const;
386 /** Check if the scheme is one of the WebDAV schemes
387 * we know about.
389 * @return true if the scheme is one of the known WebDAV schemes, either a public or a private one.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static const OUString & GetScheme(INetProtocol eTheScheme);
401 /** Return the human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static const OUString & GetSchemeName(INetProtocol eTheScheme);
409 static INetProtocol CompareProtocolScheme(std::u16string_view aTheAbsURIRef);
411 // User Info:
413 bool HasUserData() const { return m_aUser.isPresent(); }
415 OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
416 rtl_TextEncoding eCharset
417 = RTL_TEXTENCODING_UTF8) const
418 { return decode(m_aUser, eMechanism, eCharset); }
420 OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
421 rtl_TextEncoding eCharset
422 = RTL_TEXTENCODING_UTF8) const
423 { return decode(m_aAuth, eMechanism, eCharset); }
425 bool SetUser(std::u16string_view rTheUser)
426 { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); }
428 inline bool SetPass(std::u16string_view rThePassword);
430 // Host and Port:
432 bool HasPort() const { return m_aPort.isPresent(); }
434 OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
435 rtl_TextEncoding eCharset
436 = RTL_TEXTENCODING_UTF8) const
437 { return decode(m_aHost, eMechanism, eCharset); }
439 OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
440 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) const;
442 sal_uInt32 GetPort() const;
444 bool SetHost(std::u16string_view rTheHost)
445 { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); }
447 bool SetPort(sal_uInt32 nThePort);
449 // Path:
451 bool HasURLPath() const { return !m_aPath.isEmpty(); }
453 OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
454 rtl_TextEncoding eCharset
455 = RTL_TEXTENCODING_UTF8) const
456 { return decode(m_aPath, eMechanism, eCharset); }
458 bool SetURLPath(std::u16string_view rThePath,
459 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
460 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
461 { return setPath(rThePath, eMechanism, eCharset); }
463 // Hierarchical Path:
/** A constant to address the last segment in various methods dealing with
    hierarchical paths.

    @descr  It is often more efficient to address the last segment using
    this constant, than to determine its ordinal value using
    getSegmentCount().
 */
enum { LAST_SEGMENT = -1 };
474 /** The number of segments in the hierarchical path.
476 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
477 form
479 hierarchical-path = 1*("/" segment)
481 segment = name *(";" param)
483 name = [base ["." extension]]
485 base = 1*pchar
487 extension = *<any pchar except ".">
489 param = *pchar
491 @param bIgnoreFinalSlash If true, a final slash at the end of the
492 hierarchical path does not denote an empty segment, but is ignored.
494 @return The number of segments in the hierarchical path. If the path
495 is not hierarchical, 0 is returned.
497 sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const;
499 /** Remove a segment from the hierarchical path.
501 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
502 if addressing the last segment.
504 @param bIgnoreFinalSlash If true, a final slash at the end of the
505 hierarchical path does not denote an empty segment, but is ignored.
507 @return True if the segment has successfully been removed (and the
508 resulting URI is still valid). If the path is not hierarchical, or
509 the specified segment does not exist, false is returned. If false is
510 returned, the object is not modified.
512 bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT,
513 bool bIgnoreFinalSlash = true);
515 /** Insert a new segment into the hierarchical path.
516 A final slash at the end of the
517 hierarchical path does not denote an empty segment, but is ignored.
519 @param rTheName The name part of the new segment. The new segment
520 will contain no parameters.
522 @param bAppendFinalSlash If the new segment is appended at the end of
523 the hierarchical path, this parameter specifies whether to add a final
524 slash after it or not.
526 @param nIndex The non-negative index of the segment before which
527 to insert the new segment. LAST_SEGMENT or an nIndex that equals
528 getSegmentCount() inserts the new segment at the end of the
529 hierarchical path.
531 @param eMechanism See the general discussion for set-methods.
533 @param eCharset See the general discussion for set-methods.
535 @return True if the segment has successfully been inserted (and the
536 resulting URI is still valid). If the path is not hierarchical, or
537 the specified place to insert the new segment does not exist, false is
538 returned. If false is returned, the object is not modified.
540 bool insertName(std::u16string_view rTheName,
541 bool bAppendFinalSlash = false,
542 sal_Int32 nIndex = LAST_SEGMENT,
543 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
544 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
546 /** Get the name of a segment of the hierarchical path.
548 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
549 if addressing the last segment.
551 @param bIgnoreFinalSlash If true, a final slash at the end of the
552 hierarchical path does not denote an empty segment, but is ignored.
554 @param eMechanism See the general discussion for get-methods.
556 @param eCharset See the general discussion for get-methods.
558 @return The name part of the specified segment. If the path is not
559 hierarchical, or the specified segment does not exist, an empty string
560 is returned.
562 OUString getName(sal_Int32 nIndex = LAST_SEGMENT,
563 bool bIgnoreFinalSlash = true,
564 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
565 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
566 const;
568 /** Set the name of the last segment (preserving any parameters and any query or
569 fragment part).
571 @param rTheName The new name.
573 @param eMechanism See the general discussion for set-methods.
575 @param eCharset See the general discussion for set-methods.
577 @return True if the name has successfully been modified (and the
578 resulting URI is still valid). If the path is not hierarchical, or
579 a last segment does not exist, false is returned. If false is
580 returned, the object is not modified.
582 bool setName(std::u16string_view rTheName,
583 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
584 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
586 /** Get the base of the name of a segment.
588 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
589 if addressing the last segment.
591 @param bIgnoreFinalSlash If true, a final slash at the end of the
592 hierarchical path does not denote an empty segment, but is ignored.
594 @param eMechanism See the general discussion for get-methods.
596 @param eCharset See the general discussion for get-methods.
598 @return The base part of the specified segment. If the path is
599 not hierarchical, or the specified segment does not exist, an empty
600 string is returned.
602 OUString getBase(sal_Int32 nIndex = LAST_SEGMENT,
603 bool bIgnoreFinalSlash = true,
604 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
605 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
606 const;
608 /** Set the base of the name of a segment (preserving the extension).
609 A final slash at the end of the
610 hierarchical path does not denote an empty segment, but is ignored.
612 @param rTheBase The new base.
614 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
615 if addressing the last segment.
617 @param eMechanism See the general discussion for set-methods.
619 @param eCharset See the general discussion for set-methods.
621 @return True if the base has successfully been modified (and the
622 resulting URI is still valid). If the path is not hierarchical, or
623 the specified segment does not exist, false is returned. If false is
624 returned, the object is not modified.
626 bool setBase(std::u16string_view rTheBase,
627 sal_Int32 nIndex = LAST_SEGMENT,
628 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
629 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
631 /** Determine whether the name of the last segment has an extension.
633 @return True if the name of the last segment has an extension.
634 If the path is not hierarchical, or a last segment does not
635 exist, false is returned.
637 bool hasExtension() const;
639 /** Get the extension of the name of a segment.
641 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
642 if addressing the last segment.
644 @param bIgnoreFinalSlash If true, a final slash at the end of the
645 hierarchical path does not denote an empty segment, but is ignored.
647 @param eMechanism See the general discussion for get-methods.
649 @param eCharset See the general discussion for get-methods.
651 @return The extension part of the specified segment. If the path is
652 not hierarchical, or the specified segment does not exist, an empty
653 string is returned.
655 OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT,
656 bool bIgnoreFinalSlash = true,
657 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
658 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
659 const;
661 /** Set the extension of the name of a segment (replacing an already
662 existing extension).
664 @param rTheExtension The new extension.
666 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
667 if addressing the last segment.
669 @param bIgnoreFinalSlash If true, a final slash at the end of the
670 hierarchical path does not denote an empty segment, but is ignored.
672 @param eCharset See the general discussion for set-methods.
674 @return True if the extension has successfully been modified (and the
675 resulting URI is still valid). If the path is not hierarchical, or
676 the specified segment does not exist, false is returned. If false is
677 returned, the object is not modified.
679 bool setExtension(std::u16string_view rTheExtension,
680 sal_Int32 nIndex = LAST_SEGMENT,
681 bool bIgnoreFinalSlash = true,
682 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
684 /** Remove the extension of the name of a segment.
686 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
687 if addressing the last segment.
689 @param bIgnoreFinalSlash If true, a final slash at the end of the
690 hierarchical path does not denote an empty segment, but is ignored.
692 @return True if the extension has successfully been removed (and the
693 resulting URI is still valid), or if the name did not have an
694 extension. If the path is not hierarchical, or the specified segment
695 does not exist, false is returned. If false is returned, the object
696 is not modified.
698 bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT,
699 bool bIgnoreFinalSlash = true);
701 /** Determine whether the hierarchical path ends in a final slash.
703 @return True if the hierarchical path ends in a final slash. If the
704 path is not hierarchical, false is returned.
706 bool hasFinalSlash() const;
708 /** Make the hierarchical path end in a final slash (if it does not
709 already do so).
711 @return True if a final slash has successfully been appended (and the
712 resulting URI is still valid), or if the hierarchical path already
713 ended in a final slash. If the path is not hierarchical, false is
714 returned. If false is returned, the object is not modified.
716 bool setFinalSlash();
718 /** Remove a final slash from the hierarchical path.
720 @return True if a final slash has successfully been removed (and the
721 resulting URI is still valid), or if the hierarchical path already did
722 not end in a final slash. If the path is not hierarchical, false is
723 returned. If false is returned, the object is not modified.
725 bool removeFinalSlash();
727 // Query:
729 bool HasParam() const { return m_aQuery.isPresent(); }
731 OUString GetParam(rtl_TextEncoding eCharset
732 = RTL_TEXTENCODING_UTF8) const
733 { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); }
735 inline bool SetParam(std::u16string_view rTheQuery,
736 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
737 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
739 // Fragment:
741 bool HasMark() const { return m_aFragment.isPresent(); }
743 OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
744 rtl_TextEncoding eCharset
745 = RTL_TEXTENCODING_UTF8) const
746 { return decode(m_aFragment, eMechanism, eCharset); }
748 inline bool SetMark(std::u16string_view rTheFragment,
749 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
750 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
752 // File URLs:
754 /** Return the file system path represented by a file URL (ignoring any
755 fragment part).
757 @param eStyle The notation of the returned file system path.
759 @param pDelimiter Upon successful return, this parameter can return
760 the character that is the 'main' delimiter within the returned file
761 system path (e.g., "/" for Unix, "\" for DOS). This is
762 especially useful for routines that later try to shorten the returned
763 file system path at a 'good' position, e.g. to fit it into some
764 limited display space.
766 @return The file system path represented by this file URL. If this
767 file URL does not represent a file system path according to the
768 specified notation, or if this is not a file URL at all, an empty
769 string is returned.
771 OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr)
772 const;
774 // Data URLs:
775 std::unique_ptr<SvMemoryStream> getData() const;
777 // Coding:
/** Identifies the kind of URI part handed to encode() and appendUCS4().

    @descr  Every enumerator is a distinct single bit.  The values 0x00002
    and 0x00004 are not assigned to any enumerator here.
 */
enum Part
{
    PART_USER_PASSWORD          = 0x00001,
    PART_FPATH                  = 0x00008,
    PART_AUTHORITY              = 0x00010,
    PART_REL_SEGMENT_EXTRA      = 0x00020,
    PART_URIC                   = 0x00040,
    PART_HTTP_PATH              = 0x00080,
    PART_MESSAGE_ID_PATH        = 0x00100,
    PART_MAILTO                 = 0x00200,
    PART_PATH_BEFORE_QUERY      = 0x00400,
    PART_PCHAR                  = 0x00800,
    PART_VISIBLE                = 0x01000,
    PART_VISIBLE_NONSPECIAL     = 0x02000,
    PART_UNO_PARAM_VALUE        = 0x04000,
    PART_UNAMBIGUOUS            = 0x08000,
    PART_URIC_NO_SLASH          = 0x10000,
    PART_HTTP_QUERY             = 0x20000, //TODO! unused?
};
/** Escape classification passed to appendUCS4() and reported by getUTF32()
    through its rEscapeType out-parameter.
 */
enum class EscapeType
{
    NONE,
    Octet,
    Utf32
};
806 /** Encode some text as part of a URI.
808 @param rText Some text (for its interpretation, see the general
809 discussion for set-methods).
811 @param ePart The part says which characters are 'forbidden' and must
812 be encoded (replaced by escape sequences). Characters outside the US-
813 ASCII range are always 'forbidden.'
815 @param eMechanism See the general discussion for set-methods.
817 @param eCharset See the general discussion for set-methods.
819 @return The text, encoded according to the given mechanism and
820 charset ('forbidden' characters replaced by escape sequences).
822 static OUString encode( std::u16string_view rText, Part ePart,
823 EncodeMechanism eMechanism,
824 rtl_TextEncoding eCharset
825 = RTL_TEXTENCODING_UTF8);
828 /** Decode some text.
830 @param rText Some (encoded) text.
832 @param eMechanism See the general discussion for get-methods.
834 @param eCharset See the general discussion for get-methods.
836 @return The text, decoded according to the given mechanism and
837 charset (escape sequences replaced by 'raw' characters).
839 static inline OUString decode(std::u16string_view rText,
840 DecodeMechanism eMechanism,
841 rtl_TextEncoding eCharset
842 = RTL_TEXTENCODING_UTF8);
844 static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4);
846 static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4,
847 EscapeType eEscapeType, Part ePart,
848 rtl_TextEncoding eCharset, bool bKeepVisibleEscapes);
850 static sal_uInt32 getUTF32(sal_Unicode const *& rBegin,
851 sal_Unicode const * pEnd,
852 EncodeMechanism eMechanism,
853 rtl_TextEncoding eCharset,
854 EscapeType & rEscapeType);
856 // Specialized helpers:
858 static sal_uInt32 scanDomain(sal_Unicode const *& rBegin,
859 sal_Unicode const * pEnd,
860 bool bEager = true);
862 // OBSOLETE Hierarchical Path:
864 OUString GetPartBeforeLastName() const;
866 /** Get the last segment in the path.
868 @param eMechanism See the general discussion for get-methods.
870 @param eCharset See the general discussion for get-methods.
872 @return For a hierarchical URL, the last segment (everything after
873 the last unencoded '/'). Note that this last segment may be empty. If
874 the URL is not hierarchical, an empty string is returned.
876 OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
877 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
878 const;
880 /** Get the 'extension' of the last segment in the path.
882 @return For a hierarchical URL, everything after the first unencoded
883 '.' in the last segment of the path. Note that this 'extension' may
884 be empty. If the URL is not hierarchical, or if the last segment does
885 not contain an unencoded '.', an empty string is returned.
887 OUString GetFileExtension() const;
889 bool Append(std::u16string_view rTheSegment,
890 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
891 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
893 void CutLastName();
895 // OBSOLETE File URLs:
897 OUString PathToFileName() const;
899 OUString GetFull() const;
901 OUString GetPath() const;
903 void SetBase(std::u16string_view rTheBase);
905 OUString GetBase() const;
907 void SetExtension(std::u16string_view rTheExtension);
909 OUString CutExtension();
/** @return  Always true.
 */
static bool IsCaseSensitive() { return true; }
913 void changeScheme(INetProtocol eTargetScheme);
915 // INetProtocol::Macro, INetProtocol::Uno, INetProtocol::Slot,
916 // vnd.sun.star.script, etc. All the types of URLs which shouldn't
917 // be accepted from an outside controlled source
918 bool IsExoticProtocol() const;
920 private:
921 // General Structure:
923 class SAL_DLLPRIVATE SubString
925 sal_Int32 m_nBegin;
926 sal_Int32 m_nLength;
928 public:
929 explicit SubString(sal_Int32 nTheBegin = -1,
930 sal_Int32 nTheLength = 0):
931 m_nBegin(nTheBegin), m_nLength(nTheLength) {}
933 bool isPresent() const { return m_nBegin != -1; }
935 bool isEmpty() const { return m_nLength == 0; }
937 sal_Int32 getBegin() const { return m_nBegin; }
939 sal_Int32 getLength() const { return m_nLength; }
941 sal_Int32 getEnd() const { return m_nBegin + m_nLength; }
943 sal_Int32 clear();
945 sal_Int32 set(OUStringBuffer & rString,
946 std::u16string_view rSubString,
947 sal_Int32 nTheBegin);
949 sal_Int32 set(OUString & rString,
950 std::u16string_view rSubString);
952 sal_Int32 set(OUStringBuffer & rString,
953 std::u16string_view rSubString);
955 inline void operator +=(sal_Int32 nDelta);
957 int compare(SubString const & rOther,
958 OUStringBuffer const & rThisString,
959 OUStringBuffer const & rOtherString) const;
962 OUStringBuffer m_aAbsURIRef;
963 SubString m_aScheme;
964 SubString m_aUser;
965 SubString m_aAuth;
966 SubString m_aHost;
967 SubString m_aPort;
968 SubString m_aPath;
969 SubString m_aQuery;
970 SubString m_aFragment;
971 INetProtocol m_eScheme;
972 INetProtocol m_eSmartScheme;
974 TOOLS_DLLPRIVATE void setInvalid();
976 bool setAbsURIRef(
977 std::u16string_view rTheAbsURIRef,
978 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart,
979 FSysStyle eStyle);
981 // Relative URLs:
983 bool convertRelToAbs(
984 OUString const & rTheRelURIRef,
985 INetURLObject & rTheAbsURIRef, bool & rWasAbsolute,
986 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
987 bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs,
988 FSysStyle eStyle) const;
990 bool convertAbsToRel(
991 OUString const & rTheAbsURIRef,
992 OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism,
993 DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset,
994 FSysStyle eStyle) const;
996 // External URLs:
998 static bool convertIntToExt(
999 std::u16string_view rTheIntURIRef,
1000 OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism,
1001 rtl_TextEncoding eCharset);
1003 static bool convertExtToInt(
1004 std::u16string_view rTheExtURIRef,
1005 OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism,
1006 rtl_TextEncoding eCharset);
1008 // Scheme:
1010 struct PrefixInfo;
1012 TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo(
1013 INetProtocol eTheScheme);
1015 TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const;
1017 TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix(
1018 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1020 // Authority:
1022 TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const;
1024 TOOLS_DLLPRIVATE SubString getAuthority() const;
1026 // User Info:
1028 bool setUser(
1029 std::u16string_view rTheUser,
1030 rtl_TextEncoding eCharset);
1032 bool clearPassword();
1034 bool setPassword(
1035 std::u16string_view rThePassword,
1036 rtl_TextEncoding eCharset);
1038 // Host and Port:
1040 TOOLS_DLLPRIVATE static bool parseHost(
1041 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
1042 OUStringBuffer* pCanonic);
1044 TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName(
1045 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1046 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1047 bool bNetBiosName, OUStringBuffer* pCanonic);
1049 bool setHost(
1050 std::u16string_view rTheHost,
1051 rtl_TextEncoding eCharset);
1053 // Path:
1055 TOOLS_DLLPRIVATE static bool parsePath(
1056 INetProtocol eScheme, sal_Unicode const ** pBegin,
1057 sal_Unicode const * pEnd, EncodeMechanism eMechanism,
1058 rtl_TextEncoding eCharset, bool bSkippedInitialSlash,
1059 sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter,
1060 sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter,
1061 OUStringBuffer &rSynPath);
1063 bool setPath(
1064 std::u16string_view rThePath,
1065 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1067 // Hierarchical Path:
1069 TOOLS_DLLPRIVATE bool checkHierarchical() const;
1071 TOOLS_DLLPRIVATE SubString getSegment(
1072 sal_Int32 nIndex, bool bIgnoreFinalSlash) const;
1074 // Query:
1076 void clearQuery();
1078 bool setQuery(
1079 std::u16string_view rTheQuery,
1080 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1082 // Fragment:
1084 bool clearFragment();
1086 bool setFragment(
1087 std::u16string_view rTheMark,
1088 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1090 // FILE URLs:
1092 TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const;
1094 // Coding:
1096 TOOLS_DLLPRIVATE static inline void appendEscape(
1097 OUStringBuffer & rTheText, sal_uInt32 nOctet);
1099 static void encodeText(
1100 OUStringBuffer& rOutputBuffer,
1101 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1102 Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1103 bool bKeepVisibleEscapes);
1105 static inline void encodeText(
1106 OUStringBuffer& rOutputBuffer,
1107 std::u16string_view rTheText, Part ePart,
1108 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1109 bool bKeepVisibleEscapes);
1111 static OUString decode(
1112 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1113 DecodeMechanism, rtl_TextEncoding eCharset);
1115 inline OUString decode(
1116 SubString const & rSubString,
1117 DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const;
1119 // Specialized helpers:
1121 TOOLS_DLLPRIVATE static bool scanIPv6reference(
1122 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1125 // static
1126 inline void INetURLObject::encodeText( OUStringBuffer& rOutputBuffer,
1127 std::u16string_view rTheText,
1128 Part ePart,
1129 EncodeMechanism eMechanism,
1130 rtl_TextEncoding eCharset,
1131 bool bKeepVisibleEscapes)
1133 encodeText(rOutputBuffer,
1134 rTheText.data(),
1135 rTheText.data() + rTheText.size(), ePart,
1136 eMechanism, eCharset, bKeepVisibleEscapes);
1139 inline OUString INetURLObject::decode(SubString const & rSubString,
1140 DecodeMechanism eMechanism,
1141 rtl_TextEncoding eCharset) const
1143 return rSubString.isPresent() ?
1144 decode(m_aAbsURIRef.getStr() + rSubString.getBegin(),
1145 m_aAbsURIRef.getStr() + rSubString.getEnd(),
1146 eMechanism, eCharset) :
1147 OUString();
1150 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef,
1151 EncodeMechanism eMechanism,
1152 rtl_TextEncoding eCharset):
1153 m_aAbsURIRef(rTheAbsURIRef.size() * 2), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1155 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1156 FSysStyle(0));
1159 inline bool INetURLObject::SetURL(std::u16string_view rTheAbsURIRef,
1160 EncodeMechanism eMechanism,
1161 rtl_TextEncoding eCharset)
1163 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1164 FSysStyle(0));
1167 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef,
1168 INetProtocol eTheSmartScheme,
1169 EncodeMechanism eMechanism,
1170 rtl_TextEncoding eCharset,
1171 FSysStyle eStyle):
1172 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme)
1174 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle);
1177 inline bool INetURLObject::SetSmartURL(std::u16string_view rTheAbsURIRef,
1178 EncodeMechanism eMechanism,
1179 rtl_TextEncoding eCharset,
1180 FSysStyle eStyle)
1182 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true,
1183 eStyle);
1186 inline INetURLObject
1187 INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef,
1188 bool & rWasAbsolute,
1189 bool bIgnoreFragment,
1190 EncodeMechanism eMechanism,
1191 rtl_TextEncoding eCharset,
1192 bool bRelativeNonURIs,
1193 FSysStyle eStyle) const
1195 INetURLObject aTheAbsURIRef;
1196 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute,
1197 eMechanism, eCharset, bIgnoreFragment, true,
1198 bRelativeNonURIs, eStyle);
1199 return aTheAbsURIRef;
1202 inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef,
1203 INetURLObject * pTheAbsURIRef)
1204 const
1206 INetURLObject aTheAbsURIRef;
1207 bool bWasAbsolute;
1208 if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute,
1209 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false,
1210 FSysStyle::Detect))
1211 return false;
1212 if (pTheAbsURIRef)
1213 *pTheAbsURIRef = std::move(aTheAbsURIRef);
1214 return true;
1217 // static
1218 inline OUString INetURLObject::GetRelURL(std::u16string_view rTheBaseURIRef,
1219 OUString const & rTheAbsURIRef,
1220 EncodeMechanism eEncodeMechanism,
1221 DecodeMechanism eDecodeMechanism,
1222 rtl_TextEncoding eCharset,
1223 FSysStyle eStyle)
1225 OUString aTheRelURIRef;
1226 INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
1227 convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism,
1228 eDecodeMechanism, eCharset, eStyle);
1229 return aTheRelURIRef;
1232 // static
1233 inline bool INetURLObject::translateToExternal(std::u16string_view
1234 rTheIntURIRef,
1235 OUString & rTheExtURIRef,
1236 DecodeMechanism
1237 eDecodeMechanism,
1238 rtl_TextEncoding eCharset)
1240 return convertIntToExt(rTheIntURIRef, rTheExtURIRef,
1241 eDecodeMechanism, eCharset);
1244 // static
1245 inline bool INetURLObject::translateToInternal(std::u16string_view
1246 rTheExtURIRef,
1247 OUString & rTheIntURIRef,
1248 DecodeMechanism
1249 eDecodeMechanism,
1250 rtl_TextEncoding eCharset)
1252 return convertExtToInt(rTheExtURIRef, rTheIntURIRef,
1253 eDecodeMechanism, eCharset);
1256 inline bool INetURLObject::SetPass(std::u16string_view rThePassword)
1258 return rThePassword.empty() ?
1259 clearPassword() :
1260 setPassword(rThePassword, RTL_TEXTENCODING_UTF8);
1263 inline bool INetURLObject::SetParam(std::u16string_view rTheQuery,
1264 EncodeMechanism eMechanism,
1265 rtl_TextEncoding eCharset)
1267 if (rTheQuery.empty())
1269 clearQuery();
1270 return false;
1272 return setQuery(rTheQuery, eMechanism, eCharset);
1275 inline bool INetURLObject::SetMark(std::u16string_view rTheFragment,
1276 EncodeMechanism eMechanism,
1277 rtl_TextEncoding eCharset)
1279 return rTheFragment.empty() ?
1280 clearFragment() :
1281 setFragment(rTheFragment, eMechanism, eCharset);
1284 // static
1285 inline OUString INetURLObject::encode(std::u16string_view rText, Part ePart,
1286 EncodeMechanism eMechanism,
1287 rtl_TextEncoding eCharset)
1289 OUStringBuffer aBuf;
1290 encodeText(aBuf, rText, ePart, eMechanism, eCharset, false);
1291 return aBuf.makeStringAndClear();
1294 // static
1295 inline OUString INetURLObject::decode(std::u16string_view rText,
1296 DecodeMechanism eMechanism,
1297 rtl_TextEncoding eCharset)
1299 return decode(rText.data(), rText.data() + rText.size(),
1300 eMechanism, eCharset);
1303 #endif
1305 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */