Bump version to 6.4-15
[LibreOffice.git] / include / tools / urlobj.hxx
blob522dcca35fcdce3b75ee690063ea493f886b28b7
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/textenc.h>
25 #include <sal/types.h>
26 #include <o3tl/typed_flags_set.hxx>
28 #include <memory>
29 #include <string_view>
31 class SvMemoryStream;
33 namespace com { namespace sun { namespace star { namespace util {
34 class XStringWidth;
35 } } } }
37 namespace com { namespace sun { namespace star { namespace uno { template <typename > class Reference; } } } }
39 // Common URL prefixes for various schemes:
40 #define INET_FTP_SCHEME "ftp://"
41 #define INET_HTTP_SCHEME "http://"
42 #define INET_HTTPS_SCHEME "https://"
43 #define INET_FILE_SCHEME "file://"
44 #define INET_MAILTO_SCHEME "mailto:"
45 #define INET_HID_SCHEME "hid:"
47 #define URL_PREFIX_PRIV_SOFFICE "private:"
49 // Schemes: the URL schemes an INetURLObject can report via GetProtocol().
50 enum class INetProtocol
52 NotValid, // scheme of an object for which HasError() returns true
53 Ftp,
54 Http,
55 File,
56 Mailto,
57 VndSunStarWebdav,
58 PrivSoffice,
59 VndSunStarHelp,
60 Https,
61 Slot,
62 Macro,
63 Javascript,
64 Data,
65 Cid,
66 VndSunStarHier,
67 Uno,
68 Component,
69 VndSunStarPkg,
70 Ldap,
71 Db,
72 VndSunStarCmd,
73 Telnet,
74 VndSunStarExpand,
75 VndSunStarTdoc,
76 Generic,
77 Smb,
78 Hid,
79 Sftp,
80 Cmis,
81 LAST = Cmis // alias for the final enumerator; update when appending a new scheme
84 /** The supported notations for file system paths.
86 enum class FSysStyle
88 /** VOS notation (e.g., "//server/dir/file").
90 Vos = 0x1,
92 /** Unix notation (e.g., "/dir/file").
94 Unix = 0x2,
96 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
98 Dos = 0x4,
100 /** Detect the used notation.
102 @descr For the following descriptions, please note that
103 whereas FSysStyle::Detect includes all style bits, combinations of only
104 a few style bits are also possible, and are also described.
106 @descr When used to translate a file system path to a file URL,
107 the subset of the following productions for which the appropriate
108 style bit is set are checked in order (using the conventions of
109 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
110 character):
112 Production T1 (VOS local; FSysStyle::Vos only):
113 "//." ["/" *UCS4]
114 becomes
115 "file:///" *UCS4
117 Production T2 (VOS host; FSysStyle::Vos only):
118 "//" [host] ["/" *UCS4]
119 becomes
120 "file://" host "/" *UCS4
122 Production T3 (UNC; FSysStyle::Dos only):
123 "\\" [host] ["\" *UCS4]
124 becomes
125 "file://" host "/" *UCS4
126 replacing "\" by "/" within <*UCS4>
128 Production T4 (Unix-like DOS; FSysStyle::Dos only):
129 ALPHA ":" ["/" *UCS4]
130 becomes
131 "file:///" ALPHA ":/" *UCS4
132 replacing "\" by "/" within <*UCS4>
134 Production T5 (DOS; FSysStyle::Dos only):
135 ALPHA ":" ["\" *UCS4]
136 becomes
137 "file:///" ALPHA ":/" *UCS4
138 replacing "\" by "/" within <*UCS4>
140 Production T6 (any):
141 *UCS4
142 becomes
143 "file:///" *UCS4
144 replacing the delimiter by "/" within <*UCS4>. The delimiter is
145 that character from the set { "/", "\" } which appears most
146 often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
147 is removed from the set; if FSysStyle::Dos is not among the style
148 bits, "\" is removed from the set). If two or more
149 characters appear the same number of times, the character
150 mentioned first in that set is chosen. If the first character
151 of <*UCS4> is the delimiter, that character is not copied.
153 @descr When used to translate a file URL to a file system path,
154 the following productions are checked in order (using the
155 conventions of RFC 2234, RFC 2396, and RFC 2732):
157 Production F1 (VOS; FSysStyle::Vos):
158 "file://" host "/" fpath ["#" fragment]
159 becomes
160 "//" host "/" fpath
162 Production F2 (DOS; FSysStyle::Dos):
163 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
164 becomes
165 ALPHA ":" ["\" fpath]
166 replacing "/" by "\" in <fpath>
168 Production F3 (Unix; FSysStyle::Unix):
169 "file:///" fpath ["#" fragment]
170 becomes
171 "/" fpath
173 Detect = Vos | Unix | Dos
175 namespace o3tl {
// Registers FSysStyle with o3tl::typed_flags. The mask 0x07 is the union of
// all style bits: Vos (0x1) | Unix (0x2) | Dos (0x4), i.e. FSysStyle::Detect.
176 template<> struct typed_flags<FSysStyle> : is_typed_flags<FSysStyle, 0x07> {};
179 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
181 public:
182 // Get- and Set-Methods:
184 /** The way input strings that represent (parts of) URIs are interpreted
185 in set-methods.
187 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
188 sequences of escape sequences, representing the UTF-8 coded characters.
190 @descr Along with an EncodeMechanism parameter, the set-methods all
191 take an rtl_TextEncoding parameter, which is ignored unless the
192 EncodeMechanism is EncodeMechanism::WasEncoded.
194 enum class EncodeMechanism
196 /** All escape sequences that are already present are ignored, and are
197 interpreted as literal sequences of three characters.
199 All,
201 /** Sequences of escape sequences, that represent characters from the
202 specified character set and that can be converted to UTF-32
203 characters, are first decoded. If they have to be encoded, they
204 are converted to UTF-8 characters and are then translated into
205 (sequences of) escape sequences. Other escape sequences are
206 copied verbatim (but using upper case hex digits).
208 WasEncoded,
210 /** All escape sequences that are already present are copied verbatim
211 (but using upper case hex digits).
213 NotCanonical
216 /** The way strings that represent (parts of) URIs are returned from get-
217 methods.
219 @descr Along with a DecodeMechanism parameter, the get-methods all
220 take an rtl_TextEncoding parameter, which is ignored unless the
221 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
223 enum class DecodeMechanism
225 /** The (part of the) URI is returned unchanged. Since URIs are
226 written using a subset of US-ASCII, the returned string is
227 guaranteed to contain only US-ASCII characters.
229 NONE,
231 /** All sequences of escape sequences that represent UTF-8 coded
232 UTF-32 characters with a numerical value greater than 0x7F, are
233 replaced by the respective UTF-16 characters. All other escape
234 sequences are not decoded.
236 ToIUri,
238 /** All (sequences of) escape sequences that represent characters from
239 the specified character set, and that can be converted to UTF-32,
240 are replaced by the respective UTF-16 characters. All other
241 escape sequences are not decoded.
243 WithCharset,
245 /** All (sequences of) escape sequences that represent characters from
246 the specified character set, that can be converted to UTF-32, and
247 that (in the case of ASCII characters) can safely be decoded
248 without altering the meaning of the (part of the) URI, are
249 replaced by the respective UTF-16 characters. All other escape
250 sequences are not decoded.
252 Unambiguous
255 // General Structure:
// Default constructor: the scheme starts out as INetProtocol::NotValid (so
// HasError() returns true until a URL is set), and the scheme used for smart
// parsing defaults to INetProtocol::Http.
257 INetURLObject():
258 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {}
// Returns true when this object's scheme is INetProtocol::NotValid.
260 bool HasError() const { return m_eScheme == INetProtocol::NotValid; }
262 OUString GetMainURL(DecodeMechanism eMechanism,
263 rtl_TextEncoding eCharset
264 = RTL_TEXTENCODING_UTF8) const
265 { return decode(m_aAbsURIRef, eMechanism, eCharset); }
267 OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
268 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
269 const;
271 OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
272 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
273 const;
275 OUString
276 getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth,
277 sal_Int32 nWidth,
278 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
279 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
280 const;
282 bool operator ==(INetURLObject const & rObject) const;
284 bool operator !=(INetURLObject const & rObject) const
285 { return !(*this == rObject); }
287 // Strict Parsing:
289 inline explicit INetURLObject(
290 OUString const & rTheAbsURIRef,
291 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
292 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
294 inline bool SetURL(OUString const & rTheAbsURIRef,
295 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
296 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
298 bool ConcatData(INetProtocol eTheScheme, OUString const & rTheUser,
299 OUString const & rThePassword,
300 OUString const & rTheHost, sal_uInt32 nThePort,
301 OUString const & rThePath);
303 // Smart Parsing:
305 inline INetURLObject(OUString const & rTheAbsURIRef,
306 INetProtocol eTheSmartScheme,
307 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
308 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
309 FSysStyle eStyle = FSysStyle::Detect);
311 void SetSmartProtocol(INetProtocol eTheSmartScheme)
312 { m_eSmartScheme = eTheSmartScheme; }
314 inline bool
315 SetSmartURL(OUString const & rTheAbsURIRef,
316 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
317 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
318 FSysStyle eStyle = FSysStyle::Detect);
320 inline INetURLObject
321 smartRel2Abs(OUString const & rTheRelURIRef,
322 bool & rWasAbsolute,
323 bool bIgnoreFragment = false,
324 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
325 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
326 bool bRelativeNonURIs = false,
327 FSysStyle eStyle = FSysStyle::Detect) const;
329 // Relative URLs:
331 inline bool
332 GetNewAbsURL(OUString const & rTheRelURIRef,
333 INetURLObject * pTheAbsURIRef)
334 const;
336 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
337 (because of syntactic reasons), either rTheRelURIRef or an empty
338 string is returned: If all of the parameters eEncodeMechanism,
339 eDecodeMechanism and eCharset have their respective default values,
340 then rTheRelURIRef is returned unmodified; otherwise, an empty string
341 is returned.
343 static OUString
344 GetAbsURL(OUString const & rTheBaseURIRef,
345 OUString const & rTheRelURIRef,
346 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
347 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
348 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
350 static inline OUString
351 GetRelURL(OUString const & rTheBaseURIRef,
352 OUString const & rTheAbsURIRef,
353 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
354 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
355 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
356 FSysStyle eStyle = FSysStyle::Detect);
358 // External URLs:
360 OUString getExternalURL() const;
362 static inline bool translateToExternal(OUString const & rTheIntURIRef,
363 OUString & rTheExtURIRef,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8);
369 static inline bool translateToInternal(OUString const & rTheExtURIRef,
370 OUString & rTheIntURIRef,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8);
376 // Scheme:
378 struct SchemeInfo;
380 INetProtocol GetProtocol() const { return m_eScheme; }
382 bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; }
384 bool isSchemeEqualTo(std::u16string_view scheme) const;
386 /** Check if the scheme is one of the WebDAV schemes
387 * we know about.
389 * @return true if the scheme is one of the known WebDAV schemes, either a public or a private one.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static OUString GetScheme(INetProtocol eTheScheme);
401 /** Return the human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static OUString GetSchemeName(INetProtocol eTheScheme);
409 static INetProtocol CompareProtocolScheme(OUString const &
410 rTheAbsURIRef);
412 // User Info:
414 bool HasUserData() const { return m_aUser.isPresent(); }
416 OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
417 rtl_TextEncoding eCharset
418 = RTL_TEXTENCODING_UTF8) const
419 { return decode(m_aUser, eMechanism, eCharset); }
421 OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
422 rtl_TextEncoding eCharset
423 = RTL_TEXTENCODING_UTF8) const
424 { return decode(m_aAuth, eMechanism, eCharset); }
426 bool SetUser(OUString const & rTheUser)
427 { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); }
429 inline bool SetPass(OUString const & rThePassword);
431 inline bool SetUserAndPass(OUString const & rTheUser,
432 OUString const & rThePassword);
434 // Host and Port:
436 bool HasPort() const { return m_aPort.isPresent(); }
438 OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
439 rtl_TextEncoding eCharset
440 = RTL_TEXTENCODING_UTF8) const
441 { return decode(m_aHost, eMechanism, eCharset); }
443 OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
444 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) const;
446 sal_uInt32 GetPort() const;
448 bool SetHost(OUString const & rTheHost)
449 { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); }
451 bool SetPort(sal_uInt32 nThePort);
453 // Path:
455 bool HasURLPath() const { return !m_aPath.isEmpty(); }
457 OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
458 rtl_TextEncoding eCharset
459 = RTL_TEXTENCODING_UTF8) const
460 { return decode(m_aPath, eMechanism, eCharset); }
462 bool SetURLPath(OUString const & rThePath,
463 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
464 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
465 { return setPath(rThePath, eMechanism, eCharset); }
467 // Hierarchical Path:
469 /** A constant to address the last segment in various methods dealing with
470 hierarchical paths.
472 @descr It is often more efficient to address the last segment using
473 this constant, than to determine its ordinal value using
474 getSegmentCount().
476 enum { LAST_SEGMENT = -1 };
478 /** The number of segments in the hierarchical path.
480 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
481 form
483 hierarchical-path = 1*("/" segment)
485 segment = name *(";" param)
487 name = [base ["." extension]]
489 base = 1*pchar
491 extension = *<any pchar except ".">
493 param = *pchar
495 @param bIgnoreFinalSlash If true, a final slash at the end of the
496 hierarchical path does not denote an empty segment, but is ignored.
498 @return The number of segments in the hierarchical path. If the path
499 is not hierarchical, 0 is returned.
501 sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const;
503 /** Remove a segment from the hierarchical path.
505 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
506 if addressing the last segment.
508 @param bIgnoreFinalSlash If true, a final slash at the end of the
509 hierarchical path does not denote an empty segment, but is ignored.
511 @return True if the segment has successfully been removed (and the
512 resulting URI is still valid). If the path is not hierarchical, or
513 the specified segment does not exist, false is returned. If false is
514 returned, the object is not modified.
516 bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT,
517 bool bIgnoreFinalSlash = true);
519 /** Insert a new segment into the hierarchical path.
520 A final slash at the end of the
521 hierarchical path does not denote an empty segment, but is ignored.
523 @param rTheName The name part of the new segment. The new segment
524 will contain no parameters.
526 @param bAppendFinalSlash If the new segment is appended at the end of
527 the hierarchical path, this parameter specifies whether to add a final
528 slash after it or not.
530 @param nIndex The non-negative index of the segment before which
531 to insert the new segment. LAST_SEGMENT or an nIndex that equals
532 getSegmentCount() inserts the new segment at the end of the
533 hierarchical path.
535 @param eMechanism See the general discussion for set-methods.
537 @param eCharset See the general discussion for set-methods.
539 @return True if the segment has successfully been inserted (and the
540 resulting URI is still valid). If the path is not hierarchical, or
541 the specified place to insert the new segment does not exist, false is
542 returned. If false is returned, the object is not modified.
544 bool insertName(OUString const & rTheName,
545 bool bAppendFinalSlash = false,
546 sal_Int32 nIndex = LAST_SEGMENT,
547 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
548 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
550 /** Get the name of a segment of the hierarchical path.
552 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
553 if addressing the last segment.
555 @param bIgnoreFinalSlash If true, a final slash at the end of the
556 hierarchical path does not denote an empty segment, but is ignored.
558 @param eMechanism See the general discussion for get-methods.
560 @param eCharset See the general discussion for get-methods.
562 @return The name part of the specified segment. If the path is not
563 hierarchical, or the specified segment does not exist, an empty string
564 is returned.
566 OUString getName(sal_Int32 nIndex = LAST_SEGMENT,
567 bool bIgnoreFinalSlash = true,
568 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
569 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
570 const;
572 /** Set the name of the last segment (preserving any parameters and any query or
573 fragment part).
575 @param rTheName The new name.
577 @param eMechanism See the general discussion for set-methods.
579 @param eCharset See the general discussion for set-methods.
581 @return True if the name has successfully been modified (and the
582 resulting URI is still valid). If the path is not hierarchical, or
583 a last segment does not exist, false is returned. If false is
584 returned, the object is not modified.
586 bool setName(OUString const& rTheName,
587 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
588 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
590 /** Get the base of the name of a segment.
592 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
593 if addressing the last segment.
595 @param bIgnoreFinalSlash If true, a final slash at the end of the
596 hierarchical path does not denote an empty segment, but is ignored.
598 @param eMechanism See the general discussion for get-methods.
600 @param eCharset See the general discussion for get-methods.
602 @return The base part of the specified segment. If the path is
603 not hierarchical, or the specified segment does not exist, an empty
604 string is returned.
606 OUString getBase(sal_Int32 nIndex = LAST_SEGMENT,
607 bool bIgnoreFinalSlash = true,
608 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
609 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
610 const;
612 /** Set the base of the name of a segment (preserving the extension).
613 A final slash at the end of the
614 hierarchical path does not denote an empty segment, but is ignored.
616 @param rTheBase The new base.
618 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
619 if addressing the last segment.
621 @param eMechanism See the general discussion for set-methods.
623 @param eCharset See the general discussion for set-methods.
625 @return True if the base has successfully been modified (and the
626 resulting URI is still valid). If the path is not hierarchical, or
627 the specified segment does not exist, false is returned. If false is
628 returned, the object is not modified.
630 bool setBase(OUString const & rTheBase,
631 sal_Int32 nIndex = LAST_SEGMENT,
632 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
633 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
635 /** Determine whether the name of the last segment has an extension.
637 @return True if the name of the last segment has an extension.
638 If the path is not hierarchical, or the last segment does not
639 exist, false is returned.
641 bool hasExtension() const;
643 /** Get the extension of the name of a segment.
645 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
646 if addressing the last segment.
648 @param bIgnoreFinalSlash If true, a final slash at the end of the
649 hierarchical path does not denote an empty segment, but is ignored.
651 @param eMechanism See the general discussion for get-methods.
653 @param eCharset See the general discussion for get-methods.
655 @return The extension part of the specified segment. If the path is
656 not hierarchical, or the specified segment does not exist, an empty
657 string is returned.
659 OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT,
660 bool bIgnoreFinalSlash = true,
661 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
662 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
663 const;
665 /** Set the extension of the name of a segment (replacing an already
666 existing extension).
668 @param rTheExtension The new extension.
670 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
671 if addressing the last segment.
673 @param bIgnoreFinalSlash If true, a final slash at the end of the
674 hierarchical path does not denote an empty segment, but is ignored.
676 @param eCharset See the general discussion for set-methods.
678 @return True if the extension has successfully been modified (and the
679 resulting URI is still valid). If the path is not hierarchical, or
680 the specified segment does not exist, false is returned. If false is
681 returned, the object is not modified.
683 bool setExtension(OUString const & rTheExtension,
684 sal_Int32 nIndex = LAST_SEGMENT,
685 bool bIgnoreFinalSlash = true,
686 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
688 /** Remove the extension of the name of a segment.
690 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
691 if addressing the last segment.
693 @param bIgnoreFinalSlash If true, a final slash at the end of the
694 hierarchical path does not denote an empty segment, but is ignored.
696 @return True if the extension has successfully been removed (and the
697 resulting URI is still valid), or if the name did not have an
698 extension. If the path is not hierarchical, or the specified segment
699 does not exist, false is returned. If false is returned, the object
700 is not modified.
702 bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT,
703 bool bIgnoreFinalSlash = true);
705 /** Determine whether the hierarchical path ends in a final slash.
707 @return True if the hierarchical path ends in a final slash. If the
708 path is not hierarchical, false is returned.
710 bool hasFinalSlash() const;
712 /** Make the hierarchical path end in a final slash (if it does not
713 already do so).
715 @return True if a final slash has successfully been appended (and the
716 resulting URI is still valid), or if the hierarchical path already
717 ended in a final slash. If the path is not hierarchical, false is
718 returned. If false is returned, the object is not modified.
720 bool setFinalSlash();
722 /** Remove a final slash from the hierarchical path.
724 @return True if a final slash has successfully been removed (and the
725 resulting URI is still valid), or if the hierarchical path already did
726 not end in a final slash. If the path is not hierarchical, false is
727 returned. If false is returned, the object is not modified.
729 bool removeFinalSlash();
731 // Query:
733 bool HasParam() const { return m_aQuery.isPresent(); }
735 OUString GetParam(rtl_TextEncoding eCharset
736 = RTL_TEXTENCODING_UTF8) const
737 { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); }
739 inline bool SetParam(OUString const & rTheQuery,
740 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
741 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
743 // Fragment:
745 bool HasMark() const { return m_aFragment.isPresent(); }
747 OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
748 rtl_TextEncoding eCharset
749 = RTL_TEXTENCODING_UTF8) const
750 { return decode(m_aFragment, eMechanism, eCharset); }
752 inline bool SetMark(OUString const & rTheFragment,
753 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
754 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
756 // File URLs:
758 /** Return the file system path represented by a file URL (ignoring any
759 fragment part).
761 @param eStyle The notation of the returned file system path.
763 @param pDelimiter Upon successful return, this parameter can return
764 the character that is the 'main' delimiter within the returned file
765 system path (e.g., "/" for Unix, "\" for DOS). This is
766 especially useful for routines that later try to shorten the returned
767 file system path at a 'good' position, e.g. to fit it into some
768 limited display space.
770 @return The file system path represented by this file URL. If this
771 file URL does not represent a file system path according to the
772 specified notation, or if this is not a file URL at all, an empty
773 string is returned.
775 OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr)
776 const;
778 // Data URLs:
779 std::unique_ptr<SvMemoryStream> getData() const;
781 // Coding:
783 enum Part
785 PART_USER_PASSWORD = 0x00001,
786 PART_FPATH = 0x00008,
787 PART_AUTHORITY = 0x00010,
788 PART_REL_SEGMENT_EXTRA = 0x00020,
789 PART_URIC = 0x00040,
790 PART_HTTP_PATH = 0x00080,
791 PART_MESSAGE_ID_PATH = 0x00100,
792 PART_MAILTO = 0x00200,
793 PART_PATH_BEFORE_QUERY = 0x00400,
794 PART_PCHAR = 0x00800,
795 PART_VISIBLE = 0x01000,
796 PART_VISIBLE_NONSPECIAL = 0x02000,
797 PART_UNO_PARAM_VALUE = 0x04000,
798 PART_UNAMBIGUOUS = 0x08000,
799 PART_URIC_NO_SLASH = 0x10000,
800 PART_HTTP_QUERY = 0x20000, //TODO! unused?
803 enum class EscapeType
805 NONE,
806 Octet,
807 Utf32
810 /** Encode some text as part of a URI.
812 @param rText Some text (for its interpretation, see the general
813 discussion for set-methods).
815 @param ePart The part says which characters are 'forbidden' and must
816 be encoded (replaced by escape sequences). Characters outside the US-
817 ASCII range are always 'forbidden.'
819 @param eMechanism See the general discussion for set-methods.
821 @param eCharset See the general discussion for set-methods.
823 @return The text, encoded according to the given mechanism and
824 charset ('forbidden' characters replaced by escape sequences).
826 static inline OUString encode(OUString const & rText, Part ePart,
827 EncodeMechanism eMechanism,
828 rtl_TextEncoding eCharset
829 = RTL_TEXTENCODING_UTF8);
831 /** Decode some text.
833 @param rText Some (encoded) text.
835 @param eMechanism See the general discussion for get-methods.
837 @param eCharset See the general discussion for get-methods.
839 @return The text, decoded according to the given mechanism and
840 charset (escape sequences replaced by 'raw' characters).
842 static inline OUString decode(OUString const & rText,
843 DecodeMechanism eMechanism,
844 rtl_TextEncoding eCharset
845 = RTL_TEXTENCODING_UTF8);
847 static inline OUString decode(OUStringBuffer const & rText,
848 DecodeMechanism eMechanism,
849 rtl_TextEncoding eCharset
850 = RTL_TEXTENCODING_UTF8);
852 static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4);
854 static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4,
855 EscapeType eEscapeType, Part ePart,
856 rtl_TextEncoding eCharset, bool bKeepVisibleEscapes);
858 static sal_uInt32 getUTF32(sal_Unicode const *& rBegin,
859 sal_Unicode const * pEnd,
860 EncodeMechanism eMechanism,
861 rtl_TextEncoding eCharset,
862 EscapeType & rEscapeType);
864 // Specialized helpers:
866 static sal_uInt32 scanDomain(sal_Unicode const *& rBegin,
867 sal_Unicode const * pEnd,
868 bool bEager = true);
870 // OBSOLETE Hierarchical Path:
872 OUString GetPartBeforeLastName() const;
874 /** Get the last segment in the path.
876 @param eMechanism See the general discussion for get-methods.
878 @param eCharset See the general discussion for get-methods.
880 @return For a hierarchical URL, the last segment (everything after
881 the last unencoded '/'). Note that this last segment may be empty. If
882 the URL is not hierarchical, an empty string is returned.
884 OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
885 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
886 const;
888 /** Get the 'extension' of the last segment in the path.
890 @return For a hierarchical URL, everything after the first unencoded
891 '.' in the last segment of the path. Note that this 'extension' may
892 be empty. If the URL is not hierarchical, or if the last segment does
893 not contain an unencoded '.', an empty string is returned.
895 OUString GetFileExtension() const;
897 bool Append(OUString const & rTheSegment,
898 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
899 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
901 void CutLastName();
903 // OBSOLETE File URLs:
905 OUString PathToFileName() const;
907 OUString GetFull() const;
909 OUString GetPath() const;
911 void SetBase(OUString const & rTheBase);
913 OUString GetBase() const;
915 void SetExtension(OUString const & rTheExtension);
917 OUString CutExtension();
919 static bool IsCaseSensitive() { return true; }
921 void changeScheme(INetProtocol eTargetScheme);
923 private:
924 // General Structure:
// A (begin, length) pair describing one component of the URL; a begin of -1
// marks the component as not present.
// NOTE(review): presumably the positions index into m_aAbsURIRef - confirm
// against the implementation.
926 class SubString
928 sal_Int32 m_nBegin;
929 sal_Int32 m_nLength;
931 public:
// By default the substring is "not present" (begin == -1) with length 0.
932 explicit SubString(sal_Int32 nTheBegin = -1,
933 sal_Int32 nTheLength = 0):
934 m_nBegin(nTheBegin), m_nLength(nTheLength) {}
// True unless the begin position is the -1 sentinel.
936 bool isPresent() const { return m_nBegin != -1; }
// True when the length is 0; this is checked independently of isPresent().
938 bool isEmpty() const { return m_nLength == 0; }
940 sal_Int32 getBegin() const { return m_nBegin; }
942 sal_Int32 getLength() const { return m_nLength; }
// Begin plus length, i.e. the position just past the last covered character.
944 sal_Int32 getEnd() const { return m_nBegin + m_nLength; }
946 inline sal_Int32 clear();
948 inline sal_Int32 set(OUStringBuffer & rString,
949 OUString const & rSubString,
950 sal_Int32 nTheBegin);
952 inline sal_Int32 set(OUString & rString,
953 OUString const & rSubString);
955 inline sal_Int32 set(OUStringBuffer & rString,
956 OUString const & rSubString);
958 inline void operator +=(sal_Int32 nDelta);
960 int compare(SubString const & rOther,
961 OUStringBuffer const & rThisString,
962 OUStringBuffer const & rOtherString) const;
965 OUStringBuffer m_aAbsURIRef;
966 SubString m_aScheme;
967 SubString m_aUser;
968 SubString m_aAuth;
969 SubString m_aHost;
970 SubString m_aPort;
971 SubString m_aPath;
972 SubString m_aQuery;
973 SubString m_aFragment;
974 INetProtocol m_eScheme;
975 INetProtocol m_eSmartScheme;
977 TOOLS_DLLPRIVATE void setInvalid();
979 bool setAbsURIRef(
980 OUString const & rTheAbsURIRef,
981 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart,
982 FSysStyle eStyle);
984 // Relative URLs:
// Backend of smartRel2Abs() (which passes bSmart = true) and GetNewAbsURL()
// (which passes bSmart = false, bRelativeNonURIs = false and
// FSysStyle::Detect). rTheAbsURIRef and rWasAbsolute are non-const
// references, i.e. they are written by the callee. GetNewAbsURL() treats a
// false return value as failure; smartRel2Abs() ignores the return value.
986 bool convertRelToAbs(
987 OUString const & rTheRelURIRef,
988 INetURLObject & rTheAbsURIRef, bool & rWasAbsolute,
989 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
990 bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs,
991 FSysStyle eStyle) const;
// Backend of the static GetRelURL(), which invokes it on a temporary object
// built from the base URI reference and returns whatever was stored into
// rTheRelURIRef; GetRelURL() ignores the bool result.
993 bool convertAbsToRel(
994 OUString const & rTheAbsURIRef,
995 OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism,
996 DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset,
997 FSysStyle eStyle) const;
999 // External URLs:
// translateToExternal() forwards all four arguments here and returns the
// result unchanged.
1001 static bool convertIntToExt(
1002 OUString const & rTheIntURIRef,
1003 OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism,
1004 rtl_TextEncoding eCharset);
// translateToInternal() forwards all four arguments here and returns the
// result unchanged.
1006 static bool convertExtToInt(
1007 OUString const & rTheExtURIRef,
1008 OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism,
1009 rtl_TextEncoding eCharset);
1011 // Scheme:
// Forward declaration only; the layout of PrefixInfo is not visible here.
1013 struct PrefixInfo;
// Both getSchemeInfo() overloads are declared inline, but their definitions
// are not among the inline definitions visible after the class body.
// NOTE(review): the parameterless overload presumably looks up m_eScheme -
// confirm.
1015 TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo(
1016 INetProtocol eTheScheme);
1018 TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const;
// rBegin is a reference to the caller's pointer, so the callee can move it.
// NOTE(review): presumably it is advanced past a recognized prefix and a null
// pointer is returned when none matches - confirm.
1020 TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix(
1021 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1023 // Authority:
// NOTE(review): declarations only; presumably these locate the authority
// part inside m_aAbsURIRef - confirm against the implementation.
1025 TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const;
1027 TOOLS_DLLPRIVATE SubString getAuthority() const;
1029 // User Info:
// Called by SetUserAndPass() with RTL_TEXTENCODING_UTF8; a false result makes
// SetUserAndPass() return false without touching the password.
1031 bool setUser(
1032 OUString const & rTheUser,
1033 rtl_TextEncoding eCharset);
// Called by SetPass() and SetUserAndPass() when the supplied password string
// is empty; its result becomes their return value.
1035 bool clearPassword();
// Called by SetPass() and SetUserAndPass() with RTL_TEXTENCODING_UTF8 when
// the supplied password string is not empty.
1037 bool setPassword(
1038 OUString const & rThePassword,
1039 rtl_TextEncoding eCharset);
1041 // Host and Port:
// rBegin is a reference to the caller's pointer and rCanonic a non-const
// reference, so both can be written by the callee.
// NOTE(review): exact success/failure contract not visible here - confirm.
1043 TOOLS_DLLPRIVATE static bool parseHost(
1044 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
1045 OUString & rCanonic);
// pCanonic is a raw pointer output.
// NOTE(review): whether pCanonic may be null is not visible here - confirm.
1047 TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName(
1048 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1049 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1050 bool bNetBiosName, OUStringBuffer* pCanonic);
1052 bool setHost(
1053 OUString const & rTheHost,
1054 rtl_TextEncoding eCharset);
1056 // Path:
// pBegin is a pointer to the caller's pointer and rSynPath a non-const
// reference, so both can be written by the callee. The four delimiter
// arguments are passed as sal_uInt32 values.
// NOTE(review): presumably *pBegin is advanced past the parsed path and the
// normalized path is appended to rSynPath - confirm.
1058 TOOLS_DLLPRIVATE static bool parsePath(
1059 INetProtocol eScheme, sal_Unicode const ** pBegin,
1060 sal_Unicode const * pEnd, EncodeMechanism eMechanism,
1061 rtl_TextEncoding eCharset, bool bSkippedInitialSlash,
1062 sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter,
1063 sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter,
1064 OUStringBuffer &rSynPath);
1066 bool setPath(
1067 OUString const & rThePath,
1068 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1070 // Hierarchical Path:
// NOTE(review): declarations only; presumably getSegment() yields the
// nIndex-th path segment as a SubString - confirm against the implementation.
1072 TOOLS_DLLPRIVATE bool checkHierarchical() const;
1074 TOOLS_DLLPRIVATE SubString getSegment(
1075 sal_Int32 nIndex, bool bIgnoreFinalSlash) const;
1077 // Query:
// Returns nothing. SetParam() calls it for an empty query string and then
// returns false itself.
1079 void clearQuery();
// SetParam() forwards non-empty query strings here and returns the result.
1081 bool setQuery(
1082 OUString const & rTheQuery,
1083 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1085 // Fragment:
// SetMark() calls this for an empty fragment string and returns the result.
1087 bool clearFragment();
// SetMark() forwards non-empty fragment strings here and returns the result.
1089 bool setFragment(
1090 OUString const & rTheMark,
1091 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1093 // FILE URLs:
// NOTE(review): presumably reports whether the path starts with a DOS drive
// designation under eStyle - confirm against the implementation.
1095 TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const;
1097 // Coding:
// Declared inline; its definition is not among the inline definitions
// visible after the class body.
// NOTE(review): presumably appends an escape sequence for nOctet to
// rTheText - confirm.
1099 TOOLS_DLLPRIVATE static inline void appendEscape(
1100 OUStringBuffer & rTheText, sal_uInt32 nOctet);
// Pointer-range worker; the OUString overload below and (through it) the
// public encode() end up here.
1102 static OUString encodeText(
1103 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1104 Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1105 bool bKeepVisibleEscapes);
// Convenience overload: passes rTheText.getStr() and
// rTheText.getStr() + rTheText.getLength() to the overload above.
1107 static inline OUString encodeText(
1108 OUString const & rTheText, Part ePart,
1109 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1110 bool bKeepVisibleEscapes);
// Pointer-range worker used by the inline decode() overloads for OUString,
// OUStringBuffer and SubString.
1112 static OUString decode(
1113 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1114 DecodeMechanism, rtl_TextEncoding eCharset);
// Decodes the part of m_aAbsURIRef described by rSubString; yields an empty
// OUString when rSubString.isPresent() is false.
1116 inline OUString decode(
1117 SubString const & rSubString,
1118 DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const;
1120 // Specialized helpers:
// rBegin is a reference to the caller's pointer, so the callee can move it.
// NOTE(review): presumably advanced past a successfully scanned IPv6
// reference - confirm against the implementation.
1122 TOOLS_DLLPRIVATE static bool scanIPv6reference(
1123 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1126 // static
1127 inline OUString INetURLObject::encodeText(OUString const & rTheText,
1128 Part ePart,
1129 EncodeMechanism eMechanism,
1130 rtl_TextEncoding eCharset,
1131 bool bKeepVisibleEscapes)
1133 return encodeText(rTheText.getStr(),
1134 rTheText.getStr() + rTheText.getLength(), ePart,
1135 eMechanism, eCharset, bKeepVisibleEscapes);
1138 inline OUString INetURLObject::decode(SubString const & rSubString,
1139 DecodeMechanism eMechanism,
1140 rtl_TextEncoding eCharset) const
1142 return rSubString.isPresent() ?
1143 decode(m_aAbsURIRef.getStr() + rSubString.getBegin(),
1144 m_aAbsURIRef.getStr() + rSubString.getEnd(),
1145 eMechanism, eCharset) :
1146 OUString();
1149 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1150 EncodeMechanism eMechanism,
1151 rtl_TextEncoding eCharset):
1152 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1154 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1155 FSysStyle(0));
1158 inline bool INetURLObject::SetURL(OUString const & rTheAbsURIRef,
1159 EncodeMechanism eMechanism,
1160 rtl_TextEncoding eCharset)
1162 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1163 FSysStyle(0));
1166 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1167 INetProtocol eTheSmartScheme,
1168 EncodeMechanism eMechanism,
1169 rtl_TextEncoding eCharset,
1170 FSysStyle eStyle):
1171 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme)
1173 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle);
1176 inline bool INetURLObject::SetSmartURL(OUString const & rTheAbsURIRef,
1177 EncodeMechanism eMechanism,
1178 rtl_TextEncoding eCharset,
1179 FSysStyle eStyle)
1181 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true,
1182 eStyle);
1185 inline INetURLObject
1186 INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef,
1187 bool & rWasAbsolute,
1188 bool bIgnoreFragment,
1189 EncodeMechanism eMechanism,
1190 rtl_TextEncoding eCharset,
1191 bool bRelativeNonURIs,
1192 FSysStyle eStyle) const
1194 INetURLObject aTheAbsURIRef;
1195 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute,
1196 eMechanism, eCharset, bIgnoreFragment, true,
1197 bRelativeNonURIs, eStyle);
1198 return aTheAbsURIRef;
1201 inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef,
1202 INetURLObject * pTheAbsURIRef)
1203 const
1205 INetURLObject aTheAbsURIRef;
1206 bool bWasAbsolute;
1207 if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute,
1208 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false,
1209 FSysStyle::Detect))
1210 return false;
1211 if (pTheAbsURIRef)
1212 *pTheAbsURIRef = aTheAbsURIRef;
1213 return true;
1216 // static
1217 inline OUString INetURLObject::GetRelURL(OUString const & rTheBaseURIRef,
1218 OUString const & rTheAbsURIRef,
1219 EncodeMechanism eEncodeMechanism,
1220 DecodeMechanism eDecodeMechanism,
1221 rtl_TextEncoding eCharset,
1222 FSysStyle eStyle)
1224 OUString aTheRelURIRef;
1225 INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
1226 convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism,
1227 eDecodeMechanism, eCharset, eStyle);
1228 return aTheRelURIRef;
1231 // static
1232 inline bool INetURLObject::translateToExternal(OUString const &
1233 rTheIntURIRef,
1234 OUString & rTheExtURIRef,
1235 DecodeMechanism
1236 eDecodeMechanism,
1237 rtl_TextEncoding eCharset)
1239 return convertIntToExt(rTheIntURIRef, rTheExtURIRef,
1240 eDecodeMechanism, eCharset);
1243 // static
1244 inline bool INetURLObject::translateToInternal(OUString const &
1245 rTheExtURIRef,
1246 OUString & rTheIntURIRef,
1247 DecodeMechanism
1248 eDecodeMechanism,
1249 rtl_TextEncoding eCharset)
1251 return convertExtToInt(rTheExtURIRef, rTheIntURIRef,
1252 eDecodeMechanism, eCharset);
1255 inline bool INetURLObject::SetPass(OUString const & rThePassword)
1257 return rThePassword.isEmpty() ?
1258 clearPassword() :
1259 setPassword(rThePassword, RTL_TEXTENCODING_UTF8);
1262 inline bool INetURLObject::SetUserAndPass(OUString const & rTheUser,
1263 OUString const & rThePassword)
1265 return setUser(rTheUser, RTL_TEXTENCODING_UTF8)
1266 && (rThePassword.isEmpty() ?
1267 clearPassword() :
1268 setPassword(rThePassword, RTL_TEXTENCODING_UTF8));
1271 inline bool INetURLObject::SetParam(OUString const & rTheQuery,
1272 EncodeMechanism eMechanism,
1273 rtl_TextEncoding eCharset)
1275 if (rTheQuery.isEmpty())
1277 clearQuery();
1278 return false;
1280 return setQuery(rTheQuery, eMechanism, eCharset);
1283 inline bool INetURLObject::SetMark(OUString const & rTheFragment,
1284 EncodeMechanism eMechanism,
1285 rtl_TextEncoding eCharset)
1287 return rTheFragment.isEmpty() ?
1288 clearFragment() :
1289 setFragment(rTheFragment, eMechanism, eCharset);
1292 // static
1293 inline OUString INetURLObject::encode(OUString const & rText, Part ePart,
1294 EncodeMechanism eMechanism,
1295 rtl_TextEncoding eCharset)
1297 return encodeText(rText, ePart, eMechanism, eCharset, false);
1300 // static
1301 inline OUString INetURLObject::decode(OUString const & rText,
1302 DecodeMechanism eMechanism,
1303 rtl_TextEncoding eCharset)
1305 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1306 eMechanism, eCharset);
1309 inline OUString INetURLObject::decode(OUStringBuffer const & rText,
1310 DecodeMechanism eMechanism,
1311 rtl_TextEncoding eCharset)
1313 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1314 eMechanism, eCharset);
1317 #endif
1319 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */