Version 5.4.3.2, tag libreoffice-5.4.3.2
[LibreOffice.git] / include / tools / urlobj.hxx
blobcdbede709e29353110a363f15510c8038ba5ee84
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <com/sun/star/uno/Reference.hxx>
24 #include <rtl/string.h>
25 #include <rtl/ustrbuf.hxx>
26 #include <rtl/textenc.h>
27 #include <sal/types.h>
28 #include <o3tl/typed_flags_set.hxx>
30 #include <memory>
// Forward declarations: these types are only named in signatures below
// (getData() and getAbbreviated()), so their full definitions are not needed.
class SvMemoryStream;

namespace com { namespace sun { namespace star { namespace util {
    class XStringWidth;
} } } }
// Common URL prefixes for various schemes:
#define INET_FTP_SCHEME "ftp://"
#define INET_HTTP_SCHEME "http://"
#define INET_HTTPS_SCHEME "https://"
#define INET_FILE_SCHEME "file://"
#define INET_MAILTO_SCHEME "mailto:"
#define INET_HID_SCHEME "hid:"

#define URL_PREFIX_PRIV_SOFFICE "private:"
// Schemes:
/** The URL schemes distinguished by this header.

    NotValid is the first enumerator (value 0); LAST is an alias for the
    final enumerator, Cmis.
 */
enum class INetProtocol
{
    NotValid,
    Ftp,
    Http,
    File,
    Mailto,
    VndSunStarWebdav,
    PrivSoffice,
    VndSunStarHelp,
    Https,
    Slot,
    Macro,
    Javascript,
    Data,
    Cid,
    VndSunStarHier,
    Uno,
    Component,
    VndSunStarPkg,
    Ldap,
    Db,
    VndSunStarCmd,
    Telnet,
    VndSunStarExpand,
    VndSunStarTdoc,
    Generic,
    Smb,
    Hid,
    Sftp,
    Cmis,
    LAST = Cmis
};
/** The supported notations for file system paths.
 */
enum class FSysStyle
{
    /** VOS notation (e.g., "//server/dir/file").
     */
    Vos = 0x1,

    /** Unix notation (e.g., "/dir/file").
     */
    Unix = 0x2,

    /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
     */
    Dos = 0x4,

    /** Detect the used notation.

        @descr  For the following descriptions, please note that
        whereas FSysStyle::Detect includes all style bits, combinations of
        only a few style bits are also possible, and are also described.

        @descr  When used to translate a file system path to a file URL,
        the subset of the following productions for which the appropriate
        style bit is set are checked in order (using the conventions of
        RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
        character):

         Production T1 (VOS local; FSysStyle::Vos only):
            "//." ["/" *UCS4]
          becomes
            "file:///" *UCS4

         Production T2 (VOS host; FSysStyle::Vos only):
            "//" [host] ["/" *UCS4]
          becomes
            "file://" host "/" *UCS4

         Production T3 (UNC; FSysStyle::Dos only):
            "\\" [host] ["\" *UCS4]
          becomes
            "file://" host "/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T4 (Unix-like DOS; FSysStyle::Dos only):
            ALPHA ":" ["/" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T5 (DOS; FSysStyle::Dos only):
            ALPHA ":" ["\" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T6 (any):
            *UCS4
          becomes
            "file:///" *UCS4
          replacing the delimiter by "/" within <*UCS4>.  The delimiter is
          that character from the set { "/", "\" } which appears most
          often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
          is removed from the set; if FSysStyle::Dos is not among the style
          bits, "\" is removed from the set).  If two or more
          characters appear the same number of times, the character
          mentioned first in that set is chosen.  If the first character
          of <*UCS4> is the delimiter, that character is not copied.

        @descr  When used to translate a file URL to a file system path,
        the following productions are checked in order (using the
        conventions of RFC 2234, RFC 2396, and RFC 2732):

         Production F1 (VOS; FSysStyle::Vos):
            "file://" host "/" fpath ["#" fragment]
          becomes
            "//" host "/" fpath

         Production F2 (DOS; FSysStyle::Dos):
            "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
          becomes
            ALPHA ":" ["\" fpath]
          replacing "/" by "\" in <fpath>

         Production F3 (Unix; FSysStyle::Unix):
            "file:///" fpath ["#" fragment]
          becomes
            "/" fpath
     */
    Detect = Vos | Unix | Dos
};
174 namespace o3tl {
175 template<> struct typed_flags<FSysStyle> : is_typed_flags<FSysStyle, 0x07> {};
178 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
180 public:
// Get- and Set-Methods:

/** The way input strings that represent (parts of) URIs are interpreted
    in set-methods.

    @descr  UTF-32 characters in the range 0x80--0x10FFFF are replaced by
    sequences of escape sequences, representing the UTF-8 coded characters.

    @descr  Along with an EncodeMechanism parameter, the set-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    EncodeMechanism is EncodeMechanism::WasEncoded.
 */
enum class EncodeMechanism
{
    /** All escape sequences that are already present are ignored, and are
        interpreted as literal sequences of three characters.
     */
    All,

    /** Sequences of escape sequences, that represent characters from the
        specified character set and that can be converted to UTF-32
        characters, are first decoded.  If they have to be encoded, they
        are converted to UTF-8 characters and are then translated into
        (sequences of) escape sequences.  Other escape sequences are
        copied verbatim (but using upper case hex digits).
     */
    WasEncoded,

    /** All escape sequences that are already present are copied verbatim
        (but using upper case hex digits).
     */
    NotCanonical
};
/** The way strings that represent (parts of) URIs are returned from get-
    methods.

    @descr  Along with a DecodeMechanism parameter, the get-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
 */
enum class DecodeMechanism
{
    /** The (part of the) URI is returned unchanged.  Since URIs are
        written using a subset of US-ASCII, the returned string is
        guaranteed to contain only US-ASCII characters.
     */
    NONE,

    /** All sequences of escape sequences that represent UTF-8 coded
        UTF-32 characters with a numerical value greater than 0x7F, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    ToIUri,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, and that can be converted to UTF-32,
        are replaced by the respective UTF-16 characters.  All other
        escape sequences are not decoded.
     */
    WithCharset,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, that can be converted to UTF-32, and
        that (in the case of ASCII characters) can safely be decoded
        without altering the meaning of the (part of the) URI, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    Unambiguous
};
254 // General Structure:
256 INetURLObject():
257 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {}
259 bool HasError() const { return m_eScheme == INetProtocol::NotValid; }
261 OUString GetMainURL(DecodeMechanism eMechanism,
262 rtl_TextEncoding eCharset
263 = RTL_TEXTENCODING_UTF8) const
264 { return decode(m_aAbsURIRef, eMechanism, eCharset); }
266 OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
267 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
268 const;
270 OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
271 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
272 const;
274 OUString
275 getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth,
276 sal_Int32 nWidth,
277 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
278 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
279 const;
281 bool operator ==(INetURLObject const & rObject) const;
283 bool operator !=(INetURLObject const & rObject) const
284 { return !(*this == rObject); }
286 // Strict Parsing:
288 inline explicit INetURLObject(
289 OUString const & rTheAbsURIRef,
290 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
291 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
293 inline bool SetURL(OUString const & rTheAbsURIRef,
294 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
295 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
297 bool ConcatData(INetProtocol eTheScheme, OUString const & rTheUser,
298 OUString const & rThePassword,
299 OUString const & rTheHost, sal_uInt32 nThePort,
300 OUString const & rThePath);
302 // Smart Parsing:
304 inline INetURLObject(OUString const & rTheAbsURIRef,
305 INetProtocol eTheSmartScheme,
306 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
307 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
308 FSysStyle eStyle = FSysStyle::Detect);
310 void SetSmartProtocol(INetProtocol eTheSmartScheme)
311 { m_eSmartScheme = eTheSmartScheme; }
313 inline bool
314 SetSmartURL(OUString const & rTheAbsURIRef,
315 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
316 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
317 FSysStyle eStyle = FSysStyle::Detect);
319 inline INetURLObject
320 smartRel2Abs(OUString const & rTheRelURIRef,
321 bool & rWasAbsolute,
322 bool bIgnoreFragment = false,
323 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
324 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
325 bool bRelativeNonURIs = false,
326 FSysStyle eStyle = FSysStyle::Detect) const;
328 // Relative URLs:
330 inline bool
331 GetNewAbsURL(OUString const & rTheRelURIRef,
332 INetURLObject * pTheAbsURIRef)
333 const;
335 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
336 (because of syntactic reasons), either rTheRelURIRef or an empty
337 string is returned: If all of the parameters eEncodeMechanism,
338 eDecodeMechanism and eCharset have their respective default values,
339 then rTheRelURIRef is returned unmodified; otherwise, an empty string
340 is returned.
342 static OUString
343 GetAbsURL(OUString const & rTheBaseURIRef,
344 OUString const & rTheRelURIRef,
345 bool bIgnoreFragment = false,
346 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
347 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
348 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
350 static inline OUString
351 GetRelURL(OUString const & rTheBaseURIRef,
352 OUString const & rTheAbsURIRef,
353 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
354 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
355 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
356 FSysStyle eStyle = FSysStyle::Detect);
358 // External URLs:
360 OUString getExternalURL() const;
362 static inline bool translateToExternal(OUString const & rTheIntURIRef,
363 OUString & rTheExtURIRef,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8);
369 static inline bool translateToInternal(OUString const & rTheExtURIRef,
370 OUString & rTheIntURIRef,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8);
376 // Scheme:
378 struct SchemeInfo;
380 INetProtocol GetProtocol() const { return m_eScheme; }
382 bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; }
384 bool isSchemeEqualTo(OUString const & scheme) const;
386 /** Check if the scheme is one of the WebDAV scheme
387 * we know about.
389 * @return true is one othe scheme either public scheme or private scheme.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static OUString GetScheme(INetProtocol eTheScheme);
401 /** Return the a human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static OUString GetSchemeName(INetProtocol eTheScheme);
409 static INetProtocol CompareProtocolScheme(OUString const &
410 rTheAbsURIRef);
412 // User Info:
414 bool HasUserData() const { return m_aUser.isPresent(); }
416 OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
417 rtl_TextEncoding eCharset
418 = RTL_TEXTENCODING_UTF8) const
419 { return decode(m_aUser, eMechanism, eCharset); }
421 OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
422 rtl_TextEncoding eCharset
423 = RTL_TEXTENCODING_UTF8) const
424 { return decode(m_aAuth, eMechanism, eCharset); }
426 bool SetUser(OUString const & rTheUser)
427 { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); }
429 inline bool SetPass(OUString const & rThePassword);
431 inline bool SetUserAndPass(OUString const & rTheUser,
432 OUString const & rThePassword);
434 // Host and Port:
436 bool HasPort() const { return m_aPort.isPresent(); }
438 OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
439 rtl_TextEncoding eCharset
440 = RTL_TEXTENCODING_UTF8) const
441 { return decode(m_aHost, eMechanism, eCharset); }
443 OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
444 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
446 sal_uInt32 GetPort() const;
448 bool SetHost(OUString const & rTheHost)
449 { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); }
451 bool SetPort(sal_uInt32 nThePort);
453 // Path:
455 bool HasURLPath() const { return !m_aPath.isEmpty(); }
457 OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
458 rtl_TextEncoding eCharset
459 = RTL_TEXTENCODING_UTF8) const
460 { return decode(m_aPath, eMechanism, eCharset); }
462 bool SetURLPath(OUString const & rThePath,
463 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
464 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
465 { return setPath(rThePath, eMechanism, eCharset); }
// Hierarchical Path:

/** A constant to address the last segment in various methods dealing with
    hierarchical paths.

    @descr  It is often more efficient to address the last segment using
    this constant, than to determine its ordinal value using
    getSegmentCount().
 */
enum { LAST_SEGMENT = -1 };
478 /** The number of segments in the hierarchical path.
480 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
481 form
483 hierarchical-path = 1*("/" segment)
485 segment = name *(";" param)
487 name = [base ["." extension]]
489 base = 1*pchar
491 extension = *<any pchar except ".">
493 param = *pchar
495 @param bIgnoreFinalSlash If true, a final slash at the end of the
496 hierarchical path does not denote an empty segment, but is ignored.
498 @return The number of segments in the hierarchical path. If the path
499 is not hierarchical, 0 is returned.
501 sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const;
503 /** Remove a segment from the hierarchical path.
505 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
506 if addressing the last segment.
508 @param bIgnoreFinalSlash If true, a final slash at the end of the
509 hierarchical path does not denote an empty segment, but is ignored.
511 @return True if the segment has successfully been removed (and the
512 resulting URI is still valid). If the path is not hierarchical, or
513 the specified segment does not exist, false is returned. If false is
514 returned, the object is not modified.
516 bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT,
517 bool bIgnoreFinalSlash = true);
519 /** Insert a new segment into the hierarchical path.
520 A final slash at the end of the
521 hierarchical path does not denote an empty segment, but is ignored.
523 @param rTheName The name part of the new segment. The new segment
524 will contain no parameters.
526 @param bAppendFinalSlash If the new segment is appended at the end of
527 the hierarchical path, this parameter specifies whether to add a final
528 slash after it or not.
530 @param nIndex The non-negative index of the segment before which
531 to insert the new segment. LAST_SEGMENT or an nIndex that equals
532 getSegmentCount() inserts the new segment at the end of the
533 hierarchical path.
535 @param eMechanism See the general discussion for set-methods.
537 @param eCharset See the general discussion for set-methods.
539 @return True if the segment has successfully been inserted (and the
540 resulting URI is still valid). If the path is not hierarchical, or
541 the specified place to insert the new segment does not exist, false is
542 returned. If false is returned, the object is not modified.
544 bool insertName(OUString const & rTheName,
545 bool bAppendFinalSlash = false,
546 sal_Int32 nIndex = LAST_SEGMENT,
547 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
548 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
550 /** Get the name of a segment of the hierarchical path.
552 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
553 if addressing the last segment.
555 @param bIgnoreFinalSlash If true, a final slash at the end of the
556 hierarchical path does not denote an empty segment, but is ignored.
558 @param eMechanism See the general discussion for get-methods.
560 @param eCharset See the general discussion for get-methods.
562 @return The name part of the specified segment. If the path is not
563 hierarchical, or the specified segment does not exits, an empty string
564 is returned.
566 OUString getName(sal_Int32 nIndex = LAST_SEGMENT,
567 bool bIgnoreFinalSlash = true,
568 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
569 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
570 const;
572 /** Set the name of a segment (preserving any parameters and any query or
573 fragment part).
575 @param rTheName The new name.
577 @return True if the name has successfully been modified (and the
578 resulting URI is still valid). If the path is not hierarchical, or
579 the specified segment does not exist, false is returned. If false is
580 returned, the object is not modified.
582 bool setName(OUString const & rTheName);
584 /** Get the base of the name of a segment.
586 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
587 if addressing the last segment.
589 @param bIgnoreFinalSlash If true, a final slash at the end of the
590 hierarchical path does not denote an empty segment, but is ignored.
592 @param eMechanism See the general discussion for get-methods.
594 @param eCharset See the general discussion for get-methods.
596 @return The base part of the specified segment. If the path is
597 not hierarchical, or the specified segment does not exits, an empty
598 string is returned.
600 OUString getBase(sal_Int32 nIndex = LAST_SEGMENT,
601 bool bIgnoreFinalSlash = true,
602 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
603 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
604 const;
606 /** Set the base of the name of a segment (preserving the extension).
607 A final slash at the end of the
608 hierarchical path does not denote an empty segment, but is ignored.
610 @param rTheBase The new base.
612 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
613 if addressing the last segment.
615 @param eMechanism See the general discussion for set-methods.
617 @param eCharset See the general discussion for set-methods.
619 @return True if the base has successfully been modified (and the
620 resulting URI is still valid). If the path is not hierarchical, or
621 the specified segment does not exist, false is returned. If false is
622 returned, the object is not modified.
624 bool setBase(OUString const & rTheBase,
625 sal_Int32 nIndex = LAST_SEGMENT,
626 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
627 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
629 /** Determine whether the name of the last segment has an extension.
631 @return True if the name of the specified segment has an extension.
632 If the path is not hierarchical, or the specified segment does not
633 exist, false is returned.
635 bool hasExtension() const;
637 /** Get the extension of the name of a segment.
639 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
640 if addressing the last segment.
642 @param bIgnoreFinalSlash If true, a final slash at the end of the
643 hierarchical path does not denote an empty segment, but is ignored.
645 @param eMechanism See the general discussion for get-methods.
647 @param eCharset See the general discussion for get-methods.
649 @return The extension part of the specified segment. If the path is
650 not hierarchical, or the specified segment does not exits, an empty
651 string is returned.
653 OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT,
654 bool bIgnoreFinalSlash = true,
655 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
656 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
657 const;
659 /** Set the extension of the name of a segment (replacing an already
660 existing extension).
662 @param rTheExtension The new extension.
664 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
665 if addressing the last segment.
667 @param bIgnoreFinalSlash If true, a final slash at the end of the
668 hierarchical path does not denote an empty segment, but is ignored.
670 @param eCharset See the general discussion for set-methods.
672 @return True if the extension has successfully been modified (and the
673 resulting URI is still valid). If the path is not hierarchical, or
674 the specified segment does not exist, false is returned. If false is
675 returned, the object is not modified.
677 bool setExtension(OUString const & rTheExtension,
678 sal_Int32 nIndex = LAST_SEGMENT,
679 bool bIgnoreFinalSlash = true,
680 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
682 /** Remove the extension of the name of a segment.
684 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
685 if addressing the last segment.
687 @param bIgnoreFinalSlash If true, a final slash at the end of the
688 hierarchical path does not denote an empty segment, but is ignored.
690 @return True if the extension has successfully been removed (and the
691 resulting URI is still valid), or if the name did not have an
692 extension. If the path is not hierarchical, or the specified segment
693 does not exist, false is returned. If false is returned, the object
694 is not modified.
696 bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT,
697 bool bIgnoreFinalSlash = true);
699 /** Determine whether the hierarchical path ends in a final slash.
701 @return True if the hierarchical path ends in a final slash. If the
702 path is not hierarchical, false is returned.
704 bool hasFinalSlash() const;
/** Make the hierarchical path end in a final slash (if it does not
    already do so).

    @return  True if a final slash has successfully been appended (and the
    resulting URI is still valid), or if the hierarchical path already
    ended in a final slash.  If the path is not hierarchical, false is
    returned.  If false is returned, the object is not modified.
 */
bool setFinalSlash();
/** Remove a final slash from the hierarchical path.

    @return  True if a final slash has successfully been removed (and the
    resulting URI is still valid), or if the hierarchical path already did
    not end in a final slash.  If the path is not hierarchical, false is
    returned.  If false is returned, the object is not modified.
 */
bool removeFinalSlash();
725 // Query:
727 bool HasParam() const { return m_aQuery.isPresent(); }
729 OUString GetParam(rtl_TextEncoding eCharset
730 = RTL_TEXTENCODING_UTF8) const
731 { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); }
733 inline bool SetParam(OUString const & rTheQuery,
734 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
735 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
737 // Fragment:
739 bool HasMark() const { return m_aFragment.isPresent(); }
741 OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
742 rtl_TextEncoding eCharset
743 = RTL_TEXTENCODING_UTF8) const
744 { return decode(m_aFragment, eMechanism, eCharset); }
746 inline bool SetMark(OUString const & rTheFragment,
747 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
748 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
750 // File URLs:
752 /** Create an INetURLObject from a file system path.
754 @param rFSysPath A file system path. An URL is not allowed here!
756 @param eStyle The notation of rFSysPath.
758 inline INetURLObject(OUString const & rFSysPath, FSysStyle eStyle);
760 /** Set this INetURLObject to a file URL constructed from a file system
761 path.
763 @param rFSysPath A file system path. An URL is not allowed here!
765 @param eStyle The notation of rFSysPath.
767 @return True if this INetURLObject has successfully been changed. If
768 false is returned, this INetURLObject has not been modified.
770 bool setFSysPath(OUString const & rFSysPath, FSysStyle eStyle);
772 /** Return the file system path represented by a file URL (ignoring any
773 fragment part).
775 @param eStyle The notation of the returned file system path.
777 @param pDelimiter Upon successful return, this parameter can return
778 the character that is the 'main' delimiter within the returned file
779 system path (e.g., "/" for Unix, "\" for DOS). This is
780 especially useful for routines that later try to shorten the returned
781 file system path at a 'good' position, e.g. to fit it into some
782 limited display space.
784 @return The file system path represented by this file URL. If this
785 file URL does not represent a file system path according to the
786 specified notation, or if this is not a file URL at all, an empty
787 string is returned.
789 OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr)
790 const;
792 // Data URLs:
793 std::unique_ptr<SvMemoryStream> getData();
// Coding:

/** Identifiers for the parts of a URI, passed as the ePart argument of
    encode() and appendUCS4().  Each enumerator has a distinct single-bit
    value.
 */
enum Part
{
    PART_USER_PASSWORD          = 0x00001,
    PART_FPATH                  = 0x00008,
    PART_AUTHORITY              = 0x00010,
    PART_REL_SEGMENT_EXTRA      = 0x00020,
    PART_URIC                   = 0x00040,
    PART_HTTP_PATH              = 0x00080,
    PART_MESSAGE_ID_PATH        = 0x00100,
    PART_MAILTO                 = 0x00200,
    PART_PATH_BEFORE_QUERY      = 0x00400,
    PART_PCHAR                  = 0x00800,
    PART_VISIBLE                = 0x01000,
    PART_VISIBLE_NONSPECIAL     = 0x02000,
    PART_UNO_PARAM_VALUE        = 0x04000,
    PART_UNAMBIGUOUS            = 0x08000,
    PART_URIC_NO_SLASH          = 0x10000,
    PART_HTTP_QUERY             = 0x20000, //TODO! unused?
};
/** Escape classification used by appendUCS4() (input) and getUTF32()
    (output parameter rEscapeType).
 */
enum class EscapeType
{
    NONE,
    Octet,
    Utf32
};
824 /** Encode some text as part of a URI.
826 @param rText Some text (for its interpretation, see the general
827 discussion for set-methods).
829 @param ePart The part says which characters are 'forbidden' and must
830 be encoded (replaced by escape sequences). Characters outside the US-
831 ASCII range are always 'forbidden.'
833 @param eMechanism See the general discussion for set-methods.
835 @param eCharset See the general discussion for set-methods.
837 @return The text, encoded according to the given mechanism and
838 charset ('forbidden' characters replaced by escape sequences).
840 static inline OUString encode(OUString const & rText, Part ePart,
841 EncodeMechanism eMechanism,
842 rtl_TextEncoding eCharset
843 = RTL_TEXTENCODING_UTF8);
845 /** Decode some text.
847 @param rText Some (encoded) text.
849 @param eMechanism See the general discussion for get-methods.
851 @param eCharset See the general discussion for get-methods.
853 @return The text, decoded according to the given mechanism and
854 charset (escape sequences replaced by 'raw' characters).
856 static inline OUString decode(OUString const & rText,
857 DecodeMechanism eMechanism,
858 rtl_TextEncoding eCharset
859 = RTL_TEXTENCODING_UTF8);
861 static inline OUString decode(OUStringBuffer const & rText,
862 DecodeMechanism eMechanism,
863 rtl_TextEncoding eCharset
864 = RTL_TEXTENCODING_UTF8);
866 static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4);
868 static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4,
869 EscapeType eEscapeType, bool bOctets, Part ePart,
870 rtl_TextEncoding eCharset, bool bKeepVisibleEscapes);
872 static sal_uInt32 getUTF32(sal_Unicode const *& rBegin,
873 sal_Unicode const * pEnd, bool bOctets,
874 EncodeMechanism eMechanism,
875 rtl_TextEncoding eCharset,
876 EscapeType & rEscapeType);
878 // Specialized helpers:
880 static sal_uInt32 scanDomain(sal_Unicode const *& rBegin,
881 sal_Unicode const * pEnd,
882 bool bEager = true);
884 // OBSOLETE Hierarchical Path:
886 OUString GetPartBeforeLastName() const;
888 /** Get the last segment in the path.
890 @param eMechanism See the general discussion for get-methods.
892 @param eCharset See the general discussion for get-methods.
894 @return For a hierarchical URL, the last segment (everything after
895 the last unencoded '/'). Note that this last segment may be empty. If
896 the URL is not hierarchical, an empty string is returned.
898 OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
899 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
900 const;
902 /** Get the 'extension' of the last segment in the path.
904 @return For a hierarchical URL, everything after the first unencoded
905 '.' in the last segment of the path. Note that this 'extension' may
906 be empty. If the URL is not hierarchical, or if the last segment does
907 not contain an unencoded '.', an empty string is returned.
909 OUString GetFileExtension() const;
911 bool Append(OUString const & rTheSegment,
912 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
913 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
915 void CutLastName();
917 // OBSOLETE File URLs:
919 OUString PathToFileName() const;
921 OUString GetFull() const;
923 OUString GetPath() const;
925 void SetBase(OUString const & rTheBase);
927 OUString GetBase() const;
929 void SetName(OUString const & rTheName,
930 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
931 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
933 OUString GetName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
934 rtl_TextEncoding eCharset
935 = RTL_TEXTENCODING_UTF8) const
936 { return GetLastName(eMechanism, eCharset); }
938 void SetExtension(OUString const & rTheExtension);
940 OUString GetExtension() const
941 { return GetFileExtension(); }
943 OUString CutExtension();
945 static bool IsCaseSensitive() { return true; }
948 private:
949 // General Structure:
951 class SubString
953 sal_Int32 m_nBegin;
954 sal_Int32 m_nLength;
956 public:
957 explicit SubString(sal_Int32 nTheBegin = -1,
958 sal_Int32 nTheLength = 0):
959 m_nBegin(nTheBegin), m_nLength(nTheLength) {}
961 bool isPresent() const { return m_nBegin != -1; }
963 bool isEmpty() const { return m_nLength == 0; }
965 sal_Int32 getBegin() const { return m_nBegin; }
967 sal_Int32 getLength() const { return m_nLength; }
969 sal_Int32 getEnd() const { return m_nBegin + m_nLength; }
971 inline sal_Int32 clear();
973 inline sal_Int32 set(OUStringBuffer & rString,
974 OUString const & rSubString,
975 sal_Int32 nTheBegin);
977 inline sal_Int32 set(OUString & rString,
978 OUString const & rSubString);
980 inline sal_Int32 set(OUStringBuffer & rString,
981 OUString const & rSubString);
983 inline void operator +=(sal_Int32 nDelta);
985 int compare(SubString const & rOther,
986 OUStringBuffer const & rThisString,
987 OUStringBuffer const & rOtherString) const;
// Text buffer holding the URI reference.  The inline decode(SubString, ...)
// helper further down reads character ranges out of this buffer.
990 OUStringBuffer m_aAbsURIRef;
// One begin/length record per URI component.
// NOTE(review): presumably these index into m_aAbsURIRef (the inline
// decode(SubString, ...) helper uses a SubString that way) -- confirm that
// this holds for every member listed here.
991 SubString m_aScheme;
992 SubString m_aUser;
993 SubString m_aAuth;
994 SubString m_aHost;
995 SubString m_aPort;
996 SubString m_aPath;
997 SubString m_aQuery;
998 SubString m_aFragment;
// Initialised to INetProtocol::NotValid by the inline constructors before
// they start parsing.
999 INetProtocol m_eScheme;
// Initialised to INetProtocol::Http by the inline constructors, except the
// "smart" constructor, which stores its eTheSmartScheme argument here.
1000 INetProtocol m_eSmartScheme;
// Declaration only.  NOTE(review): the name suggests it resets the object
// to the not-valid state -- confirm against the implementation.
1002 TOOLS_DLLPRIVATE void setInvalid();
// Declaration only.  Called by the inline constructors and by SetURL() /
// SetSmartURL(): the plain variants pass bSmart == false together with
// FSysStyle(0), the smart variants pass bSmart == true together with the
// caller's eStyle.  SetURL() and SetSmartURL() return this function's
// result unchanged.
1004 bool setAbsURIRef(
1005 OUString const & rTheAbsURIRef,
1006 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart,
1007 FSysStyle eStyle);
1009 // Relative URLs:
// Declaration only.  Used by the inline smartRel2Abs() (bSmart == true,
// result ignored) and GetNewAbsURL() (bSmart == false, result checked).
// rTheAbsURIRef and rWasAbsolute are non-const references, i.e. outputs.
1011 bool convertRelToAbs(
1012 OUString const & rTheRelURIRef,
1013 INetURLObject & rTheAbsURIRef, bool & rWasAbsolute,
1014 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1015 bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs,
1016 FSysStyle eStyle) const;
// Declaration only.  Used by the inline static GetRelURL(), which calls it
// on a temporary object built from the base URI and ignores the result;
// rTheRelURIRef is the output string.
1018 bool convertAbsToRel(
1019 OUString const & rTheAbsURIRef,
1020 OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism,
1021 DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset,
1022 FSysStyle eStyle) const;
1024 // External URLs:
// Declaration only.  The inline translateToExternal() forwards to this with
// bOctets == false and returns its result.
1026 static bool convertIntToExt(
1027 OUString const & rTheIntURIRef, bool bOctets,
1028 OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism,
1029 rtl_TextEncoding eCharset);
// Declaration only.  The inline translateToInternal() forwards to this with
// bOctets == false and returns its result.
1031 static bool convertExtToInt(
1032 OUString const & rTheExtURIRef, bool bOctets,
1033 OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism,
1034 rtl_TextEncoding eCharset);
1036 // Scheme:
// Forward declaration; the struct is defined elsewhere.  SchemeInfo, used
// just below, is likewise not declared within this excerpt.
1038 struct PrefixInfo;
// Declarations only.  NOTE(review): presumably the static overload looks up
// the info record for eTheScheme and the non-static one uses this object's
// own scheme -- confirm.
1040 TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo(
1041 INetProtocol eTheScheme);
1043 TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const;
// Declaration only.  rBegin is a reference to the pointer, so the callee is
// able to move it.  NOTE(review): a null result presumably means "no known
// prefix" -- confirm.
1045 TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix(
1046 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1048 // Authority:
// Declarations only; definitions are not visible in this excerpt.
1050 TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const;
1052 TOOLS_DLLPRIVATE SubString getAuthority() const;
1054 // User Info:
// Declarations only.  The inline SetPass() and SetUserAndPass() call these
// three with RTL_TEXTENCODING_UTF8; an empty password is routed to
// clearPassword(), a non-empty one to setPassword().
1056 bool setUser(
1057 OUString const & rTheUser,
1058 rtl_TextEncoding eCharset);
1060 bool clearPassword();
1062 bool setPassword(
1063 OUString const & rThePassword,
1064 rtl_TextEncoding eCharset);
1066 // Host and Port:
// Declarations only.  parseHost takes rBegin as a reference to the pointer
// and writes to rCanonic; parseHostOrNetBiosName takes pCanonic as a
// pointer.  NOTE(review): whether pCanonic may be null is not visible
// here -- check the implementation.
1068 TOOLS_DLLPRIVATE static bool parseHost(
1069 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
1070 OUString & rCanonic);
1072 TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName(
1073 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
1074 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1075 bool bNetBiosName, OUStringBuffer* pCanonic);
1077 bool setHost(
1078 OUString const & rTheHost,
1079 rtl_TextEncoding eCharset);
1081 // Path:
// Declaration only.  pBegin is a pointer to the begin pointer and rSynPath
// is a non-const buffer reference, so both can be written by the callee.
// NOTE(review): presumably *pBegin is advanced past the parsed path and the
// normalised path is appended to rSynPath -- confirm.
1083 TOOLS_DLLPRIVATE static bool parsePath(
1084 INetProtocol eScheme, sal_Unicode const ** pBegin,
1085 sal_Unicode const * pEnd, bool bOctets, EncodeMechanism eMechanism,
1086 rtl_TextEncoding eCharset, bool bSkippedInitialSlash,
1087 sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter,
1088 sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter,
1089 OUStringBuffer &rSynPath);
// Declaration only; definition not visible in this excerpt.
1091 bool setPath(
1092 OUString const & rThePath,
1093 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1095 // Hierarchical Path:
// Declarations only; definitions not visible in this excerpt.
1097 TOOLS_DLLPRIVATE bool checkHierarchical() const;
1099 TOOLS_DLLPRIVATE SubString getSegment(
1100 sal_Int32 nIndex, bool bIgnoreFinalSlash) const;
1102 // Query:
// Declarations only.  The inline SetParam() calls clearQuery() for an empty
// argument and setQuery() otherwise, returning the callee's result.
1104 bool clearQuery();
1106 bool setQuery(
1107 OUString const & rTheQuery,
1108 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1110 // Fragment:
// Declarations only.  The inline SetMark() calls clearFragment() for an
// empty argument and setFragment() otherwise, returning the callee's
// result.
1112 bool clearFragment();
1114 bool setFragment(
1115 OUString const & rTheMark,
1116 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1118 // FILE URLs:
// Declaration only; definition not visible in this excerpt.
1120 TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const;
1122 // Coding:
// Declaration only.  NOTE(review): presumably appends an escape sequence
// for nOctet to rTheText -- confirm the exact form in the implementation.
1124 TOOLS_DLLPRIVATE static inline void appendEscape(
1125 OUStringBuffer & rTheText, sal_uInt32 nOctet);
// Pointer-range encoder; declaration only.  The OUString overload declared
// next is defined inline in this header and forwards to this one.
1127 static OUString encodeText(
1128 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
1129 Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1130 bool bKeepVisibleEscapes);
// Defined inline in this header.  The public encode() calls it with
// bOctets == false and bKeepVisibleEscapes == false.
1132 static inline OUString encodeText(
1133 OUString const & rTheText, bool bOctets, Part ePart,
1134 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1135 bool bKeepVisibleEscapes);
// Pointer-range decoder; declaration only.  The inline decode() overloads
// defined in this header all forward to it.
1137 static OUString decode(
1138 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1139 DecodeMechanism, rtl_TextEncoding eCharset);
// Defined inline in this header: decodes the range of m_aAbsURIRef
// described by rSubString, or returns an empty string if rSubString is not
// present.
1141 inline OUString decode(
1142 SubString const & rSubString,
1143 DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const;
1145 // Specialized helpers:
// Declaration only.  rBegin is a reference to the pointer, so the callee is
// able to move it.  NOTE(review): presumably advanced past a recognised
// IPv6 reference -- confirm.
1147 TOOLS_DLLPRIVATE static bool scanIPv6reference(
1148 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1150 private:
// Declaration only; definition not visible in this excerpt.
1151 void changeScheme(INetProtocol eTargetScheme);
1154 // static
1155 inline OUString INetURLObject::encodeText(OUString const & rTheText,
1156 bool bOctets, Part ePart,
1157 EncodeMechanism eMechanism,
1158 rtl_TextEncoding eCharset,
1159 bool bKeepVisibleEscapes)
1161 return encodeText(rTheText.getStr(),
1162 rTheText.getStr() + rTheText.getLength(), bOctets, ePart,
1163 eMechanism, eCharset, bKeepVisibleEscapes);
1166 inline OUString INetURLObject::decode(SubString const & rSubString,
1167 DecodeMechanism eMechanism,
1168 rtl_TextEncoding eCharset) const
1170 return rSubString.isPresent() ?
1171 decode(m_aAbsURIRef.getStr() + rSubString.getBegin(),
1172 m_aAbsURIRef.getStr() + rSubString.getEnd(),
1173 eMechanism, eCharset) :
1174 OUString();
1177 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1178 EncodeMechanism eMechanism,
1179 rtl_TextEncoding eCharset):
1180 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1182 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1183 FSysStyle(0));
1186 inline bool INetURLObject::SetURL(OUString const & rTheAbsURIRef,
1187 EncodeMechanism eMechanism,
1188 rtl_TextEncoding eCharset)
1190 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1191 FSysStyle(0));
1194 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1195 INetProtocol eTheSmartScheme,
1196 EncodeMechanism eMechanism,
1197 rtl_TextEncoding eCharset,
1198 FSysStyle eStyle):
1199 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme)
1201 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle);
1204 inline bool INetURLObject::SetSmartURL(OUString const & rTheAbsURIRef,
1205 EncodeMechanism eMechanism,
1206 rtl_TextEncoding eCharset,
1207 FSysStyle eStyle)
1209 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true,
1210 eStyle);
1213 inline INetURLObject
1214 INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef,
1215 bool & rWasAbsolute,
1216 bool bIgnoreFragment,
1217 EncodeMechanism eMechanism,
1218 rtl_TextEncoding eCharset,
1219 bool bRelativeNonURIs,
1220 FSysStyle eStyle) const
1222 INetURLObject aTheAbsURIRef;
1223 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute,
1224 eMechanism, eCharset, bIgnoreFragment, true,
1225 bRelativeNonURIs, eStyle);
1226 return aTheAbsURIRef;
1229 inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef,
1230 INetURLObject * pTheAbsURIRef)
1231 const
1233 INetURLObject aTheAbsURIRef;
1234 bool bWasAbsolute;
1235 if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute,
1236 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false,
1237 FSysStyle::Detect))
1238 return false;
1239 if (pTheAbsURIRef)
1240 *pTheAbsURIRef = aTheAbsURIRef;
1241 return true;
1244 // static
1245 inline OUString INetURLObject::GetRelURL(OUString const & rTheBaseURIRef,
1246 OUString const & rTheAbsURIRef,
1247 EncodeMechanism eEncodeMechanism,
1248 DecodeMechanism eDecodeMechanism,
1249 rtl_TextEncoding eCharset,
1250 FSysStyle eStyle)
1252 OUString aTheRelURIRef;
1253 INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
1254 convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism,
1255 eDecodeMechanism, eCharset, eStyle);
1256 return aTheRelURIRef;
1259 // static
1260 inline bool INetURLObject::translateToExternal(OUString const &
1261 rTheIntURIRef,
1262 OUString & rTheExtURIRef,
1263 DecodeMechanism
1264 eDecodeMechanism,
1265 rtl_TextEncoding eCharset)
1267 return convertIntToExt(rTheIntURIRef, false, rTheExtURIRef,
1268 eDecodeMechanism, eCharset);
1271 // static
1272 inline bool INetURLObject::translateToInternal(OUString const &
1273 rTheExtURIRef,
1274 OUString & rTheIntURIRef,
1275 DecodeMechanism
1276 eDecodeMechanism,
1277 rtl_TextEncoding eCharset)
1279 return convertExtToInt(rTheExtURIRef, false, rTheIntURIRef,
1280 eDecodeMechanism, eCharset);
1283 inline bool INetURLObject::SetPass(OUString const & rThePassword)
1285 return rThePassword.isEmpty() ?
1286 clearPassword() :
1287 setPassword(rThePassword, RTL_TEXTENCODING_UTF8);
1290 inline bool INetURLObject::SetUserAndPass(OUString const & rTheUser,
1291 OUString const & rThePassword)
1293 return setUser(rTheUser, RTL_TEXTENCODING_UTF8)
1294 && (rThePassword.isEmpty() ?
1295 clearPassword() :
1296 setPassword(rThePassword, RTL_TEXTENCODING_UTF8));
1299 inline bool INetURLObject::SetParam(OUString const & rTheQuery,
1300 EncodeMechanism eMechanism,
1301 rtl_TextEncoding eCharset)
1303 return rTheQuery.isEmpty() ?
1304 clearQuery() :
1305 setQuery(rTheQuery, eMechanism, eCharset);
1308 inline bool INetURLObject::SetMark(OUString const & rTheFragment,
1309 EncodeMechanism eMechanism,
1310 rtl_TextEncoding eCharset)
1312 return rTheFragment.isEmpty() ?
1313 clearFragment() :
1314 setFragment(rTheFragment, eMechanism, eCharset);
1317 inline INetURLObject::INetURLObject(OUString const & rFSysPath,
1318 FSysStyle eStyle):
1319 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1321 setFSysPath(rFSysPath, eStyle);
1324 // static
1325 inline OUString INetURLObject::encode(OUString const & rText, Part ePart,
1326 EncodeMechanism eMechanism,
1327 rtl_TextEncoding eCharset)
1329 return encodeText(rText, false, ePart, eMechanism, eCharset, false);
1332 // static
1333 inline OUString INetURLObject::decode(OUString const & rText,
1334 DecodeMechanism eMechanism,
1335 rtl_TextEncoding eCharset)
1337 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1338 eMechanism, eCharset);
1341 inline OUString INetURLObject::decode(OUStringBuffer const & rText,
1342 DecodeMechanism eMechanism,
1343 rtl_TextEncoding eCharset)
1345 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1346 eMechanism, eCharset);
1349 #endif
1351 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */