build fix
[LibreOffice.git] / include / tools / urlobj.hxx
blob52675f9ded1304d595f2b94d0f765f13d89a9952
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <com/sun/star/uno/Reference.hxx>
24 #include <rtl/string.h>
25 #include <rtl/ustrbuf.hxx>
26 #include <rtl/textenc.h>
27 #include <sal/types.h>
29 #include <memory>
31 class SvMemoryStream;
33 namespace com { namespace sun { namespace star { namespace util {
34 class XStringWidth;
35 } } } }
// Common URL prefixes for various schemes.
// These are plain string-literal macros, so they can be concatenated with
// adjacent string literals at compile time.
#define INET_FTP_SCHEME "ftp://"
#define INET_HTTP_SCHEME "http://"
#define INET_HTTPS_SCHEME "https://"
#define INET_FILE_SCHEME "file://"
#define INET_MAILTO_SCHEME "mailto:"
#define INET_HID_SCHEME "hid:"

// Prefix of "private:" URLs (the macro name suggests the PrivSoffice
// scheme -- confirm against the scheme table in the implementation).
#define URL_PREFIX_PRIV_SOFFICE "private:"
47 // Schemes:
/** The URL schemes distinguished by INetURLObject.

    NotValid is the scheme a default-constructed INetURLObject carries, and
    the value INetURLObject::HasError() tests for.

    The enumerators have no explicit values, so their numeric values follow
    from their position in this list: do not reorder them or insert new ones
    in the middle.  LAST is an alias for the final real enumerator and has
    to be updated whenever a scheme is appended.
 */
enum class INetProtocol
{
    NotValid,
    Ftp,
    Http,
    File,
    Mailto,
    VndSunStarWebdav,
    PrivSoffice,
    VndSunStarHelp,
    Https,
    Slot,
    Macro,
    Javascript,
    Data,
    Cid,
    VndSunStarHier,
    Uno,
    Component,
    VndSunStarPkg,
    Ldap,
    Db,
    VndSunStarCmd,
    Telnet,
    VndSunStarExpand,
    VndSunStarTdoc,
    Generic,
    Smb,
    Hid,
    Sftp,
    Cmis,
    LAST = Cmis
};
82 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
84 public:
85 // Get- and Set-Methods:
/** The way input strings that represent (parts of) URIs are interpreted
    in set-methods.

    @descr  UTF-32 characters in the range 0x80--0x10FFFF are replaced by
    sequences of escape sequences, representing the UTF-8 coded characters.

    @descr  Along with an EncodeMechanism parameter, the set-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    EncodeMechanism is EncodeMechanism::WasEncoded.
 */
enum class EncodeMechanism
{
    /** All escape sequences that are already present are ignored, and are
        interpreted as literal sequences of three characters.
     */
    All,

    /** Sequences of escape sequences, that represent characters from the
        specified character set and that can be converted to UTF-32
        characters, are first decoded.  If they have to be encoded, they
        are converted to UTF-8 characters and are then translated into
        (sequences of) escape sequences.  Other escape sequences are
        copied verbatim (but using upper case hex digits).
     */
    WasEncoded,

    /** All escape sequences that are already present are copied verbatim
        (but using upper case hex digits).
     */
    NotCanonical
};
/** The way strings that represent (parts of) URIs are returned from get-
    methods.

    @descr  Along with a DecodeMechanism parameter, the get-methods all
    take an rtl_TextEncoding parameter, which is ignored unless the
    DecodeMechanism is DecodeMechanism::WithCharset or
    DecodeMechanism::Unambiguous.
 */
enum class DecodeMechanism
{
    /** The (part of the) URI is returned unchanged.  Since URIs are
        written using a subset of US-ASCII, the returned string is
        guaranteed to contain only US-ASCII characters.
     */
    NONE,

    /** All sequences of escape sequences that represent UTF-8 coded
        UTF-32 characters with a numerical value greater than 0x7F, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    ToIUri,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, and that can be converted to UTF-32,
        are replaced by the respective UTF-16 characters.  All other
        escape sequences are not decoded.
     */
    WithCharset,

    /** All (sequences of) escape sequences that represent characters from
        the specified character set, that can be converted to UTF-32, and
        that (in the case of ASCII characters) can safely be decoded
        without altering the meaning of the (part of the) URI, are
        replaced by the respective UTF-16 characters.  All other escape
        sequences are not decoded.
     */
    Unambiguous
};
158 // General Structure:
160 inline INetURLObject():
161 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {}
163 inline bool HasError() const { return m_eScheme == INetProtocol::NotValid; }
165 inline OUString GetMainURL(DecodeMechanism eMechanism,
166 rtl_TextEncoding eCharset
167 = RTL_TEXTENCODING_UTF8) const
168 { return decode(m_aAbsURIRef, eMechanism, eCharset); }
170 OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
171 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
172 const;
174 OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
175 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
176 const;
178 OUString
179 getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth,
180 sal_Int32 nWidth,
181 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
182 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
183 const;
185 bool operator ==(INetURLObject const & rObject) const;
187 inline bool operator !=(INetURLObject const & rObject) const
188 { return !(*this == rObject); }
190 // Strict Parsing:
192 inline explicit INetURLObject(
193 OUString const & rTheAbsURIRef,
194 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
195 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
197 inline bool SetURL(OUString const & rTheAbsURIRef,
198 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
199 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
201 bool ConcatData(INetProtocol eTheScheme, OUString const & rTheUser,
202 OUString const & rThePassword,
203 OUString const & rTheHost, sal_uInt32 nThePort,
204 OUString const & rThePath);
206 // Smart Parsing:
/** The supported notations for file system paths.

    The enumerators are single-bit flags, so several notations can be
    combined with bitwise OR; FSYS_DETECT is the combination of all of them.
 */
enum FSysStyle
{
    /** VOS notation (e.g., "//server/dir/file").
     */
    FSYS_VOS = 0x1,

    /** Unix notation (e.g., "/dir/file").
     */
    FSYS_UNX = 0x2,

    /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
     */
    FSYS_DOS = 0x4,

    /** Detect the used notation.

        @descr  For the following descriptions, please note that
        whereas FSYS_DETECT includes all style bits, combinations of only
        a few style bits are also possible, and are also described.

        @descr  When used to translate a file system path to a file URL,
        the subset of the following productions for which the appropriate
        style bit is set are checked in order (using the conventions of
        RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
        character):

         Production T1 (VOS local; FSYS_VOS only):
            "//." ["/" *UCS4]
          becomes
            "file:///" *UCS4

         Production T2 (VOS host; FSYS_VOS only):
            "//" [host] ["/" *UCS4]
          becomes
            "file://" host "/" *UCS4

         Production T3 (UNC; FSYS_DOS only):
            "\\" [host] ["\" *UCS4]
          becomes
            "file://" host "/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T4 (Unix-like DOS; FSYS_DOS only):
            ALPHA ":" ["/" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T5 (DOS; FSYS_DOS only):
            ALPHA ":" ["\" *UCS4]
          becomes
            "file:///" ALPHA ":/" *UCS4
          replacing "\" by "/" within <*UCS4>

         Production T6 (any):
            *UCS4
          becomes
            "file:///" *UCS4
          replacing the delimiter by "/" within <*UCS4>.  The delimiter is
          that character from the set { "/", "\" } which appears most
          often in <*UCS4> (if FSYS_UNX is not among the style bits, "/"
          is removed from the set; if FSYS_DOS is not among the style
          bits, "\" is removed from the set).  If two or more
          characters appear the same number of times, the character
          mentioned first in that set is chosen.  If the first character
          of <*UCS4> is the delimiter, that character is not copied.

        @descr  When used to translate a file URL to a file system path,
        the following productions are checked in order (using the
        conventions of RFC 2234, RFC 2396, and RFC 2732):

         Production F1 (VOS; FSYS_VOS):
            "file://" host "/" fpath ["#" fragment]
          becomes
            "//" host "/" fpath

         Production F2 (DOS; FSYS_DOS):
            "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
          becomes
            ALPHA ":" ["\" fpath]
          replacing "/" by "\" in <fpath>

         Production F3 (Unix; FSYS_UNX):
            "file:///" fpath ["#" fragment]
          becomes
            "/" fpath
     */
    FSYS_DETECT = FSYS_VOS | FSYS_UNX | FSYS_DOS
};
300 inline INetURLObject(OUString const & rTheAbsURIRef,
301 INetProtocol eTheSmartScheme,
302 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
303 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
304 FSysStyle eStyle = FSYS_DETECT);
306 inline void SetSmartProtocol(INetProtocol eTheSmartScheme)
307 { m_eSmartScheme = eTheSmartScheme; }
309 inline bool
310 SetSmartURL(OUString const & rTheAbsURIRef,
311 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
312 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
313 FSysStyle eStyle = FSYS_DETECT);
315 inline INetURLObject
316 smartRel2Abs(OUString const & rTheRelURIRef,
317 bool & rWasAbsolute,
318 bool bIgnoreFragment = false,
319 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
320 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
321 bool bRelativeNonURIs = false,
322 FSysStyle eStyle = FSYS_DETECT) const;
324 // Relative URLs:
326 inline bool
327 GetNewAbsURL(OUString const & rTheRelURIRef,
328 INetURLObject * pTheAbsURIRef)
329 const;
331 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
332 (because of syntactic reasons), either rTheRelURIRef or an empty
333 string is returned: If all of the parameters eEncodeMechanism,
334 eDecodeMechanism and eCharset have their respective default values,
335 then rTheRelURIRef is returned unmodified; otherwise, an empty string
336 is returned.
338 static OUString
339 GetAbsURL(OUString const & rTheBaseURIRef,
340 OUString const & rTheRelURIRef,
341 bool bIgnoreFragment = false,
342 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
343 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
344 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
346 static inline OUString
347 GetRelURL(OUString const & rTheBaseURIRef,
348 OUString const & rTheAbsURIRef,
349 EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded,
350 DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri,
351 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8,
352 FSysStyle eStyle = FSYS_DETECT);
354 // External URLs:
356 OUString getExternalURL() const;
358 static inline bool translateToExternal(OUString const & rTheIntURIRef,
359 OUString & rTheExtURIRef,
360 DecodeMechanism eDecodeMechanism
361 = DecodeMechanism::ToIUri,
362 rtl_TextEncoding eCharset
363 = RTL_TEXTENCODING_UTF8);
365 static inline bool translateToInternal(OUString const & rTheExtURIRef,
366 OUString & rTheIntURIRef,
367 DecodeMechanism eDecodeMechanism
368 = DecodeMechanism::ToIUri,
369 rtl_TextEncoding eCharset
370 = RTL_TEXTENCODING_UTF8);
372 // Scheme:
374 struct SchemeInfo;
376 inline INetProtocol GetProtocol() const { return m_eScheme; }
378 bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; }
380 bool isSchemeEqualTo(OUString const & scheme) const;
382 /** Check if the scheme is one of the WebDAV schemes
383 * we know about.
385 * @return true if the scheme is one of the known WebDAV schemes, either public or private.
387 bool isAnyKnownWebDAVScheme() const;
389 /** Return the URL 'prefix' for a given scheme.
391 @param eTheScheme One of the supported URL schemes.
393 @return The 'prefix' of URLs of the given scheme.
395 static OUString GetScheme(INetProtocol eTheScheme);
397 /** Return a human-readable name for a given scheme.
399 @param eTheScheme One of the supported URL schemes.
401 @return The protocol name of URLs of the given scheme.
403 static OUString GetSchemeName(INetProtocol eTheScheme);
405 static INetProtocol CompareProtocolScheme(OUString const &
406 rTheAbsURIRef);
408 // User Info:
410 inline bool HasUserData() const { return m_aUser.isPresent(); }
412 inline OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
413 rtl_TextEncoding eCharset
414 = RTL_TEXTENCODING_UTF8) const
415 { return decode(m_aUser, eMechanism, eCharset); }
417 inline OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
418 rtl_TextEncoding eCharset
419 = RTL_TEXTENCODING_UTF8) const
420 { return decode(m_aAuth, eMechanism, eCharset); }
422 inline bool SetUser(OUString const & rTheUser)
423 { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); }
425 inline bool SetPass(OUString const & rThePassword);
427 inline bool SetUserAndPass(OUString const & rTheUser,
428 OUString const & rThePassword);
430 // Host and Port:
432 inline bool HasPort() const { return m_aPort.isPresent(); }
434 inline OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
435 rtl_TextEncoding eCharset
436 = RTL_TEXTENCODING_UTF8) const
437 { return decode(m_aHost, eMechanism, eCharset); }
439 OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
440 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
442 sal_uInt32 GetPort() const;
444 inline bool SetHost(OUString const & rTheHost)
445 { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); }
447 bool SetPort(sal_uInt32 nThePort);
449 // Path:
451 inline bool HasURLPath() const { return !m_aPath.isEmpty(); }
453 inline OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
454 rtl_TextEncoding eCharset
455 = RTL_TEXTENCODING_UTF8) const
456 { return decode(m_aPath, eMechanism, eCharset); }
458 inline bool SetURLPath(OUString const & rThePath,
459 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
460 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
461 { return setPath(rThePath, eMechanism, eCharset); }
463 // Hierarchical Path:
465 /** A constant to address the last segment in various methods dealing with
466 hierarchical paths.
468 @descr It is often more efficient to address the last segment using
469 this constant, than to determine its ordinal value using
470 getSegmentCount().
472 enum { LAST_SEGMENT = -1 };
474 /** The number of segments in the hierarchical path.
476 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
477 form
479 hierarchical-path = 1*("/" segment)
481 segment = name *(";" param)
483 name = [base ["." extension]]
485 base = 1*pchar
487 extension = *<any pchar except ".">
489 param = *pchar
491 @param bIgnoreFinalSlash If true, a final slash at the end of the
492 hierarchical path does not denote an empty segment, but is ignored.
494 @return The number of segments in the hierarchical path. If the path
495 is not hierarchical, 0 is returned.
497 sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const;
499 /** Remove a segment from the hierarchical path.
501 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
502 if addressing the last segment.
504 @param bIgnoreFinalSlash If true, a final slash at the end of the
505 hierarchical path does not denote an empty segment, but is ignored.
507 @return True if the segment has successfully been removed (and the
508 resulting URI is still valid). If the path is not hierarchical, or
509 the specified segment does not exist, false is returned. If false is
510 returned, the object is not modified.
512 bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT,
513 bool bIgnoreFinalSlash = true);
515 /** Insert a new segment into the hierarchical path.
516 A final slash at the end of the
517 hierarchical path does not denote an empty segment, but is ignored.
519 @param rTheName The name part of the new segment. The new segment
520 will contain no parameters.
522 @param bAppendFinalSlash If the new segment is appended at the end of
523 the hierarchical path, this parameter specifies whether to add a final
524 slash after it or not.
526 @param nIndex The non-negative index of the segment before which
527 to insert the new segment. LAST_SEGMENT or an nIndex that equals
528 getSegmentCount() inserts the new segment at the end of the
529 hierarchical path.
531 @param eMechanism See the general discussion for set-methods.
533 @param eCharset See the general discussion for set-methods.
535 @return True if the segment has successfully been inserted (and the
536 resulting URI is still valid). If the path is not hierarchical, or
537 the specified place to insert the new segment does not exist, false is
538 returned. If false is returned, the object is not modified.
540 bool insertName(OUString const & rTheName,
541 bool bAppendFinalSlash = false,
542 sal_Int32 nIndex = LAST_SEGMENT,
543 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
544 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
546 /** Get the name of a segment of the hierarchical path.
548 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
549 if addressing the last segment.
551 @param bIgnoreFinalSlash If true, a final slash at the end of the
552 hierarchical path does not denote an empty segment, but is ignored.
554 @param eMechanism See the general discussion for get-methods.
556 @param eCharset See the general discussion for get-methods.
558 @return The name part of the specified segment. If the path is not
559 hierarchical, or the specified segment does not exist, an empty string
560 is returned.
562 OUString getName(sal_Int32 nIndex = LAST_SEGMENT,
563 bool bIgnoreFinalSlash = true,
564 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
565 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
566 const;
568 /** Set the name of a segment (preserving any parameters and any query or
569 fragment part).
571 @param rTheName The new name.
573 @return True if the name has successfully been modified (and the
574 resulting URI is still valid). If the path is not hierarchical, or
575 the specified segment does not exist, false is returned. If false is
576 returned, the object is not modified.
578 bool setName(OUString const & rTheName);
580 /** Get the base of the name of a segment.
582 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
583 if addressing the last segment.
585 @param bIgnoreFinalSlash If true, a final slash at the end of the
586 hierarchical path does not denote an empty segment, but is ignored.
588 @param eMechanism See the general discussion for get-methods.
590 @param eCharset See the general discussion for get-methods.
592 @return The base part of the specified segment. If the path is
593 not hierarchical, or the specified segment does not exist, an empty
594 string is returned.
596 OUString getBase(sal_Int32 nIndex = LAST_SEGMENT,
597 bool bIgnoreFinalSlash = true,
598 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
599 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
600 const;
602 /** Set the base of the name of a segment (preserving the extension).
603 A final slash at the end of the
604 hierarchical path does not denote an empty segment, but is ignored.
606 @param rTheBase The new base.
608 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
609 if addressing the last segment.
611 @param eMechanism See the general discussion for set-methods.
613 @param eCharset See the general discussion for set-methods.
615 @return True if the base has successfully been modified (and the
616 resulting URI is still valid). If the path is not hierarchical, or
617 the specified segment does not exist, false is returned. If false is
618 returned, the object is not modified.
620 bool setBase(OUString const & rTheBase,
621 sal_Int32 nIndex = LAST_SEGMENT,
622 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
623 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
625 /** Determine whether the name of the last segment has an extension.
627 @return True if the name of the last segment has an extension.
628 If the path is not hierarchical, or the last segment does not
629 exist, false is returned.
631 bool hasExtension() const;
633 /** Get the extension of the name of a segment.
635 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
636 if addressing the last segment.
638 @param bIgnoreFinalSlash If true, a final slash at the end of the
639 hierarchical path does not denote an empty segment, but is ignored.
641 @param eMechanism See the general discussion for get-methods.
643 @param eCharset See the general discussion for get-methods.
645 @return The extension part of the specified segment. If the path is
646 not hierarchical, or the specified segment does not exist, an empty
647 string is returned.
649 OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT,
650 bool bIgnoreFinalSlash = true,
651 DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
652 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
653 const;
655 /** Set the extension of the name of a segment (replacing an already
656 existing extension).
658 @param rTheExtension The new extension.
660 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
661 if addressing the last segment.
663 @param bIgnoreFinalSlash If true, a final slash at the end of the
664 hierarchical path does not denote an empty segment, but is ignored.
666 @param eCharset See the general discussion for set-methods.
668 @return True if the extension has successfully been modified (and the
669 resulting URI is still valid). If the path is not hierarchical, or
670 the specified segment does not exist, false is returned. If false is
671 returned, the object is not modified.
673 bool setExtension(OUString const & rTheExtension,
674 sal_Int32 nIndex = LAST_SEGMENT,
675 bool bIgnoreFinalSlash = true,
676 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
678 /** Remove the extension of the name of a segment.
680 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
681 if addressing the last segment.
683 @param bIgnoreFinalSlash If true, a final slash at the end of the
684 hierarchical path does not denote an empty segment, but is ignored.
686 @return True if the extension has successfully been removed (and the
687 resulting URI is still valid), or if the name did not have an
688 extension. If the path is not hierarchical, or the specified segment
689 does not exist, false is returned. If false is returned, the object
690 is not modified.
692 bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT,
693 bool bIgnoreFinalSlash = true);
695 /** Determine whether the hierarchical path ends in a final slash.
697 @return True if the hierarchical path ends in a final slash. If the
698 path is not hierarchical, false is returned.
700 bool hasFinalSlash() const;
702 /** Make the hierarchical path end in a final slash (if it does not
703 already do so).
705 @return True if a final slash has successfully been appended (and the
706 resulting URI is still valid), or if the hierarchical path already
707 ended in a final slash. If the path is not hierarchical, false is
708 returned. If false is returned, the object is not modified.
710 bool setFinalSlash();
712 /** Remove a final slash from the hierarchical path.
714 @return True if a final slash has successfully been removed (and the
715 resulting URI is still valid), or if the hierarchical path already did
716 not end in a final slash. If the path is not hierarchical, false is
717 returned. If false is returned, the object is not modified.
719 bool removeFinalSlash();
721 // Query:
723 inline bool HasParam() const { return m_aQuery.isPresent(); }
725 inline OUString GetParam(rtl_TextEncoding eCharset
726 = RTL_TEXTENCODING_UTF8) const
727 { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); }
729 inline bool SetParam(OUString const & rTheQuery,
730 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
731 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
733 // Fragment:
735 inline bool HasMark() const { return m_aFragment.isPresent(); }
737 inline OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
738 rtl_TextEncoding eCharset
739 = RTL_TEXTENCODING_UTF8) const
740 { return decode(m_aFragment, eMechanism, eCharset); }
742 inline bool SetMark(OUString const & rTheFragment,
743 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
744 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
746 // File URLs:
748 /** Create an INetURLObject from a file system path.
750 @param rFSysPath A file system path. An URL is not allowed here!
752 @param eStyle The notation of rFSysPath.
754 inline INetURLObject(OUString const & rFSysPath, FSysStyle eStyle);
756 /** Set this INetURLObject to a file URL constructed from a file system
757 path.
759 @param rFSysPath A file system path. An URL is not allowed here!
761 @param eStyle The notation of rFSysPath.
763 @return True if this INetURLObject has successfully been changed. If
764 false is returned, this INetURLObject has not been modified.
766 bool setFSysPath(OUString const & rFSysPath, FSysStyle eStyle);
768 /** Return the file system path represented by a file URL (ignoring any
769 fragment part).
771 @param eStyle The notation of the returned file system path.
773 @param pDelimiter Upon successful return, this parameter can return
774 the character that is the 'main' delimiter within the returned file
775 system path (e.g., "/" for Unix, "\" for DOS). This is
776 especially useful for routines that later try to shorten the returned
777 file system path at a 'good' position, e.g. to fit it into some
778 limited display space.
780 @return The file system path represented by this file URL. If this
781 file URL does not represent a file system path according to the
782 specified notation, or if this is not a file URL at all, an empty
783 string is returned.
785 OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr)
786 const;
788 // Data URLs:
789 std::unique_ptr<SvMemoryStream> getData();
791 // Coding:
/** Identifies the kind of URI part a piece of text belongs to.

    A Part value is passed as the ePart argument of encode() and
    appendUCS4(), where it determines which characters are 'forbidden'
    and must be replaced by escape sequences.

    Every enumerator is a distinct single bit (0x00002 and 0x00004 are not
    assigned).  Presumably this lets the implementation test a character's
    class against a bit mask -- confirm in the implementation file.
 */
enum Part
{
    PART_USER_PASSWORD      = 0x00001,
    PART_FPATH              = 0x00008,
    PART_AUTHORITY          = 0x00010,
    PART_REL_SEGMENT_EXTRA  = 0x00020,
    PART_URIC               = 0x00040,
    PART_HTTP_PATH          = 0x00080,
    PART_MESSAGE_ID_PATH    = 0x00100,
    PART_MAILTO             = 0x00200,
    PART_PATH_BEFORE_QUERY  = 0x00400,
    PART_PCHAR              = 0x00800,
    PART_VISIBLE            = 0x01000,
    PART_VISIBLE_NONSPECIAL = 0x02000,
    PART_UNO_PARAM_VALUE    = 0x04000,
    PART_UNAMBIGUOUS        = 0x08000,
    PART_URIC_NO_SLASH      = 0x10000,
    PART_HTTP_QUERY         = 0x20000, //TODO! unused?
};
/** Classifies how a character was obtained from the input text.

    getUTF32() reports a value of this type through its rEscapeType
    out-parameter, and appendUCS4() takes one as eEscapeType.  Going by the
    names: not escaped, an escape sequence for a single octet, or escape
    sequence(s) for a full UTF-32 character -- confirm the exact meaning in
    the implementation file.
 */
enum EscapeType
{
    ESCAPE_NO,
    ESCAPE_OCTET,
    ESCAPE_UTF32
};
820 /** Encode some text as part of a URI.
822 @param rText Some text (for its interpretation, see the general
823 discussion for set-methods).
825 @param ePart The part says which characters are 'forbidden' and must
826 be encoded (replaced by escape sequences). Characters outside the US-
827 ASCII range are always 'forbidden.'
829 @param eMechanism See the general discussion for set-methods.
831 @param eCharset See the general discussion for set-methods.
833 @return The text, encoded according to the given mechanism and
834 charset ('forbidden' characters replaced by escape sequences).
836 static inline OUString encode(OUString const & rText, Part ePart,
837 EncodeMechanism eMechanism,
838 rtl_TextEncoding eCharset
839 = RTL_TEXTENCODING_UTF8);
841 /** Decode some text.
843 @param rText Some (encoded) text.
845 @param eMechanism See the general discussion for get-methods.
847 @param eCharset See the general discussion for get-methods.
849 @return The text, decoded according to the given mechanism and
850 charset (escape sequences replaced by 'raw' characters).
852 static inline OUString decode(OUString const & rText,
853 DecodeMechanism eMechanism,
854 rtl_TextEncoding eCharset
855 = RTL_TEXTENCODING_UTF8);
857 static inline OUString decode(OUStringBuffer const & rText,
858 DecodeMechanism eMechanism,
859 rtl_TextEncoding eCharset
860 = RTL_TEXTENCODING_UTF8);
862 static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4);
864 static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4,
865 EscapeType eEscapeType, bool bOctets, Part ePart,
866 rtl_TextEncoding eCharset, bool bKeepVisibleEscapes);
868 static sal_uInt32 getUTF32(sal_Unicode const *& rBegin,
869 sal_Unicode const * pEnd, bool bOctets,
870 EncodeMechanism eMechanism,
871 rtl_TextEncoding eCharset,
872 EscapeType & rEscapeType);
874 // Specialized helpers:
876 static sal_uInt32 scanDomain(sal_Unicode const *& rBegin,
877 sal_Unicode const * pEnd,
878 bool bEager = true);
880 // OBSOLETE Hierarchical Path:
882 OUString GetPartBeforeLastName() const;
884 /** Get the last segment in the path.
886 @param eMechanism See the general discussion for get-methods.
888 @param eCharset See the general discussion for get-methods.
890 @return For a hierarchical URL, the last segment (everything after
891 the last unencoded '/'). Note that this last segment may be empty. If
892 the URL is not hierarchical, an empty string is returned.
894 OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
895 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8)
896 const;
898 /** Get the 'extension' of the last segment in the path.
900 @return For a hierarchical URL, everything after the first unencoded
901 '.' in the last segment of the path. Note that this 'extension' may
902 be empty. If the URL is not hierarchical, or if the last segment does
903 not contain an unencoded '.', an empty string is returned.
905 OUString GetFileExtension() const;
907 bool Append(OUString const & rTheSegment,
908 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
909 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
911 void CutLastName();
913 // OBSOLETE File URLs:
915 OUString PathToFileName() const;
917 OUString GetFull() const;
919 OUString GetPath() const;
921 void SetBase(OUString const & rTheBase);
923 OUString GetBase() const;
925 void SetName(OUString const & rTheName,
926 EncodeMechanism eMechanism = EncodeMechanism::WasEncoded,
927 rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
929 inline OUString GetName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri,
930 rtl_TextEncoding eCharset
931 = RTL_TEXTENCODING_UTF8) const
932 { return GetLastName(eMechanism, eCharset); }
934 void SetExtension(OUString const & rTheExtension);
936 inline OUString GetExtension() const
937 { return GetFileExtension(); }
939 OUString CutExtension();
941 static bool IsCaseSensitive() { return true; }
944 private:
945 // General Structure:
947 class SubString
949 sal_Int32 m_nBegin;
950 sal_Int32 m_nLength;
952 public:
953 explicit inline SubString(sal_Int32 nTheBegin = -1,
954 sal_Int32 nTheLength = 0):
955 m_nBegin(nTheBegin), m_nLength(nTheLength) {}
957 inline bool isPresent() const { return m_nBegin != -1; }
959 inline bool isEmpty() const { return m_nLength == 0; }
961 inline sal_Int32 getBegin() const { return m_nBegin; }
963 inline sal_Int32 getLength() const { return m_nLength; }
965 inline sal_Int32 getEnd() const { return m_nBegin + m_nLength; }
967 inline sal_Int32 clear();
969 inline sal_Int32 set(OUStringBuffer & rString,
970 OUString const & rSubString,
971 sal_Int32 nTheBegin);
973 inline sal_Int32 set(OUString & rString,
974 OUString const & rSubString);
976 inline sal_Int32 set(OUStringBuffer & rString,
977 OUString const & rSubString);
979 inline void operator +=(sal_Int32 nDelta);
981 int compare(SubString const & rOther,
982 OUStringBuffer const & rThisString,
983 OUStringBuffer const & rOtherString) const;
986 OUStringBuffer m_aAbsURIRef;
987 SubString m_aScheme;
988 SubString m_aUser;
989 SubString m_aAuth;
990 SubString m_aHost;
991 SubString m_aPort;
992 SubString m_aPath;
993 SubString m_aQuery;
994 SubString m_aFragment;
995 INetProtocol m_eScheme;
996 INetProtocol m_eSmartScheme;
998 TOOLS_DLLPRIVATE void setInvalid();
1000 bool setAbsURIRef(
1001 OUString const & rTheAbsURIRef,
1002 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart,
1003 FSysStyle eStyle);
1005 // Relative URLs:
1007 bool convertRelToAbs(
1008 OUString const & rTheRelURIRef,
1009 INetURLObject & rTheAbsURIRef, bool & rWasAbsolute,
1010 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1011 bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs,
1012 FSysStyle eStyle) const;
1014 bool convertAbsToRel(
1015 OUString const & rTheAbsURIRef,
1016 OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism,
1017 DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset,
1018 FSysStyle eStyle) const;
1020 // External URLs:
1022 static bool convertIntToExt(
1023 OUString const & rTheIntURIRef, bool bOctets,
1024 OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism,
1025 rtl_TextEncoding eCharset);
1027 static bool convertExtToInt(
1028 OUString const & rTheExtURIRef, bool bOctets,
1029 OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism,
1030 rtl_TextEncoding eCharset);
1032 // Scheme:
1034 struct PrefixInfo;
1036 TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo(
1037 INetProtocol eTheScheme);
1039 TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const;
1041 TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix(
1042 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1044 // Authority:
1046 TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const;
1048 TOOLS_DLLPRIVATE SubString getAuthority() const;
1050 // User Info:
1052 bool setUser(
1053 OUString const & rTheUser,
1054 rtl_TextEncoding eCharset);
1056 bool clearPassword();
1058 bool setPassword(
1059 OUString const & rThePassword,
1060 rtl_TextEncoding eCharset);
1062 // Host and Port:
1064 TOOLS_DLLPRIVATE static bool parseHost(
1065 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
1066 OUString & rCanonic);
1068 TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName(
1069 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
1070 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1071 bool bNetBiosName, OUStringBuffer* pCanonic);
1073 bool setHost(
1074 OUString const & rTheHost,
1075 rtl_TextEncoding eCharset);
1077 // Path:
1079 TOOLS_DLLPRIVATE static bool parsePath(
1080 INetProtocol eScheme, sal_Unicode const ** pBegin,
1081 sal_Unicode const * pEnd, bool bOctets, EncodeMechanism eMechanism,
1082 rtl_TextEncoding eCharset, bool bSkippedInitialSlash,
1083 sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter,
1084 sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter,
1085 OUStringBuffer &rSynPath);
1087 bool setPath(
1088 OUString const & rThePath,
1089 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1091 // Hierarchical Path:
1093 TOOLS_DLLPRIVATE bool checkHierarchical() const;
1095 TOOLS_DLLPRIVATE SubString getSegment(
1096 sal_Int32 nIndex, bool bIgnoreFinalSlash) const;
1098 // Query:
1100 bool clearQuery();
1102 bool setQuery(
1103 OUString const & rTheQuery,
1104 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1106 // Fragment:
1108 bool clearFragment();
1110 bool setFragment(
1111 OUString const & rTheMark,
1112 EncodeMechanism eMechanism, rtl_TextEncoding eCharset);
1114 // FILE URLs:
1116 TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const;
1118 // Coding:
1120 TOOLS_DLLPRIVATE static inline void appendEscape(
1121 OUStringBuffer & rTheText, sal_uInt32 nOctet);
1123 static OUString encodeText(
1124 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
1125 Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1126 bool bKeepVisibleEscapes);
1128 static inline OUString encodeText(
1129 OUString const & rTheText, bool bOctets, Part ePart,
1130 EncodeMechanism eMechanism, rtl_TextEncoding eCharset,
1131 bool bKeepVisibleEscapes);
1133 static OUString decode(
1134 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
1135 DecodeMechanism, rtl_TextEncoding eCharset);
1137 inline OUString decode(
1138 SubString const & rSubString,
1139 DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const;
1141 // Specialized helpers:
1143 TOOLS_DLLPRIVATE static bool scanIPv6reference(
1144 sal_Unicode const *& rBegin, sal_Unicode const * pEnd);
1146 private:
1147 void changeScheme(INetProtocol eTargetScheme);
1150 // static
1151 inline OUString INetURLObject::encodeText(OUString const & rTheText,
1152 bool bOctets, Part ePart,
1153 EncodeMechanism eMechanism,
1154 rtl_TextEncoding eCharset,
1155 bool bKeepVisibleEscapes)
1157 return encodeText(rTheText.getStr(),
1158 rTheText.getStr() + rTheText.getLength(), bOctets, ePart,
1159 eMechanism, eCharset, bKeepVisibleEscapes);
1162 inline OUString INetURLObject::decode(SubString const & rSubString,
1163 DecodeMechanism eMechanism,
1164 rtl_TextEncoding eCharset) const
1166 return rSubString.isPresent() ?
1167 decode(m_aAbsURIRef.getStr() + rSubString.getBegin(),
1168 m_aAbsURIRef.getStr() + rSubString.getEnd(),
1169 eMechanism, eCharset) :
1170 OUString();
1173 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1174 EncodeMechanism eMechanism,
1175 rtl_TextEncoding eCharset):
1176 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1178 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1179 FSysStyle(0));
1182 inline bool INetURLObject::SetURL(OUString const & rTheAbsURIRef,
1183 EncodeMechanism eMechanism,
1184 rtl_TextEncoding eCharset)
1186 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false,
1187 FSysStyle(0));
1190 inline INetURLObject::INetURLObject(OUString const & rTheAbsURIRef,
1191 INetProtocol eTheSmartScheme,
1192 EncodeMechanism eMechanism,
1193 rtl_TextEncoding eCharset,
1194 FSysStyle eStyle):
1195 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme)
1197 setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle);
1200 inline bool INetURLObject::SetSmartURL(OUString const & rTheAbsURIRef,
1201 EncodeMechanism eMechanism,
1202 rtl_TextEncoding eCharset,
1203 FSysStyle eStyle)
1205 return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true,
1206 eStyle);
1209 inline INetURLObject
1210 INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef,
1211 bool & rWasAbsolute,
1212 bool bIgnoreFragment,
1213 EncodeMechanism eMechanism,
1214 rtl_TextEncoding eCharset,
1215 bool bRelativeNonURIs,
1216 FSysStyle eStyle) const
1218 INetURLObject aTheAbsURIRef;
1219 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute,
1220 eMechanism, eCharset, bIgnoreFragment, true,
1221 bRelativeNonURIs, eStyle);
1222 return aTheAbsURIRef;
1225 inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef,
1226 INetURLObject * pTheAbsURIRef)
1227 const
1229 INetURLObject aTheAbsURIRef;
1230 bool bWasAbsolute;
1231 if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute,
1232 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false,
1233 FSYS_DETECT))
1234 return false;
1235 if (pTheAbsURIRef)
1236 *pTheAbsURIRef = aTheAbsURIRef;
1237 return true;
1240 // static
1241 inline OUString INetURLObject::GetRelURL(OUString const & rTheBaseURIRef,
1242 OUString const & rTheAbsURIRef,
1243 EncodeMechanism eEncodeMechanism,
1244 DecodeMechanism eDecodeMechanism,
1245 rtl_TextEncoding eCharset,
1246 FSysStyle eStyle)
1248 OUString aTheRelURIRef;
1249 INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
1250 convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism,
1251 eDecodeMechanism, eCharset, eStyle);
1252 return aTheRelURIRef;
1255 // static
1256 inline bool INetURLObject::translateToExternal(OUString const &
1257 rTheIntURIRef,
1258 OUString & rTheExtURIRef,
1259 DecodeMechanism
1260 eDecodeMechanism,
1261 rtl_TextEncoding eCharset)
1263 return convertIntToExt(rTheIntURIRef, false, rTheExtURIRef,
1264 eDecodeMechanism, eCharset);
1267 // static
1268 inline bool INetURLObject::translateToInternal(OUString const &
1269 rTheExtURIRef,
1270 OUString & rTheIntURIRef,
1271 DecodeMechanism
1272 eDecodeMechanism,
1273 rtl_TextEncoding eCharset)
1275 return convertExtToInt(rTheExtURIRef, false, rTheIntURIRef,
1276 eDecodeMechanism, eCharset);
1279 inline bool INetURLObject::SetPass(OUString const & rThePassword)
1281 return rThePassword.isEmpty() ?
1282 clearPassword() :
1283 setPassword(rThePassword, RTL_TEXTENCODING_UTF8);
1286 inline bool INetURLObject::SetUserAndPass(OUString const & rTheUser,
1287 OUString const & rThePassword)
1289 return setUser(rTheUser, RTL_TEXTENCODING_UTF8)
1290 && (rThePassword.isEmpty() ?
1291 clearPassword() :
1292 setPassword(rThePassword, RTL_TEXTENCODING_UTF8));
1295 inline bool INetURLObject::SetParam(OUString const & rTheQuery,
1296 EncodeMechanism eMechanism,
1297 rtl_TextEncoding eCharset)
1299 return rTheQuery.isEmpty() ?
1300 clearQuery() :
1301 setQuery(rTheQuery, eMechanism, eCharset);
1304 inline bool INetURLObject::SetMark(OUString const & rTheFragment,
1305 EncodeMechanism eMechanism,
1306 rtl_TextEncoding eCharset)
1308 return rTheFragment.isEmpty() ?
1309 clearFragment() :
1310 setFragment(rTheFragment, eMechanism, eCharset);
1313 inline INetURLObject::INetURLObject(OUString const & rFSysPath,
1314 FSysStyle eStyle):
1315 m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http)
1317 setFSysPath(rFSysPath, eStyle);
1320 // static
1321 inline OUString INetURLObject::encode(OUString const & rText, Part ePart,
1322 EncodeMechanism eMechanism,
1323 rtl_TextEncoding eCharset)
1325 return encodeText(rText, false, ePart, eMechanism, eCharset, false);
1328 // static
1329 inline OUString INetURLObject::decode(OUString const & rText,
1330 DecodeMechanism eMechanism,
1331 rtl_TextEncoding eCharset)
1333 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1334 eMechanism, eCharset);
1337 inline OUString INetURLObject::decode(OUStringBuffer const & rText,
1338 DecodeMechanism eMechanism,
1339 rtl_TextEncoding eCharset)
1341 return decode(rText.getStr(), rText.getStr() + rText.getLength(),
1342 eMechanism, eCharset);
1345 #endif
1347 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */