1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <com/sun/star/uno/Reference.hxx>
24 #include <rtl/string.h>
25 #include <rtl/ustrbuf.hxx>
26 #include <rtl/textenc.h>
27 #include <sal/types.h>
33 namespace com
{ namespace sun
{ namespace star
{ namespace util
{
37 // Common URL prefixes for various schemes:
38 #define INET_FTP_SCHEME "ftp://"
39 #define INET_HTTP_SCHEME "http://"
40 #define INET_HTTPS_SCHEME "https://"
41 #define INET_FILE_SCHEME "file://"
42 #define INET_MAILTO_SCHEME "mailto:"
43 #define INET_HID_SCHEME "hid:"
45 #define URL_PREFIX_PRIV_SOFFICE "private:"
48 enum class INetProtocol
82 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
85 // Get- and Set-Methods:
87 /** The way input strings that represent (parts of) URIs are interpreted
90 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
91 sequences of escape sequences, representing the UTF-8 coded characters.
93 @descr Along with an EncodeMechanism parameter, the set-methods all
94 take an rtl_TextEncoding parameter, which is ignored unless the
95 EncodeMechanism is EncodeMechanism::WasEncoded.
97 enum class EncodeMechanism
99 /** All escape sequences that are already present are ignored, and are
100 interpreted as literal sequences of three characters.
104 /** Sequences of escape sequences, that represent characters from the
105 specified character set and that can be converted to UTF-32
106 characters, are first decoded. If they have to be encoded, they
107 are converted to UTF-8 characters and are then translated into
108 (sequences of) escape sequences. Other escape sequences are
109 copied verbatim (but using upper case hex digits).
113 /** All escape sequences that are already present are copied verbatim
114 (but using upper case hex digits).
119 /** The way strings that represent (parts of) URIs are returned from get-
122 @descr Along with a DecodeMechanism parameter, the get-methods all
123 take an rtl_TextEncoding parameter, which is ignored unless the
124 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
126 enum class DecodeMechanism
128 /** The (part of the) URI is returned unchanged. Since URIs are
129 written using a subset of US-ASCII, the returned string is
130 guaranteed to contain only US-ASCII characters.
134 /** All sequences of escape sequences that represent UTF-8 coded
135 UTF-32 characters with a numerical value greater than 0x7F, are
136 replaced by the respective UTF-16 characters. All other escape
137 sequences are not decoded.
141 /** All (sequences of) escape sequences that represent characters from
142 the specified character set, and that can be converted to UTF-32,
143 are replaced by the respective UTF-16 characters. All other
144 escape sequences are not decoded.
148 /** All (sequences of) escape sequences that represent characters from
149 the specified character set, that can be converted to UTF-32, and
150 that (in the case of ASCII characters) can safely be decoded
151 without altering the meaning of the (part of the) URI, are
152 replaced by the respective UTF-16 characters. All other escape
153 sequences are not decoded.
158 // General Structure:
160 inline INetURLObject():
161 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
) {}
163 inline bool HasError() const { return m_eScheme
== INetProtocol::NotValid
; }
165 inline OUString
GetMainURL(DecodeMechanism eMechanism
,
166 rtl_TextEncoding eCharset
167 = RTL_TEXTENCODING_UTF8
) const
168 { return decode(m_aAbsURIRef
, eMechanism
, eCharset
); }
170 OUString
GetURLNoPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
171 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
174 OUString
GetURLNoMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
175 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
179 getAbbreviated(css::uno::Reference
< css::util::XStringWidth
> const & rStringWidth
,
181 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
182 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
185 bool operator ==(INetURLObject
const & rObject
) const;
187 inline bool operator !=(INetURLObject
const & rObject
) const
188 { return !(*this == rObject
); }
192 inline explicit INetURLObject(
193 OUString
const & rTheAbsURIRef
,
194 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
195 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
197 inline bool SetURL(OUString
const & rTheAbsURIRef
,
198 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
199 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
201 bool ConcatData(INetProtocol eTheScheme
, OUString
const & rTheUser
,
202 OUString
const & rThePassword
,
203 OUString
const & rTheHost
, sal_uInt32 nThePort
,
204 OUString
const & rThePath
);
208 /** The supported notations for file system paths.
212 /** VOS notation (e.g., "//server/dir/file").
216 /** Unix notation (e.g., "/dir/file").
220 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
224 /** Detect the used notation.
226 @descr For the following descriptions, please note that
227 whereas FSYS_DETECT includes all style bits, combinations of only
228 a few style bits are also possible, and are also described.
230 @descr When used to translate a file system path to a file URL,
231 the subset of the following productions for which the appropriate
232 style bit is set are checked in order (using the conventions of
233 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
236 Production T1 (VOS local; FSYS_VOS only):
241 Production T2 (VOS host; FSYS_VOS only):
242 "//" [host] ["/" *UCS4]
244 "file://" host "/" *UCS4
246 Production T3 (UNC; FSYS_DOS only):
247 "\\" [host] ["\" *UCS4]
249 "file://" host "/" *UCS4
250 replacing "\" by "/" within <*UCS4>
252 Production T4 (Unix-like DOS; FSYS_DOS only):
253 ALPHA ":" ["/" *UCS4]
255 "file:///" ALPHA ":/" *UCS4
256 replacing "\" by "/" within <*UCS4>
258 Production T5 (DOS; FSYS_DOS only):
259 ALPHA ":" ["\" *UCS4]
261 "file:///" ALPHA ":/" *UCS4
262 replacing "\" by "/" within <*UCS4>
268 replacing the delimiter by "/" within <*UCS4>. The delimiter is
269 that character from the set { "/", "\" } which appears most
270 often in <*UCS4> (if FSYS_UNX is not among the style bits, "/"
271 is removed from the set; if FSYS_DOS is not among the style
272 bits, "\" is removed from the set). If two or more
273 characters appear the same number of times, the character
274 mentioned first in that set is chosen. If the first character
275 of <*UCS4> is the delimiter, that character is not copied.
277 @descr When used to translate a file URL to a file system path,
278 the following productions are checked in order (using the
279 conventions of RFC 2234, RFC 2396, and RFC 2732):
281 Production F1 (VOS; FSYS_VOS):
282 "file://" host "/" fpath ["#" fragment]
286 Production F2 (DOS; FSYS_DOS):
287 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
289 ALPHA ":" ["\" fpath]
290 replacing "/" by "\" in <fpath>
292 Production F3 (Unix; FSYS_UNX):
293 "file:///" fpath ["#" fragment]
297 FSYS_DETECT
= FSYS_VOS
| FSYS_UNX
| FSYS_DOS
300 inline INetURLObject(OUString
const & rTheAbsURIRef
,
301 INetProtocol eTheSmartScheme
,
302 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
303 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
304 FSysStyle eStyle
= FSYS_DETECT
);
306 inline void SetSmartProtocol(INetProtocol eTheSmartScheme
)
307 { m_eSmartScheme
= eTheSmartScheme
; }
310 SetSmartURL(OUString
const & rTheAbsURIRef
,
311 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
312 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
313 FSysStyle eStyle
= FSYS_DETECT
);
316 smartRel2Abs(OUString
const & rTheRelURIRef
,
318 bool bIgnoreFragment
= false,
319 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
320 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
321 bool bRelativeNonURIs
= false,
322 FSysStyle eStyle
= FSYS_DETECT
) const;
327 GetNewAbsURL(OUString
const & rTheRelURIRef
,
328 INetURLObject
* pTheAbsURIRef
)
331 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
332 (because of syntactic reasons), either rTheRelURIRef or an empty
333 string is returned: If all of the parameters eEncodeMechanism,
334 eDecodeMechanism and eCharset have their respective default values,
335 then rTheRelURIRef is returned unmodified; otherwise, an empty string
339 GetAbsURL(OUString
const & rTheBaseURIRef
,
340 OUString
const & rTheRelURIRef
,
341 bool bIgnoreFragment
= false,
342 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
343 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
344 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
346 static inline OUString
347 GetRelURL(OUString
const & rTheBaseURIRef
,
348 OUString
const & rTheAbsURIRef
,
349 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
350 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
351 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
352 FSysStyle eStyle
= FSYS_DETECT
);
356 OUString
getExternalURL() const;
358 static inline bool translateToExternal(OUString
const & rTheIntURIRef
,
359 OUString
& rTheExtURIRef
,
360 DecodeMechanism eDecodeMechanism
361 = DecodeMechanism::ToIUri
,
362 rtl_TextEncoding eCharset
363 = RTL_TEXTENCODING_UTF8
);
365 static inline bool translateToInternal(OUString
const & rTheExtURIRef
,
366 OUString
& rTheIntURIRef
,
367 DecodeMechanism eDecodeMechanism
368 = DecodeMechanism::ToIUri
,
369 rtl_TextEncoding eCharset
370 = RTL_TEXTENCODING_UTF8
);
376 inline INetProtocol
GetProtocol() const { return m_eScheme
; }
378 bool isSchemeEqualTo(INetProtocol scheme
) const { return scheme
== m_eScheme
; }
380 bool isSchemeEqualTo(OUString
const & scheme
) const;
382 /** Check if the scheme is one of the WebDAV schemes
385 * @return true if the scheme is one of the WebDAV schemes, either a public scheme or a private scheme.
387 bool isAnyKnownWebDAVScheme() const;
389 /** Return the URL 'prefix' for a given scheme.
391 @param eTheScheme One of the supported URL schemes.
393 @return The 'prefix' of URLs of the given scheme.
395 static OUString
GetScheme(INetProtocol eTheScheme
);
397 /** Return a human-readable name for a given scheme.
399 @param eTheScheme One of the supported URL schemes.
401 @return The protocol name of URLs of the given scheme.
403 static OUString
GetSchemeName(INetProtocol eTheScheme
);
405 static INetProtocol
CompareProtocolScheme(OUString
const &
410 inline bool HasUserData() const { return m_aUser
.isPresent(); }
412 inline OUString
GetUser(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
413 rtl_TextEncoding eCharset
414 = RTL_TEXTENCODING_UTF8
) const
415 { return decode(m_aUser
, eMechanism
, eCharset
); }
417 inline OUString
GetPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
418 rtl_TextEncoding eCharset
419 = RTL_TEXTENCODING_UTF8
) const
420 { return decode(m_aAuth
, eMechanism
, eCharset
); }
422 inline bool SetUser(OUString
const & rTheUser
)
423 { return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
); }
425 inline bool SetPass(OUString
const & rThePassword
);
427 inline bool SetUserAndPass(OUString
const & rTheUser
,
428 OUString
const & rThePassword
);
432 inline bool HasPort() const { return m_aPort
.isPresent(); }
434 inline OUString
GetHost(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
435 rtl_TextEncoding eCharset
436 = RTL_TEXTENCODING_UTF8
) const
437 { return decode(m_aHost
, eMechanism
, eCharset
); }
439 OUString
GetHostPort(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
440 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
442 sal_uInt32
GetPort() const;
444 inline bool SetHost(OUString
const & rTheHost
)
445 { return setHost(rTheHost
, RTL_TEXTENCODING_UTF8
); }
447 bool SetPort(sal_uInt32 nThePort
);
451 inline bool HasURLPath() const { return !m_aPath
.isEmpty(); }
453 inline OUString
GetURLPath(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
454 rtl_TextEncoding eCharset
455 = RTL_TEXTENCODING_UTF8
) const
456 { return decode(m_aPath
, eMechanism
, eCharset
); }
458 inline bool SetURLPath(OUString
const & rThePath
,
459 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
460 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
461 { return setPath(rThePath
, eMechanism
, eCharset
); }
463 // Hierarchical Path:
465 /** A constant to address the last segment in various methods dealing with
468 @descr It is often more efficient to address the last segment using
469 this constant, than to determine its ordinal value using
472 enum { LAST_SEGMENT
= -1 };
474 /** The number of segments in the hierarchical path.
476 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
479 hierarchical-path = 1*("/" segment)
481 segment = name *(";" param)
483 name = [base ["." extension]]
487 extension = *<any pchar except ".">
491 @param bIgnoreFinalSlash If true, a final slash at the end of the
492 hierarchical path does not denote an empty segment, but is ignored.
494 @return The number of segments in the hierarchical path. If the path
495 is not hierarchical, 0 is returned.
497 sal_Int32
getSegmentCount(bool bIgnoreFinalSlash
= true) const;
499 /** Remove a segment from the hierarchical path.
501 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
502 if addressing the last segment.
504 @param bIgnoreFinalSlash If true, a final slash at the end of the
505 hierarchical path does not denote an empty segment, but is ignored.
507 @return True if the segment has successfully been removed (and the
508 resulting URI is still valid). If the path is not hierarchical, or
509 the specified segment does not exist, false is returned. If false is
510 returned, the object is not modified.
512 bool removeSegment(sal_Int32 nIndex
= LAST_SEGMENT
,
513 bool bIgnoreFinalSlash
= true);
515 /** Insert a new segment into the hierarchical path.
516 A final slash at the end of the
517 hierarchical path does not denote an empty segment, but is ignored.
519 @param rTheName The name part of the new segment. The new segment
520 will contain no parameters.
522 @param bAppendFinalSlash If the new segment is appended at the end of
523 the hierarchical path, this parameter specifies whether to add a final
524 slash after it or not.
526 @param nIndex The non-negative index of the segment before which
527 to insert the new segment. LAST_SEGMENT or an nIndex that equals
528 getSegmentCount() inserts the new segment at the end of the
531 @param eMechanism See the general discussion for set-methods.
533 @param eCharset See the general discussion for set-methods.
535 @return True if the segment has successfully been inserted (and the
536 resulting URI is still valid). If the path is not hierarchical, or
537 the specified place to insert the new segment does not exist, false is
538 returned. If false is returned, the object is not modified.
540 bool insertName(OUString
const & rTheName
,
541 bool bAppendFinalSlash
= false,
542 sal_Int32 nIndex
= LAST_SEGMENT
,
543 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
544 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
546 /** Get the name of a segment of the hierarchical path.
548 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
549 if addressing the last segment.
551 @param bIgnoreFinalSlash If true, a final slash at the end of the
552 hierarchical path does not denote an empty segment, but is ignored.
554 @param eMechanism See the general discussion for get-methods.
556 @param eCharset See the general discussion for get-methods.
558 @return The name part of the specified segment. If the path is not
559 hierarchical, or the specified segment does not exist, an empty string
562 OUString
getName(sal_Int32 nIndex
= LAST_SEGMENT
,
563 bool bIgnoreFinalSlash
= true,
564 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
565 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
568 /** Set the name of a segment (preserving any parameters and any query or
571 @param rTheName The new name.
573 @return True if the name has successfully been modified (and the
574 resulting URI is still valid). If the path is not hierarchical, or
575 the specified segment does not exist, false is returned. If false is
576 returned, the object is not modified.
578 bool setName(OUString
const & rTheName
);
580 /** Get the base of the name of a segment.
582 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
583 if addressing the last segment.
585 @param bIgnoreFinalSlash If true, a final slash at the end of the
586 hierarchical path does not denote an empty segment, but is ignored.
588 @param eMechanism See the general discussion for get-methods.
590 @param eCharset See the general discussion for get-methods.
592 @return The base part of the specified segment. If the path is
593 not hierarchical, or the specified segment does not exist, an empty
596 OUString
getBase(sal_Int32 nIndex
= LAST_SEGMENT
,
597 bool bIgnoreFinalSlash
= true,
598 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
599 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
602 /** Set the base of the name of a segment (preserving the extension).
603 A final slash at the end of the
604 hierarchical path does not denote an empty segment, but is ignored.
606 @param rTheBase The new base.
608 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
609 if addressing the last segment.
611 @param eMechanism See the general discussion for set-methods.
613 @param eCharset See the general discussion for set-methods.
615 @return True if the base has successfully been modified (and the
616 resulting URI is still valid). If the path is not hierarchical, or
617 the specified segment does not exist, false is returned. If false is
618 returned, the object is not modified.
620 bool setBase(OUString
const & rTheBase
,
621 sal_Int32 nIndex
= LAST_SEGMENT
,
622 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
623 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
625 /** Determine whether the name of the last segment has an extension.
627 @return True if the name of the specified segment has an extension.
628 If the path is not hierarchical, or the specified segment does not
629 exist, false is returned.
631 bool hasExtension() const;
633 /** Get the extension of the name of a segment.
635 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
636 if addressing the last segment.
638 @param bIgnoreFinalSlash If true, a final slash at the end of the
639 hierarchical path does not denote an empty segment, but is ignored.
641 @param eMechanism See the general discussion for get-methods.
643 @param eCharset See the general discussion for get-methods.
645 @return The extension part of the specified segment. If the path is
646 not hierarchical, or the specified segment does not exist, an empty
649 OUString
getExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
650 bool bIgnoreFinalSlash
= true,
651 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
652 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
655 /** Set the extension of the name of a segment (replacing an already
658 @param rTheExtension The new extension.
660 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
661 if addressing the last segment.
663 @param bIgnoreFinalSlash If true, a final slash at the end of the
664 hierarchical path does not denote an empty segment, but is ignored.
666 @param eCharset See the general discussion for set-methods.
668 @return True if the extension has successfully been modified (and the
669 resulting URI is still valid). If the path is not hierarchical, or
670 the specified segment does not exist, false is returned. If false is
671 returned, the object is not modified.
673 bool setExtension(OUString
const & rTheExtension
,
674 sal_Int32 nIndex
= LAST_SEGMENT
,
675 bool bIgnoreFinalSlash
= true,
676 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
678 /** Remove the extension of the name of a segment.
680 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
681 if addressing the last segment.
683 @param bIgnoreFinalSlash If true, a final slash at the end of the
684 hierarchical path does not denote an empty segment, but is ignored.
686 @return True if the extension has successfully been removed (and the
687 resulting URI is still valid), or if the name did not have an
688 extension. If the path is not hierarchical, or the specified segment
689 does not exist, false is returned. If false is returned, the object
692 bool removeExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
693 bool bIgnoreFinalSlash
= true);
695 /** Determine whether the hierarchical path ends in a final slash.
697 @return True if the hierarchical path ends in a final slash. If the
698 path is not hierarchical, false is returned.
700 bool hasFinalSlash() const;
702 /** Make the hierarchical path end in a final slash (if it does not
705 @return True if a final slash has successfully been appended (and the
706 resulting URI is still valid), or if the hierarchical path already
707 ended in a final slash. If the path is not hierarchical, false is
708 returned. If false is returned, the object is not modified.
710 bool setFinalSlash();
712 /** Remove a final slash from the hierarchical path.
714 @return True if a final slash has successfully been removed (and the
715 resulting URI is still valid), or if the hierarchical path already did
716 not end in a final slash. If the path is not hierarchical, false is
717 returned. If false is returned, the object is not modified.
719 bool removeFinalSlash();
723 inline bool HasParam() const { return m_aQuery
.isPresent(); }
725 inline OUString
GetParam(rtl_TextEncoding eCharset
726 = RTL_TEXTENCODING_UTF8
) const
727 { return decode(m_aQuery
, DecodeMechanism::NONE
, eCharset
); }
729 inline bool SetParam(OUString
const & rTheQuery
,
730 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
731 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
735 inline bool HasMark() const { return m_aFragment
.isPresent(); }
737 inline OUString
GetMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
738 rtl_TextEncoding eCharset
739 = RTL_TEXTENCODING_UTF8
) const
740 { return decode(m_aFragment
, eMechanism
, eCharset
); }
742 inline bool SetMark(OUString
const & rTheFragment
,
743 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
744 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
748 /** Create an INetURLObject from a file system path.
750 @param rFSysPath A file system path. A URL is not allowed here!
752 @param eStyle The notation of rFSysPath.
754 inline INetURLObject(OUString
const & rFSysPath
, FSysStyle eStyle
);
756 /** Set this INetURLObject to a file URL constructed from a file system
759 @param rFSysPath A file system path. A URL is not allowed here!
761 @param eStyle The notation of rFSysPath.
763 @return True if this INetURLObject has successfully been changed. If
764 false is returned, this INetURLObject has not been modified.
766 bool setFSysPath(OUString
const & rFSysPath
, FSysStyle eStyle
);
768 /** Return the file system path represented by a file URL (ignoring any
771 @param eStyle The notation of the returned file system path.
773 @param pDelimiter Upon successful return, this parameter can return
774 the character that is the 'main' delimiter within the returned file
775 system path (e.g., "/" for Unix, "\" for DOS). This is
776 especially useful for routines that later try to shorten the returned
777 file system path at a 'good' position, e.g. to fit it into some
778 limited display space.
780 @return The file system path represented by this file URL. If this
781 file URL does not represent a file system path according to the
782 specified notation, or if this is not a file URL at all, an empty
785 OUString
getFSysPath(FSysStyle eStyle
, sal_Unicode
* pDelimiter
= nullptr)
789 std::unique_ptr
<SvMemoryStream
> getData();
795 PART_USER_PASSWORD
= 0x00001,
796 PART_FPATH
= 0x00008,
797 PART_AUTHORITY
= 0x00010,
798 PART_REL_SEGMENT_EXTRA
= 0x00020,
800 PART_HTTP_PATH
= 0x00080,
801 PART_MESSAGE_ID_PATH
= 0x00100,
802 PART_MAILTO
= 0x00200,
803 PART_PATH_BEFORE_QUERY
= 0x00400,
804 PART_PCHAR
= 0x00800,
805 PART_VISIBLE
= 0x01000,
806 PART_VISIBLE_NONSPECIAL
= 0x02000,
807 PART_UNO_PARAM_VALUE
= 0x04000,
808 PART_UNAMBIGUOUS
= 0x08000,
809 PART_URIC_NO_SLASH
= 0x10000,
810 PART_HTTP_QUERY
= 0x20000, //TODO! unused?
820 /** Encode some text as part of a URI.
822 @param rText Some text (for its interpretation, see the general
823 discussion for set-methods).
825 @param ePart The part says which characters are 'forbidden' and must
826 be encoded (replaced by escape sequences). Characters outside the US-
827 ASCII range are always 'forbidden.'
829 @param eMechanism See the general discussion for set-methods.
831 @param eCharset See the general discussion for set-methods.
833 @return The text, encoded according to the given mechanism and
834 charset ('forbidden' characters replaced by escape sequences).
836 static inline OUString
encode(OUString
const & rText
, Part ePart
,
837 EncodeMechanism eMechanism
,
838 rtl_TextEncoding eCharset
839 = RTL_TEXTENCODING_UTF8
);
841 /** Decode some text.
843 @param rText Some (encoded) text.
845 @param eMechanism See the general discussion for get-methods.
847 @param eCharset See the general discussion for get-methods.
849 @return The text, decoded according to the given mechanism and
850 charset (escape sequences replaced by 'raw' characters).
852 static inline OUString
decode(OUString
const & rText
,
853 DecodeMechanism eMechanism
,
854 rtl_TextEncoding eCharset
855 = RTL_TEXTENCODING_UTF8
);
857 static inline OUString
decode(OUStringBuffer
const & rText
,
858 DecodeMechanism eMechanism
,
859 rtl_TextEncoding eCharset
860 = RTL_TEXTENCODING_UTF8
);
862 static void appendUCS4Escape(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
);
864 static void appendUCS4(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
,
865 EscapeType eEscapeType
, bool bOctets
, Part ePart
,
866 rtl_TextEncoding eCharset
, bool bKeepVisibleEscapes
);
868 static sal_uInt32
getUTF32(sal_Unicode
const *& rBegin
,
869 sal_Unicode
const * pEnd
, bool bOctets
,
870 EncodeMechanism eMechanism
,
871 rtl_TextEncoding eCharset
,
872 EscapeType
& rEscapeType
);
874 // Specialized helpers:
876 static sal_uInt32
scanDomain(sal_Unicode
const *& rBegin
,
877 sal_Unicode
const * pEnd
,
880 // OBSOLETE Hierarchical Path:
882 OUString
GetPartBeforeLastName() const;
884 /** Get the last segment in the path.
886 @param eMechanism See the general discussion for get-methods.
888 @param eCharset See the general discussion for get-methods.
890 @return For a hierarchical URL, the last segment (everything after
891 the last unencoded '/'). Note that this last segment may be empty. If
892 the URL is not hierarchical, an empty string is returned.
894 OUString
GetLastName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
895 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
898 /** Get the 'extension' of the last segment in the path.
900 @return For a hierarchical URL, everything after the first unencoded
901 '.' in the last segment of the path. Note that this 'extension' may
902 be empty. If the URL is not hierarchical, or if the last segment does
903 not contain an unencoded '.', an empty string is returned.
905 OUString
GetFileExtension() const;
907 bool Append(OUString
const & rTheSegment
,
908 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
909 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
913 // OBSOLETE File URLs:
915 OUString
PathToFileName() const;
917 OUString
GetFull() const;
919 OUString
GetPath() const;
921 void SetBase(OUString
const & rTheBase
);
923 OUString
GetBase() const;
925 void SetName(OUString
const & rTheName
,
926 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
927 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
929 inline OUString
GetName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
930 rtl_TextEncoding eCharset
931 = RTL_TEXTENCODING_UTF8
) const
932 { return GetLastName(eMechanism
, eCharset
); }
934 void SetExtension(OUString
const & rTheExtension
);
936 inline OUString
GetExtension() const
937 { return GetFileExtension(); }
939 OUString
CutExtension();
/** Report case sensitivity.

    @return  Always true; the result does not depend on any object or
    argument.
 */
static bool IsCaseSensitive()
{
    return true;
}
945 // General Structure:
953 explicit inline SubString(sal_Int32 nTheBegin
= -1,
954 sal_Int32 nTheLength
= 0):
955 m_nBegin(nTheBegin
), m_nLength(nTheLength
) {}
957 inline bool isPresent() const { return m_nBegin
!= -1; }
959 inline bool isEmpty() const { return m_nLength
== 0; }
961 inline sal_Int32
getBegin() const { return m_nBegin
; }
963 inline sal_Int32
getLength() const { return m_nLength
; }
965 inline sal_Int32
getEnd() const { return m_nBegin
+ m_nLength
; }
967 inline sal_Int32
clear();
969 inline sal_Int32
set(OUStringBuffer
& rString
,
970 OUString
const & rSubString
,
971 sal_Int32 nTheBegin
);
973 inline sal_Int32
set(OUString
& rString
,
974 OUString
const & rSubString
);
976 inline sal_Int32
set(OUStringBuffer
& rString
,
977 OUString
const & rSubString
);
979 inline void operator +=(sal_Int32 nDelta
);
981 int compare(SubString
const & rOther
,
982 OUStringBuffer
const & rThisString
,
983 OUStringBuffer
const & rOtherString
) const;
986 OUStringBuffer m_aAbsURIRef
;
994 SubString m_aFragment
;
995 INetProtocol m_eScheme
;
996 INetProtocol m_eSmartScheme
;
998 TOOLS_DLLPRIVATE
void setInvalid();
1001 OUString
const & rTheAbsURIRef
,
1002 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
, bool bSmart
,
1007 bool convertRelToAbs(
1008 OUString
const & rTheRelURIRef
,
1009 INetURLObject
& rTheAbsURIRef
, bool & rWasAbsolute
,
1010 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1011 bool bIgnoreFragment
, bool bSmart
, bool bRelativeNonURIs
,
1012 FSysStyle eStyle
) const;
1014 bool convertAbsToRel(
1015 OUString
const & rTheAbsURIRef
,
1016 OUString
& rTheRelURIRef
, EncodeMechanism eEncodeMechanism
,
1017 DecodeMechanism eDecodeMechanism
, rtl_TextEncoding eCharset
,
1018 FSysStyle eStyle
) const;
1022 static bool convertIntToExt(
1023 OUString
const & rTheIntURIRef
, bool bOctets
,
1024 OUString
& rTheExtURIRef
, DecodeMechanism eDecodeMechanism
,
1025 rtl_TextEncoding eCharset
);
1027 static bool convertExtToInt(
1028 OUString
const & rTheExtURIRef
, bool bOctets
,
1029 OUString
& rTheIntURIRef
, DecodeMechanism eDecodeMechanism
,
1030 rtl_TextEncoding eCharset
);
1036 TOOLS_DLLPRIVATE
static inline SchemeInfo
const & getSchemeInfo(
1037 INetProtocol eTheScheme
);
1039 TOOLS_DLLPRIVATE
inline SchemeInfo
const & getSchemeInfo() const;
1041 TOOLS_DLLPRIVATE
static PrefixInfo
const * getPrefix(
1042 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1046 TOOLS_DLLPRIVATE sal_Int32
getAuthorityBegin() const;
1048 TOOLS_DLLPRIVATE SubString
getAuthority() const;
1053 OUString
const & rTheUser
,
1054 rtl_TextEncoding eCharset
);
1056 bool clearPassword();
1059 OUString
const & rThePassword
,
1060 rtl_TextEncoding eCharset
);
1064 TOOLS_DLLPRIVATE
static bool parseHost(
1065 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
,
1066 OUString
& rCanonic
);
1068 TOOLS_DLLPRIVATE
static bool parseHostOrNetBiosName(
1069 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
, bool bOctets
,
1070 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1071 bool bNetBiosName
, OUStringBuffer
* pCanonic
);
1074 OUString
const & rTheHost
,
1075 rtl_TextEncoding eCharset
);
1079 TOOLS_DLLPRIVATE
static bool parsePath(
1080 INetProtocol eScheme
, sal_Unicode
const ** pBegin
,
1081 sal_Unicode
const * pEnd
, bool bOctets
, EncodeMechanism eMechanism
,
1082 rtl_TextEncoding eCharset
, bool bSkippedInitialSlash
,
1083 sal_uInt32 nSegmentDelimiter
, sal_uInt32 nAltSegmentDelimiter
,
1084 sal_uInt32 nQueryDelimiter
, sal_uInt32 nFragmentDelimiter
,
1085 OUStringBuffer
&rSynPath
);
1088 OUString
const & rThePath
,
1089 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
// Hierarchical Path:
1093 TOOLS_DLLPRIVATE
bool checkHierarchical() const;
1095 TOOLS_DLLPRIVATE SubString
getSegment(
1096 sal_Int32 nIndex
, bool bIgnoreFinalSlash
) const;
1103 OUString
const & rTheQuery
,
1104 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
// Counterpart of setFragment() that takes no argument; returns a bool status.
bool clearFragment();
1111 OUString
const & rTheMark
,
1112 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1116 TOOLS_DLLPRIVATE
bool hasDosVolume(FSysStyle eStyle
) const;
1120 TOOLS_DLLPRIVATE
static inline void appendEscape(
1121 OUStringBuffer
& rTheText
, sal_uInt32 nOctet
);
1123 static OUString
encodeText(
1124 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
, bool bOctets
,
1125 Part ePart
, EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1126 bool bKeepVisibleEscapes
);
1128 static inline OUString
encodeText(
1129 OUString
const & rTheText
, bool bOctets
, Part ePart
,
1130 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1131 bool bKeepVisibleEscapes
);
1133 static OUString
decode(
1134 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1135 DecodeMechanism
, rtl_TextEncoding eCharset
);
1137 inline OUString
decode(
1138 SubString
const & rSubString
,
1139 DecodeMechanism eMechanism
, rtl_TextEncoding eCharset
) const;
// Specialized helpers:
1143 TOOLS_DLLPRIVATE
static bool scanIPv6reference(
1144 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1147 void changeScheme(INetProtocol eTargetScheme
);
1151 inline OUString
INetURLObject::encodeText(OUString
const & rTheText
,
1152 bool bOctets
, Part ePart
,
1153 EncodeMechanism eMechanism
,
1154 rtl_TextEncoding eCharset
,
1155 bool bKeepVisibleEscapes
)
1157 return encodeText(rTheText
.getStr(),
1158 rTheText
.getStr() + rTheText
.getLength(), bOctets
, ePart
,
1159 eMechanism
, eCharset
, bKeepVisibleEscapes
);
1162 inline OUString
INetURLObject::decode(SubString
const & rSubString
,
1163 DecodeMechanism eMechanism
,
1164 rtl_TextEncoding eCharset
) const
1166 return rSubString
.isPresent() ?
1167 decode(m_aAbsURIRef
.getStr() + rSubString
.getBegin(),
1168 m_aAbsURIRef
.getStr() + rSubString
.getEnd(),
1169 eMechanism
, eCharset
) :
1173 inline INetURLObject::INetURLObject(OUString
const & rTheAbsURIRef
,
1174 EncodeMechanism eMechanism
,
1175 rtl_TextEncoding eCharset
):
1176 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1178 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
1182 inline bool INetURLObject::SetURL(OUString
const & rTheAbsURIRef
,
1183 EncodeMechanism eMechanism
,
1184 rtl_TextEncoding eCharset
)
1186 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
1190 inline INetURLObject::INetURLObject(OUString
const & rTheAbsURIRef
,
1191 INetProtocol eTheSmartScheme
,
1192 EncodeMechanism eMechanism
,
1193 rtl_TextEncoding eCharset
,
1195 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(eTheSmartScheme
)
1197 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true, eStyle
);
1200 inline bool INetURLObject::SetSmartURL(OUString
const & rTheAbsURIRef
,
1201 EncodeMechanism eMechanism
,
1202 rtl_TextEncoding eCharset
,
1205 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true,
1209 inline INetURLObject
1210 INetURLObject::smartRel2Abs(OUString
const & rTheRelURIRef
,
1211 bool & rWasAbsolute
,
1212 bool bIgnoreFragment
,
1213 EncodeMechanism eMechanism
,
1214 rtl_TextEncoding eCharset
,
1215 bool bRelativeNonURIs
,
1216 FSysStyle eStyle
) const
1218 INetURLObject aTheAbsURIRef
;
1219 convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, rWasAbsolute
,
1220 eMechanism
, eCharset
, bIgnoreFragment
, true,
1221 bRelativeNonURIs
, eStyle
);
1222 return aTheAbsURIRef
;
1225 inline bool INetURLObject::GetNewAbsURL(OUString
const & rTheRelURIRef
,
1226 INetURLObject
* pTheAbsURIRef
)
1229 INetURLObject aTheAbsURIRef
;
1231 if (!convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, bWasAbsolute
,
1232 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false/*bIgnoreFragment*/, false, false,
1236 *pTheAbsURIRef
= aTheAbsURIRef
;
1241 inline OUString
INetURLObject::GetRelURL(OUString
const & rTheBaseURIRef
,
1242 OUString
const & rTheAbsURIRef
,
1243 EncodeMechanism eEncodeMechanism
,
1244 DecodeMechanism eDecodeMechanism
,
1245 rtl_TextEncoding eCharset
,
1248 OUString aTheRelURIRef
;
1249 INetURLObject(rTheBaseURIRef
, eEncodeMechanism
, eCharset
).
1250 convertAbsToRel(rTheAbsURIRef
, aTheRelURIRef
, eEncodeMechanism
,
1251 eDecodeMechanism
, eCharset
, eStyle
);
1252 return aTheRelURIRef
;
1256 inline bool INetURLObject::translateToExternal(OUString
const &
1258 OUString
& rTheExtURIRef
,
1261 rtl_TextEncoding eCharset
)
1263 return convertIntToExt(rTheIntURIRef
, false, rTheExtURIRef
,
1264 eDecodeMechanism
, eCharset
);
1268 inline bool INetURLObject::translateToInternal(OUString
const &
1270 OUString
& rTheIntURIRef
,
1273 rtl_TextEncoding eCharset
)
1275 return convertExtToInt(rTheExtURIRef
, false, rTheIntURIRef
,
1276 eDecodeMechanism
, eCharset
);
1279 inline bool INetURLObject::SetPass(OUString
const & rThePassword
)
1281 return rThePassword
.isEmpty() ?
1283 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
);
1286 inline bool INetURLObject::SetUserAndPass(OUString
const & rTheUser
,
1287 OUString
const & rThePassword
)
1289 return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
)
1290 && (rThePassword
.isEmpty() ?
1292 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
));
1295 inline bool INetURLObject::SetParam(OUString
const & rTheQuery
,
1296 EncodeMechanism eMechanism
,
1297 rtl_TextEncoding eCharset
)
1299 return rTheQuery
.isEmpty() ?
1301 setQuery(rTheQuery
, eMechanism
, eCharset
);
1304 inline bool INetURLObject::SetMark(OUString
const & rTheFragment
,
1305 EncodeMechanism eMechanism
,
1306 rtl_TextEncoding eCharset
)
1308 return rTheFragment
.isEmpty() ?
1310 setFragment(rTheFragment
, eMechanism
, eCharset
);
1313 inline INetURLObject::INetURLObject(OUString
const & rFSysPath
,
1315 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1317 setFSysPath(rFSysPath
, eStyle
);
1321 inline OUString
INetURLObject::encode(OUString
const & rText
, Part ePart
,
1322 EncodeMechanism eMechanism
,
1323 rtl_TextEncoding eCharset
)
1325 return encodeText(rText
, false, ePart
, eMechanism
, eCharset
, false);
1329 inline OUString
INetURLObject::decode(OUString
const & rText
,
1330 DecodeMechanism eMechanism
,
1331 rtl_TextEncoding eCharset
)
1333 return decode(rText
.getStr(), rText
.getStr() + rText
.getLength(),
1334 eMechanism
, eCharset
);
1337 inline OUString
INetURLObject::decode(OUStringBuffer
const & rText
,
1338 DecodeMechanism eMechanism
,
1339 rtl_TextEncoding eCharset
)
1341 return decode(rText
.getStr(), rText
.getStr() + rText
.getLength(),
1342 eMechanism
, eCharset
);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */