1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <com/sun/star/uno/Reference.hxx>
24 #include <rtl/string.h>
25 #include <rtl/ustrbuf.hxx>
26 #include <rtl/textenc.h>
27 #include <sal/types.h>
28 #include <o3tl/typed_flags_set.hxx>
34 namespace com
{ namespace sun
{ namespace star
{ namespace util
{
38 // Common URL prefixes for various schemes:
39 #define INET_FTP_SCHEME "ftp://"
40 #define INET_HTTP_SCHEME "http://"
41 #define INET_HTTPS_SCHEME "https://"
42 #define INET_FILE_SCHEME "file://"
43 #define INET_MAILTO_SCHEME "mailto:"
44 #define INET_HID_SCHEME "hid:"
46 #define URL_PREFIX_PRIV_SOFFICE "private:"
49 enum class INetProtocol
83 /** The supported notations for file system paths.
87 /** VOS notation (e.g., "//server/dir/file").
91 /** Unix notation (e.g., "/dir/file").
95 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
99 /** Detect the used notation.
101 @descr For the following descriptions, please note that
102 whereas FSYS_DEFAULT includes all style bits, combinations of only
103 a few style bits are also possible, and are also described.
105 @descr When used to translate a file system path to a file URL,
106 the subset of the following productions for which the appropriate
107 style bit is set are checked in order (using the conventions of
108 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
111 Production T1 (VOS local; FSysStyle::Vos only):
116 Production T2 (VOS host; FSysStyle::Vos only):
117 "//" [host] ["/" *UCS4]
119 "file://" host "/" *UCS4
121 Production T3 (UNC; FSysStyle::Dos only):
122 "\\" [host] ["\" *UCS4]
124 "file://" host "/" *UCS4
125 replacing "\" by "/" within <*UCS4>
127 Production T4 (Unix-like DOS; FSysStyle::Dos only):
128 ALPHA ":" ["/" *UCS4]
130 "file:///" ALPHA ":/" *UCS4
131 replacing "\" by "/" within <*UCS4>
133 Production T5 (DOS; FSysStyle::Dos only):
134 ALPHA ":" ["\" *UCS4]
136 "file:///" ALPHA ":/" *UCS4
137 replacing "\" by "/" within <*UCS4>
143 replacing the delimiter by "/" within <*UCS4>. The delimiter is
144 that character from the set { "/", "\" } which appears most
145 often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
146 is removed from the set; if FSysStyle::Dos is not among the style
147 bits, "\" is removed from the set). If two or more
148 characters appear the same number of times, the character
149 mentioned first in that set is chosen. If the first character
150 of <*UCS4> is the delimiter, that character is not copied.
152 @descr When used to translate a file URL to a file system path,
153 the following productions are checked in order (using the
154 conventions of RFC 2234, RFC 2396, and RFC 2732):
156 Production F1 (VOS; FSysStyle::Vos):
157 "file://" host "/" fpath ["#" fragment]
161 Production F2 (DOS; FSysStyle::Dos):
162 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
164 ALPHA ":" ["\" fpath]
165 replacing "/" by "\" in <fpath>
167 Production F3 (Unix; FSysStyle::Unix):
168 "file:///" fpath ["#" fragment]
172 Detect
= Vos
| Unix
| Dos
/** Specialization of typed_flags for FSysStyle, deriving from
    is_typed_flags<FSysStyle, 0x07>.

    NOTE(review): 0x07 is presumably the union of the Vos, Unix and Dos bits
    (Detect is defined as Vos | Unix | Dos, but the individual enumerator
    values are not visible in this excerpt) -- confirm.
 */
175 template<> struct typed_flags
<FSysStyle
> : is_typed_flags
<FSysStyle
, 0x07> {};
178 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
181 // Get- and Set-Methods:
183 /** The way input strings that represent (parts of) URIs are interpreted
186 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
187 sequences of escape sequences, representing the UTF-8 coded characters.
189 @descr Along with an EncodeMechanism parameter, the set-methods all
190 take an rtl_TextEncoding parameter, which is ignored unless the
191 EncodeMechanism is EncodeMechanism::WasEncoded.
193 enum class EncodeMechanism
195 /** All escape sequences that are already present are ignored, and are
196 interpreted as literal sequences of three characters.
200 /** Sequences of escape sequences, that represent characters from the
201 specified character set and that can be converted to UTF-32
202 characters, are first decoded. If they have to be encoded, they
203 are converted to UTF-8 characters and are then translated into
204 (sequences of) escape sequences. Other escape sequences are
205 copied verbatim (but using upper case hex digits).
209 /** All escape sequences that are already present are copied verbatim
210 (but using upper case hex digits).
215 /** The way strings that represent (parts of) URIs are returned from get-
218 @descr Along with a DecodeMechanism parameter, the get-methods all
219 take an rtl_TextEncoding parameter, which is ignored unless the
220 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
222 enum class DecodeMechanism
224 /** The (part of the) URI is returned unchanged. Since URIs are
225 written using a subset of US-ASCII, the returned string is
226 guaranteed to contain only US-ASCII characters.
230 /** All sequences of escape sequences that represent UTF-8 coded
231 UTF-32 characters with a numerical value greater than 0x7F, are
232 replaced by the respective UTF-16 characters. All other escape
233 sequences are not decoded.
237 /** All (sequences of) escape sequences that represent characters from
238 the specified character set, and that can be converted to UTF-32,
239 are replaced by the respective UTF-16 characters. All other
240 escape sequences are not decoded.
244 /** All (sequences of) escape sequences that represent characters from
245 the specified character set, that can be converted to UTF-32, and
246 that (in the case of ASCII characters) can safely be decoded
247 without altering the meaning of the (part of the) URI, are
248 replaced by the respective UTF-16 characters. All other escape
249 sequences are not decoded.
254 // General Structure:
257 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
) {}
/** Tell whether this object is in the error state.

    @return  True if the stored scheme (m_eScheme) is INetProtocol::NotValid.
 */
259 bool HasError() const { return m_eScheme
== INetProtocol::NotValid
; }
/** Get the stored absolute URI reference (m_aAbsURIRef).

    @param eMechanism  See the general discussion for get-methods.  Note that
    this parameter has no default value here.

    @param eCharset  See the general discussion for get-methods.

    @return  The result of passing m_aAbsURIRef through decode() with the
    given mechanism and charset.
 */
261 OUString
GetMainURL(DecodeMechanism eMechanism
,
262 rtl_TextEncoding eCharset
263 = RTL_TEXTENCODING_UTF8
) const
264 { return decode(m_aAbsURIRef
, eMechanism
, eCharset
); }
266 OUString
GetURLNoPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
267 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
270 OUString
GetURLNoMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
271 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
275 getAbbreviated(css::uno::Reference
< css::util::XStringWidth
> const & rStringWidth
,
277 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
278 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
281 bool operator ==(INetURLObject
const & rObject
) const;
/** Inequality comparison.

    @param rObject  The object to compare with.

    @return  The logical negation of operator ==(rObject).
 */
283 bool operator !=(INetURLObject
const & rObject
) const
284 { return !(*this == rObject
); }
288 inline explicit INetURLObject(
289 OUString
const & rTheAbsURIRef
,
290 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
291 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
293 inline bool SetURL(OUString
const & rTheAbsURIRef
,
294 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
295 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
297 bool ConcatData(INetProtocol eTheScheme
, OUString
const & rTheUser
,
298 OUString
const & rThePassword
,
299 OUString
const & rTheHost
, sal_uInt32 nThePort
,
300 OUString
const & rThePath
);
304 inline INetURLObject(OUString
const & rTheAbsURIRef
,
305 INetProtocol eTheSmartScheme
,
306 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
307 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
308 FSysStyle eStyle
= FSysStyle::Detect
);
/** Store the scheme to be used as the 'smart' scheme of this object.

    @descr  Only m_eSmartScheme is assigned; the URL held by this object is
    not touched by this call.

    @param eTheSmartScheme  The new value of m_eSmartScheme.
 */
310 void SetSmartProtocol(INetProtocol eTheSmartScheme
)
311 { m_eSmartScheme
= eTheSmartScheme
; }
314 SetSmartURL(OUString
const & rTheAbsURIRef
,
315 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
316 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
317 FSysStyle eStyle
= FSysStyle::Detect
);
320 smartRel2Abs(OUString
const & rTheRelURIRef
,
322 bool bIgnoreFragment
= false,
323 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
324 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
325 bool bRelativeNonURIs
= false,
326 FSysStyle eStyle
= FSysStyle::Detect
) const;
331 GetNewAbsURL(OUString
const & rTheRelURIRef
,
332 INetURLObject
* pTheAbsURIRef
)
335 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
336 (because of syntactic reasons), either rTheRelURIRef or an empty
337 string is returned: If all of the parameters eEncodeMechanism,
338 eDecodeMechanism and eCharset have their respective default values,
339 then rTheRelURIRef is returned unmodified; otherwise, an empty string
343 GetAbsURL(OUString
const & rTheBaseURIRef
,
344 OUString
const & rTheRelURIRef
,
345 bool bIgnoreFragment
= false,
346 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
347 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
348 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
350 static inline OUString
351 GetRelURL(OUString
const & rTheBaseURIRef
,
352 OUString
const & rTheAbsURIRef
,
353 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
354 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
355 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
356 FSysStyle eStyle
= FSysStyle::Detect
);
360 OUString
getExternalURL() const;
362 static inline bool translateToExternal(OUString
const & rTheIntURIRef
,
363 OUString
& rTheExtURIRef
,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri
,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8
);
369 static inline bool translateToInternal(OUString
const & rTheExtURIRef
,
370 OUString
& rTheIntURIRef
,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri
,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8
);
/** Get the scheme of this object.

    @return  The stored scheme (m_eScheme).  According to HasError(), the
    value INetProtocol::NotValid denotes an object in the error state.
 */
380 INetProtocol
GetProtocol() const { return m_eScheme
; }
/** Compare the scheme of this object with a given scheme.

    @param scheme  The scheme to compare with.

    @return  True if scheme equals the stored scheme (m_eScheme).
 */
382 bool isSchemeEqualTo(INetProtocol scheme
) const { return scheme
== m_eScheme
; }
384 bool isSchemeEqualTo(OUString
const & scheme
) const;
386 /** Check if the scheme is one of the WebDAV schemes.
389 * @return true if the scheme is one of the known WebDAV schemes, either public or private.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static OUString
GetScheme(INetProtocol eTheScheme
);
401 /** Return a human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static OUString
GetSchemeName(INetProtocol eTheScheme
);
409 static INetProtocol
CompareProtocolScheme(OUString
const &
/** Tell whether the URL has a user part.

    @return  True if m_aUser is present (m_aUser.isPresent()).
 */
414 bool HasUserData() const { return m_aUser
.isPresent(); }
/** Get the user part of the URL.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The m_aUser part, passed through decode() with the given
    mechanism and charset.
 */
416 OUString
GetUser(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
417 rtl_TextEncoding eCharset
418 = RTL_TEXTENCODING_UTF8
) const
419 { return decode(m_aUser
, eMechanism
, eCharset
); }
/** Get the password part of the URL.

    @descr  The value is read from the member named m_aAuth.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The m_aAuth part, passed through decode() with the given
    mechanism and charset.
 */
421 OUString
GetPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
422 rtl_TextEncoding eCharset
423 = RTL_TEXTENCODING_UTF8
) const
424 { return decode(m_aAuth
, eMechanism
, eCharset
); }
/** Set the user part of the URL.

    @descr  Forwards to setUser() with the charset fixed to
    RTL_TEXTENCODING_UTF8.

    @param rTheUser  The new user part.

    @return  The value returned by setUser().
 */
426 bool SetUser(OUString
const & rTheUser
)
427 { return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
); }
429 inline bool SetPass(OUString
const & rThePassword
);
431 inline bool SetUserAndPass(OUString
const & rTheUser
,
432 OUString
const & rThePassword
);
/** Tell whether the URL has a port part.

    @return  True if m_aPort is present (m_aPort.isPresent()).
 */
436 bool HasPort() const { return m_aPort
.isPresent(); }
/** Get the host part of the URL.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The m_aHost part, passed through decode() with the given
    mechanism and charset.
 */
438 OUString
GetHost(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
439 rtl_TextEncoding eCharset
440 = RTL_TEXTENCODING_UTF8
) const
441 { return decode(m_aHost
, eMechanism
, eCharset
); }
443 OUString
GetHostPort(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
444 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
446 sal_uInt32
GetPort() const;
/** Set the host part of the URL.

    @descr  Forwards to setHost() with the charset fixed to
    RTL_TEXTENCODING_UTF8.

    @param rTheHost  The new host part.

    @return  The value returned by setHost().
 */
448 bool SetHost(OUString
const & rTheHost
)
449 { return setHost(rTheHost
, RTL_TEXTENCODING_UTF8
); }
451 bool SetPort(sal_uInt32 nThePort
);
/** Tell whether the URL has a non-empty path.

    @descr  Unlike the other Has...() predicates visible in this class, this
    one tests isEmpty() rather than isPresent().

    @return  True if m_aPath is not empty (!m_aPath.isEmpty()).
 */
455 bool HasURLPath() const { return !m_aPath
.isEmpty(); }
/** Get the path part of the URL.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The m_aPath part, passed through decode() with the given
    mechanism and charset.
 */
457 OUString
GetURLPath(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
458 rtl_TextEncoding eCharset
459 = RTL_TEXTENCODING_UTF8
) const
460 { return decode(m_aPath
, eMechanism
, eCharset
); }
/** Set the path part of the URL.

    @descr  Forwards all arguments unchanged to setPath().

    @param rThePath  The new path.

    @param eMechanism  See the general discussion for set-methods.

    @param eCharset  See the general discussion for set-methods.

    @return  The value returned by setPath().
 */
462 bool SetURLPath(OUString
const & rThePath
,
463 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
464 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
465 { return setPath(rThePath
, eMechanism
, eCharset
); }
467 // Hierarchical Path:
469 /** A constant to address the last segment in various methods dealing with
472 @descr It is often more efficient to address the last segment using
473 this constant, than to determine its ordinal value using
476 enum { LAST_SEGMENT
= -1 };
478 /** The number of segments in the hierarchical path.
480 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
483 hierarchical-path = 1*("/" segment)
485 segment = name *(";" param)
487 name = [base ["." extension]]
491 extension = *<any pchar except ".">
495 @param bIgnoreFinalSlash If true, a final slash at the end of the
496 hierarchical path does not denote an empty segment, but is ignored.
498 @return The number of segments in the hierarchical path. If the path
499 is not hierarchical, 0 is returned.
501 sal_Int32
getSegmentCount(bool bIgnoreFinalSlash
= true) const;
503 /** Remove a segment from the hierarchical path.
505 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
506 if addressing the last segment.
508 @param bIgnoreFinalSlash If true, a final slash at the end of the
509 hierarchical path does not denote an empty segment, but is ignored.
511 @return True if the segment has successfully been removed (and the
512 resulting URI is still valid). If the path is not hierarchical, or
513 the specified segment does not exist, false is returned. If false is
514 returned, the object is not modified.
516 bool removeSegment(sal_Int32 nIndex
= LAST_SEGMENT
,
517 bool bIgnoreFinalSlash
= true);
519 /** Insert a new segment into the hierarchical path.
520 A final slash at the end of the
521 hierarchical path does not denote an empty segment, but is ignored.
523 @param rTheName The name part of the new segment. The new segment
524 will contain no parameters.
526 @param bAppendFinalSlash If the new segment is appended at the end of
527 the hierarchical path, this parameter specifies whether to add a final
528 slash after it or not.
530 @param nIndex The non-negative index of the segment before which
531 to insert the new segment. LAST_SEGMENT or an nIndex that equals
532 getSegmentCount() inserts the new segment at the end of the
535 @param eMechanism See the general discussion for set-methods.
537 @param eCharset See the general discussion for set-methods.
539 @return True if the segment has successfully been inserted (and the
540 resulting URI is still valid). If the path is not hierarchical, or
541 the specified place to insert the new segment does not exist, false is
542 returned. If false is returned, the object is not modified.
544 bool insertName(OUString
const & rTheName
,
545 bool bAppendFinalSlash
= false,
546 sal_Int32 nIndex
= LAST_SEGMENT
,
547 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
548 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
550 /** Get the name of a segment of the hierarchical path.
552 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
553 if addressing the last segment.
555 @param bIgnoreFinalSlash If true, a final slash at the end of the
556 hierarchical path does not denote an empty segment, but is ignored.
558 @param eMechanism See the general discussion for get-methods.
560 @param eCharset See the general discussion for get-methods.
562 @return The name part of the specified segment. If the path is not
563 hierarchical, or the specified segment does not exist, an empty string
566 OUString
getName(sal_Int32 nIndex
= LAST_SEGMENT
,
567 bool bIgnoreFinalSlash
= true,
568 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
569 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
572 /** Set the name of a segment (preserving any parameters and any query or
575 @param rTheName The new name.
577 @return True if the name has successfully been modified (and the
578 resulting URI is still valid). If the path is not hierarchical, or
579 the specified segment does not exist, false is returned. If false is
580 returned, the object is not modified.
582 bool setName(OUString
const & rTheName
);
584 /** Get the base of the name of a segment.
586 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
587 if addressing the last segment.
589 @param bIgnoreFinalSlash If true, a final slash at the end of the
590 hierarchical path does not denote an empty segment, but is ignored.
592 @param eMechanism See the general discussion for get-methods.
594 @param eCharset See the general discussion for get-methods.
596 @return The base part of the specified segment. If the path is
597 not hierarchical, or the specified segment does not exist, an empty
600 OUString
getBase(sal_Int32 nIndex
= LAST_SEGMENT
,
601 bool bIgnoreFinalSlash
= true,
602 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
603 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
606 /** Set the base of the name of a segment (preserving the extension).
607 A final slash at the end of the
608 hierarchical path does not denote an empty segment, but is ignored.
610 @param rTheBase The new base.
612 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
613 if addressing the last segment.
615 @param eMechanism See the general discussion for set-methods.
617 @param eCharset See the general discussion for set-methods.
619 @return True if the base has successfully been modified (and the
620 resulting URI is still valid). If the path is not hierarchical, or
621 the specified segment does not exist, false is returned. If false is
622 returned, the object is not modified.
624 bool setBase(OUString
const & rTheBase
,
625 sal_Int32 nIndex
= LAST_SEGMENT
,
626 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
627 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
629 /** Determine whether the name of the last segment has an extension.
631 @return True if the name of the last segment has an extension.
632 If the path is not hierarchical, or the specified segment does not
633 exist, false is returned.
635 bool hasExtension() const;
637 /** Get the extension of the name of a segment.
639 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
640 if addressing the last segment.
642 @param bIgnoreFinalSlash If true, a final slash at the end of the
643 hierarchical path does not denote an empty segment, but is ignored.
645 @param eMechanism See the general discussion for get-methods.
647 @param eCharset See the general discussion for get-methods.
649 @return The extension part of the specified segment. If the path is
650 not hierarchical, or the specified segment does not exist, an empty
653 OUString
getExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
654 bool bIgnoreFinalSlash
= true,
655 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
656 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
659 /** Set the extension of the name of a segment (replacing an already
662 @param rTheExtension The new extension.
664 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
665 if addressing the last segment.
667 @param bIgnoreFinalSlash If true, a final slash at the end of the
668 hierarchical path does not denote an empty segment, but is ignored.
670 @param eCharset See the general discussion for set-methods.
672 @return True if the extension has successfully been modified (and the
673 resulting URI is still valid). If the path is not hierarchical, or
674 the specified segment does not exist, false is returned. If false is
675 returned, the object is not modified.
677 bool setExtension(OUString
const & rTheExtension
,
678 sal_Int32 nIndex
= LAST_SEGMENT
,
679 bool bIgnoreFinalSlash
= true,
680 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
682 /** Remove the extension of the name of a segment.
684 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
685 if addressing the last segment.
687 @param bIgnoreFinalSlash If true, a final slash at the end of the
688 hierarchical path does not denote an empty segment, but is ignored.
690 @return True if the extension has successfully been removed (and the
691 resulting URI is still valid), or if the name did not have an
692 extension. If the path is not hierarchical, or the specified segment
693 does not exist, false is returned. If false is returned, the object
696 bool removeExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
697 bool bIgnoreFinalSlash
= true);
699 /** Determine whether the hierarchical path ends in a final slash.
701 @return True if the hierarchical path ends in a final slash. If the
702 path is not hierarchical, false is returned.
704 bool hasFinalSlash() const;
706 /** Make the hierarchical path end in a final slash (if it does not
709 @return True if a final slash has successfully been appended (and the
710 resulting URI is still valid), or if the hierarchical path already
711 ended in a final slash. If the path is not hierarchical, false is
712 returned. If false is returned, the object is not modified.
714 bool setFinalSlash();
716 /** Remove a final slash from the hierarchical path.
718 @return True if a final slash has successfully been removed (and the
719 resulting URI is still valid), or if the hierarchical path already did
720 not end in a final slash. If the path is not hierarchical, false is
721 returned. If false is returned, the object is not modified.
723 bool removeFinalSlash();
/** Tell whether the URL has a query part.

    @return  True if m_aQuery is present (m_aQuery.isPresent()).
 */
727 bool HasParam() const { return m_aQuery
.isPresent(); }
/** Get the query part of the URL.

    @descr  The decode mechanism is fixed to DecodeMechanism::NONE; callers
    cannot choose another one.  NOTE(review): the DecodeMechanism
    documentation states that the charset is ignored unless the mechanism is
    WithCharset or Unambiguous, so eCharset presumably has no effect here --
    confirm.

    @param eCharset  Forwarded unchanged to decode().

    @return  The m_aQuery part, passed through decode() with
    DecodeMechanism::NONE.
 */
729 OUString
GetParam(rtl_TextEncoding eCharset
730 = RTL_TEXTENCODING_UTF8
) const
731 { return decode(m_aQuery
, DecodeMechanism::NONE
, eCharset
); }
733 inline bool SetParam(OUString
const & rTheQuery
,
734 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
735 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
/** Tell whether the URL has a fragment part.

    @return  True if m_aFragment is present (m_aFragment.isPresent()).
 */
739 bool HasMark() const { return m_aFragment
.isPresent(); }
/** Get the fragment part of the URL.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The m_aFragment part, passed through decode() with the given
    mechanism and charset.
 */
741 OUString
GetMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
742 rtl_TextEncoding eCharset
743 = RTL_TEXTENCODING_UTF8
) const
744 { return decode(m_aFragment
, eMechanism
, eCharset
); }
746 inline bool SetMark(OUString
const & rTheFragment
,
747 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
748 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
752 /** Create an INetURLObject from a file system path.
754 @param rFSysPath A file system path. A URL is not allowed here!
756 @param eStyle The notation of rFSysPath.
758 inline INetURLObject(OUString
const & rFSysPath
, FSysStyle eStyle
);
760 /** Set this INetURLObject to a file URL constructed from a file system
763 @param rFSysPath A file system path. A URL is not allowed here!
765 @param eStyle The notation of rFSysPath.
767 @return True if this INetURLObject has successfully been changed. If
768 false is returned, this INetURLObject has not been modified.
770 bool setFSysPath(OUString
const & rFSysPath
, FSysStyle eStyle
);
772 /** Return the file system path represented by a file URL (ignoring any
775 @param eStyle The notation of the returned file system path.
777 @param pDelimiter Upon successful return, this parameter can return
778 the character that is the 'main' delimiter within the returned file
779 system path (e.g., "/" for Unix, "\" for DOS). This is
780 especially useful for routines that later try to shorten the returned
781 file system path at a 'good' position, e.g. to fit it into some
782 limited display space.
784 @return The file system path represented by this file URL. If this
785 file URL does not represent a file system path according to the
786 specified notation, or if this is not a file URL at all, an empty
789 OUString
getFSysPath(FSysStyle eStyle
, sal_Unicode
* pDelimiter
= nullptr)
793 std::unique_ptr
<SvMemoryStream
> getData();
799 PART_USER_PASSWORD
= 0x00001,
800 PART_FPATH
= 0x00008,
801 PART_AUTHORITY
= 0x00010,
802 PART_REL_SEGMENT_EXTRA
= 0x00020,
804 PART_HTTP_PATH
= 0x00080,
805 PART_MESSAGE_ID_PATH
= 0x00100,
806 PART_MAILTO
= 0x00200,
807 PART_PATH_BEFORE_QUERY
= 0x00400,
808 PART_PCHAR
= 0x00800,
809 PART_VISIBLE
= 0x01000,
810 PART_VISIBLE_NONSPECIAL
= 0x02000,
811 PART_UNO_PARAM_VALUE
= 0x04000,
812 PART_UNAMBIGUOUS
= 0x08000,
813 PART_URIC_NO_SLASH
= 0x10000,
814 PART_HTTP_QUERY
= 0x20000, //TODO! unused?
817 enum class EscapeType
824 /** Encode some text as part of a URI.
826 @param rText Some text (for its interpretation, see the general
827 discussion for set-methods).
829 @param ePart The part says which characters are 'forbidden' and must
830 be encoded (replaced by escape sequences). Characters outside the US-
831 ASCII range are always 'forbidden.'
833 @param eMechanism See the general discussion for set-methods.
835 @param eCharset See the general discussion for set-methods.
837 @return The text, encoded according to the given mechanism and
838 charset ('forbidden' characters replaced by escape sequences).
840 static inline OUString
encode(OUString
const & rText
, Part ePart
,
841 EncodeMechanism eMechanism
,
842 rtl_TextEncoding eCharset
843 = RTL_TEXTENCODING_UTF8
);
845 /** Decode some text.
847 @param rText Some (encoded) text.
849 @param eMechanism See the general discussion for get-methods.
851 @param eCharset See the general discussion for get-methods.
853 @return The text, decoded according to the given mechanism and
854 charset (escape sequences replaced by 'raw' characters).
856 static inline OUString
decode(OUString
const & rText
,
857 DecodeMechanism eMechanism
,
858 rtl_TextEncoding eCharset
859 = RTL_TEXTENCODING_UTF8
);
861 static inline OUString
decode(OUStringBuffer
const & rText
,
862 DecodeMechanism eMechanism
,
863 rtl_TextEncoding eCharset
864 = RTL_TEXTENCODING_UTF8
);
866 static void appendUCS4Escape(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
);
868 static void appendUCS4(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
,
869 EscapeType eEscapeType
, bool bOctets
, Part ePart
,
870 rtl_TextEncoding eCharset
, bool bKeepVisibleEscapes
);
872 static sal_uInt32
getUTF32(sal_Unicode
const *& rBegin
,
873 sal_Unicode
const * pEnd
, bool bOctets
,
874 EncodeMechanism eMechanism
,
875 rtl_TextEncoding eCharset
,
876 EscapeType
& rEscapeType
);
878 // Specialized helpers:
880 static sal_uInt32
scanDomain(sal_Unicode
const *& rBegin
,
881 sal_Unicode
const * pEnd
,
884 // OBSOLETE Hierarchical Path:
886 OUString
GetPartBeforeLastName() const;
888 /** Get the last segment in the path.
890 @param eMechanism See the general discussion for get-methods.
892 @param eCharset See the general discussion for get-methods.
894 @return For a hierarchical URL, the last segment (everything after
895 the last unencoded '/'). Note that this last segment may be empty. If
896 the URL is not hierarchical, an empty string is returned.
898 OUString
GetLastName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
899 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
902 /** Get the 'extension' of the last segment in the path.
904 @return For a hierarchical URL, everything after the first unencoded
905 '.' in the last segment of the path. Note that this 'extension' may
906 be empty. If the URL is not hierarchical, or if the last segment does
907 not contain an unencoded '.', an empty string is returned.
909 OUString
GetFileExtension() const;
911 bool Append(OUString
const & rTheSegment
,
912 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
913 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
917 // OBSOLETE File URLs:
919 OUString
PathToFileName() const;
921 OUString
GetFull() const;
923 OUString
GetPath() const;
925 void SetBase(OUString
const & rTheBase
);
927 OUString
GetBase() const;
929 void SetName(OUString
const & rTheName
,
930 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
931 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
/** Get the last segment of the path (listed in the OBSOLETE File URLs
    section of this class).

    @descr  Forwards both arguments unchanged to GetLastName().

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return  The value returned by GetLastName().
 */
933 OUString
GetName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
934 rtl_TextEncoding eCharset
935 = RTL_TEXTENCODING_UTF8
) const
936 { return GetLastName(eMechanism
, eCharset
); }
938 void SetExtension(OUString
const & rTheExtension
);
/** Get the 'extension' of the last segment of the path (listed in the
    OBSOLETE File URLs section of this class).

    @return  The value returned by GetFileExtension().
 */
940 OUString
GetExtension() const
941 { return GetFileExtension(); }
943 OUString
CutExtension();
/** @return  Always true; the result does not depend on any object state. */
945 static bool IsCaseSensitive() { return true; }
949 // General Structure:
/** Construct a sub-string descriptor from a begin index and a length.

    @param nTheBegin  The begin index.  The default of -1 yields a
    descriptor for which isPresent() returns false.

    @param nTheLength  The length.  The default of 0 yields a descriptor for
    which isEmpty() returns true.
 */
957 explicit SubString(sal_Int32 nTheBegin
= -1,
958 sal_Int32 nTheLength
= 0):
959 m_nBegin(nTheBegin
), m_nLength(nTheLength
) {}
/** @return  True if the begin index is not the sentinel value -1.  A
    present sub-string may still be empty (see isEmpty()).
 */
961 bool isPresent() const { return m_nBegin
!= -1; }
/** @return  True if the length is 0.  The begin index is not consulted, so
    this says nothing about whether the sub-string is present.
 */
963 bool isEmpty() const { return m_nLength
== 0; }
/** @return  The begin index (m_nBegin); -1 if the sub-string is not
    present according to isPresent().
 */
965 sal_Int32
getBegin() const { return m_nBegin
; }
/** @return  The length (m_nLength). */
967 sal_Int32
getLength() const { return m_nLength
; }
/** @return  The index one past the last position, computed as
    m_nBegin + m_nLength.

    NOTE(review): no isPresent() check is made, so for a sub-string whose
    begin index is -1 the result is m_nLength - 1; callers presumably only
    use this on present sub-strings -- confirm.
 */
969 sal_Int32
getEnd() const { return m_nBegin
+ m_nLength
; }
971 inline sal_Int32
clear();
973 inline sal_Int32
set(OUStringBuffer
& rString
,
974 OUString
const & rSubString
,
975 sal_Int32 nTheBegin
);
977 inline sal_Int32
set(OUString
& rString
,
978 OUString
const & rSubString
);
980 inline sal_Int32
set(OUStringBuffer
& rString
,
981 OUString
const & rSubString
);
983 inline void operator +=(sal_Int32 nDelta
);
985 int compare(SubString
const & rOther
,
986 OUStringBuffer
const & rThisString
,
987 OUStringBuffer
const & rOtherString
) const;
990 OUStringBuffer m_aAbsURIRef
;
998 SubString m_aFragment
;
999 INetProtocol m_eScheme
;
1000 INetProtocol m_eSmartScheme
;
1002 TOOLS_DLLPRIVATE
void setInvalid();
1005 OUString
const & rTheAbsURIRef
,
1006 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
, bool bSmart
,
1011 bool convertRelToAbs(
1012 OUString
const & rTheRelURIRef
,
1013 INetURLObject
& rTheAbsURIRef
, bool & rWasAbsolute
,
1014 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1015 bool bIgnoreFragment
, bool bSmart
, bool bRelativeNonURIs
,
1016 FSysStyle eStyle
) const;
1018 bool convertAbsToRel(
1019 OUString
const & rTheAbsURIRef
,
1020 OUString
& rTheRelURIRef
, EncodeMechanism eEncodeMechanism
,
1021 DecodeMechanism eDecodeMechanism
, rtl_TextEncoding eCharset
,
1022 FSysStyle eStyle
) const;
1026 static bool convertIntToExt(
1027 OUString
const & rTheIntURIRef
, bool bOctets
,
1028 OUString
& rTheExtURIRef
, DecodeMechanism eDecodeMechanism
,
1029 rtl_TextEncoding eCharset
);
1031 static bool convertExtToInt(
1032 OUString
const & rTheExtURIRef
, bool bOctets
,
1033 OUString
& rTheIntURIRef
, DecodeMechanism eDecodeMechanism
,
1034 rtl_TextEncoding eCharset
);
1040 TOOLS_DLLPRIVATE
static inline SchemeInfo
const & getSchemeInfo(
1041 INetProtocol eTheScheme
);
1043 TOOLS_DLLPRIVATE
inline SchemeInfo
const & getSchemeInfo() const;
1045 TOOLS_DLLPRIVATE
static PrefixInfo
const * getPrefix(
1046 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1050 TOOLS_DLLPRIVATE sal_Int32
getAuthorityBegin() const;
1052 TOOLS_DLLPRIVATE SubString
getAuthority() const;
1057 OUString
const & rTheUser
,
1058 rtl_TextEncoding eCharset
);
1060 bool clearPassword();
1063 OUString
const & rThePassword
,
1064 rtl_TextEncoding eCharset
);
1068 TOOLS_DLLPRIVATE
static bool parseHost(
1069 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
,
1070 OUString
& rCanonic
);
1072 TOOLS_DLLPRIVATE
static bool parseHostOrNetBiosName(
1073 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
, bool bOctets
,
1074 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1075 bool bNetBiosName
, OUStringBuffer
* pCanonic
);
// NOTE(review): only the parameter list of this declaration is present; the
// line carrying the return type and the function name is missing.  The
// parameter name suggests a host setter taking the new host text and its
// text encoding -- confirm against the upstream header and restore the head.
1078 OUString
const & rTheHost
,
1079 rtl_TextEncoding eCharset
);
1083 TOOLS_DLLPRIVATE
static bool parsePath(
1084 INetProtocol eScheme
, sal_Unicode
const ** pBegin
,
1085 sal_Unicode
const * pEnd
, bool bOctets
, EncodeMechanism eMechanism
,
1086 rtl_TextEncoding eCharset
, bool bSkippedInitialSlash
,
1087 sal_uInt32 nSegmentDelimiter
, sal_uInt32 nAltSegmentDelimiter
,
1088 sal_uInt32 nQueryDelimiter
, sal_uInt32 nFragmentDelimiter
,
1089 OUStringBuffer
&rSynPath
);
// NOTE(review): only the parameter list of this declaration is present; the
// line carrying the return type and the function name is missing.  The
// parameter name suggests a path setter taking the new path text, an encode
// mechanism and a text encoding -- confirm against the upstream header and
// restore the head.
1092 OUString
const & rThePath
,
1093 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1095 // Hierarchical Path:
1097 TOOLS_DLLPRIVATE
bool checkHierarchical() const;
1099 TOOLS_DLLPRIVATE SubString
getSegment(
1100 sal_Int32 nIndex
, bool bIgnoreFinalSlash
) const;
1107 OUString
const & rTheQuery
,
1108 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
/** Remove the fragment part (by its name).

    @return bool; its meaning is defined by the implementation, which is
    not visible here.
 */
bool clearFragment();
1115 OUString
const & rTheMark
,
1116 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1120 TOOLS_DLLPRIVATE
bool hasDosVolume(FSysStyle eStyle
) const;
1124 TOOLS_DLLPRIVATE
static inline void appendEscape(
1125 OUStringBuffer
& rTheText
, sal_uInt32 nOctet
);
1127 static OUString
encodeText(
1128 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
, bool bOctets
,
1129 Part ePart
, EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1130 bool bKeepVisibleEscapes
);
1132 static inline OUString
encodeText(
1133 OUString
const & rTheText
, bool bOctets
, Part ePart
,
1134 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1135 bool bKeepVisibleEscapes
);
1137 static OUString
decode(
1138 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1139 DecodeMechanism
, rtl_TextEncoding eCharset
);
1141 inline OUString
decode(
1142 SubString
const & rSubString
,
1143 DecodeMechanism eMechanism
, rtl_TextEncoding eCharset
) const;
1145 // Specialized helpers:
1147 TOOLS_DLLPRIVATE
static bool scanIPv6reference(
1148 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1151 void changeScheme(INetProtocol eTargetScheme
);
1155 inline OUString
INetURLObject::encodeText(OUString
const & rTheText
,
1156 bool bOctets
, Part ePart
,
1157 EncodeMechanism eMechanism
,
1158 rtl_TextEncoding eCharset
,
1159 bool bKeepVisibleEscapes
)
1161 return encodeText(rTheText
.getStr(),
1162 rTheText
.getStr() + rTheText
.getLength(), bOctets
, ePart
,
1163 eMechanism
, eCharset
, bKeepVisibleEscapes
);
1166 inline OUString
INetURLObject::decode(SubString
const & rSubString
,
1167 DecodeMechanism eMechanism
,
1168 rtl_TextEncoding eCharset
) const
1170 return rSubString
.isPresent() ?
1171 decode(m_aAbsURIRef
.getStr() + rSubString
.getBegin(),
1172 m_aAbsURIRef
.getStr() + rSubString
.getEnd(),
1173 eMechanism
, eCharset
) :
// Constructor from a URI reference string, an encode mechanism and a text
// encoding.  The initializer list sets m_eScheme to INetProtocol::NotValid
// and m_eSmartScheme to INetProtocol::Http; the string is then handed to
// setAbsURIRef() with `false` in the fourth argument position.
// NOTE(review): the text stops after `false,` -- the remaining argument of
// the setAbsURIRef() call, the closing of that call and the braces of the
// constructor body are not present here.  Restore them from the upstream
// header before compiling.
1177 inline INetURLObject::INetURLObject(OUString
const & rTheAbsURIRef
,
1178 EncodeMechanism eMechanism
,
1179 rtl_TextEncoding eCharset
):
1180 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1182 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
// Re-initialises this object from a URI reference string: returns the bool
// result of setAbsURIRef(), called with the three parameters and `false` in
// the fourth argument position.
// NOTE(review): the text stops after `false,` -- the remaining argument of
// the setAbsURIRef() call, the closing of that call and the braces of the
// function body are not present here.  Restore them from the upstream
// header before compiling.
1186 inline bool INetURLObject::SetURL(OUString
const & rTheAbsURIRef
,
1187 EncodeMechanism eMechanism
,
1188 rtl_TextEncoding eCharset
)
1190 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
1194 inline INetURLObject::INetURLObject(OUString
const & rTheAbsURIRef
,
1195 INetProtocol eTheSmartScheme
,
1196 EncodeMechanism eMechanism
,
1197 rtl_TextEncoding eCharset
,
1199 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(eTheSmartScheme
)
1201 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true, eStyle
);
1204 inline bool INetURLObject::SetSmartURL(OUString
const & rTheAbsURIRef
,
1205 EncodeMechanism eMechanism
,
1206 rtl_TextEncoding eCharset
,
1209 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true,
1213 inline INetURLObject
1214 INetURLObject::smartRel2Abs(OUString
const & rTheRelURIRef
,
1215 bool & rWasAbsolute
,
1216 bool bIgnoreFragment
,
1217 EncodeMechanism eMechanism
,
1218 rtl_TextEncoding eCharset
,
1219 bool bRelativeNonURIs
,
1220 FSysStyle eStyle
) const
1222 INetURLObject aTheAbsURIRef
;
1223 convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, rWasAbsolute
,
1224 eMechanism
, eCharset
, bIgnoreFragment
, true,
1225 bRelativeNonURIs
, eStyle
);
1226 return aTheAbsURIRef
;
// Resolves rTheRelURIRef through convertRelToAbs() into a local
// INetURLObject, using EncodeMechanism::WasEncoded and
// RTL_TEXTENCODING_UTF8, with false for bIgnoreFragment and for the two
// following bool arguments; the local object is assigned through
// pTheAbsURIRef.
// NOTE(review): this text is incomplete.  Not present here are: the
// declaration of bWasAbsolute, the end of the convertRelToAbs() argument
// list, the statement controlled by the `if`, whatever guards the
// dereference of pTheAbsURIRef, the return statements and the braces of the
// body.  Restore them from the upstream header before compiling.
1229 inline bool INetURLObject::GetNewAbsURL(OUString
const & rTheRelURIRef
,
1230 INetURLObject
* pTheAbsURIRef
)
1233 INetURLObject aTheAbsURIRef
;
1235 if (!convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, bWasAbsolute
,
1236 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false/*bIgnoreFragment*/, false, false,
1240 *pTheAbsURIRef
= aTheAbsURIRef
;
1245 inline OUString
INetURLObject::GetRelURL(OUString
const & rTheBaseURIRef
,
1246 OUString
const & rTheAbsURIRef
,
1247 EncodeMechanism eEncodeMechanism
,
1248 DecodeMechanism eDecodeMechanism
,
1249 rtl_TextEncoding eCharset
,
1252 OUString aTheRelURIRef
;
1253 INetURLObject(rTheBaseURIRef
, eEncodeMechanism
, eCharset
).
1254 convertAbsToRel(rTheAbsURIRef
, aTheRelURIRef
, eEncodeMechanism
,
1255 eDecodeMechanism
, eCharset
, eStyle
);
1256 return aTheRelURIRef
;
1260 inline bool INetURLObject::translateToExternal(OUString
const &
1262 OUString
& rTheExtURIRef
,
1265 rtl_TextEncoding eCharset
)
1267 return convertIntToExt(rTheIntURIRef
, false, rTheExtURIRef
,
1268 eDecodeMechanism
, eCharset
);
1272 inline bool INetURLObject::translateToInternal(OUString
const &
1274 OUString
& rTheIntURIRef
,
1277 rtl_TextEncoding eCharset
)
1279 return convertExtToInt(rTheExtURIRef
, false, rTheIntURIRef
,
1280 eDecodeMechanism
, eCharset
);
1283 inline bool INetURLObject::SetPass(OUString
const & rThePassword
)
1285 return rThePassword
.isEmpty() ?
1287 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
);
1290 inline bool INetURLObject::SetUserAndPass(OUString
const & rTheUser
,
1291 OUString
const & rThePassword
)
1293 return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
)
1294 && (rThePassword
.isEmpty() ?
1296 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
));
// Sets the query part: for a non-empty rTheQuery the visible alternative is
// setQuery(rTheQuery, eMechanism, eCharset), whose result is returned.
// NOTE(review): this text is incomplete.  The alternative taken when
// rTheQuery.isEmpty() and the ':' of the conditional expression are not
// present (presumably a call that clears the query -- the function it would
// name is not declared in this section, so confirm against the upstream
// header), and neither are the braces of the function body.
1299 inline bool INetURLObject::SetParam(OUString
const & rTheQuery
,
1300 EncodeMechanism eMechanism
,
1301 rtl_TextEncoding eCharset
)
1303 return rTheQuery
.isEmpty() ?
1305 setQuery(rTheQuery
, eMechanism
, eCharset
);
1308 inline bool INetURLObject::SetMark(OUString
const & rTheFragment
,
1309 EncodeMechanism eMechanism
,
1310 rtl_TextEncoding eCharset
)
1312 return rTheFragment
.isEmpty() ?
1314 setFragment(rTheFragment
, eMechanism
, eCharset
);
1317 inline INetURLObject::INetURLObject(OUString
const & rFSysPath
,
1319 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1321 setFSysPath(rFSysPath
, eStyle
);
1325 inline OUString
INetURLObject::encode(OUString
const & rText
, Part ePart
,
1326 EncodeMechanism eMechanism
,
1327 rtl_TextEncoding eCharset
)
1329 return encodeText(rText
, false, ePart
, eMechanism
, eCharset
, false);
1333 inline OUString
INetURLObject::decode(OUString
const & rText
,
1334 DecodeMechanism eMechanism
,
1335 rtl_TextEncoding eCharset
)
1337 return decode(rText
.getStr(), rText
.getStr() + rText
.getLength(),
1338 eMechanism
, eCharset
);
1341 inline OUString
INetURLObject::decode(OUStringBuffer
const & rText
,
1342 DecodeMechanism eMechanism
,
1343 rtl_TextEncoding eCharset
)
1345 return decode(rText
.getStr(), rText
.getStr() + rText
.getLength(),
1346 eMechanism
, eCharset
);
1351 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */