1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef INCLUDED_TOOLS_URLOBJ_HXX
20 #define INCLUDED_TOOLS_URLOBJ_HXX
22 #include <tools/toolsdllapi.h>
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/textenc.h>
25 #include <sal/types.h>
26 #include <o3tl/typed_flags_set.hxx>
29 #include <string_view>
33 namespace com::sun::star::util
{
37 namespace com::sun::star::uno
{ template <typename
> class Reference
; }
39 // Common URL prefixes for various schemes:
/** Each constant below holds the literal prefix text of one URL scheme as an
    inline constexpr OUString.  The ftp, http, https and file prefixes include
    the "//" that follows the colon; the mailto and hid prefixes end at the
    colon. */
40 inline constexpr OUString INET_FTP_SCHEME
= u
"ftp://"_ustr
;
41 inline constexpr OUString INET_HTTP_SCHEME
= u
"http://"_ustr
;
42 inline constexpr OUString INET_HTTPS_SCHEME
= u
"https://"_ustr
;
43 inline constexpr OUString INET_FILE_SCHEME
= u
"file://"_ustr
;
44 inline constexpr OUString INET_MAILTO_SCHEME
= u
"mailto:"_ustr
;
45 inline constexpr OUString INET_HID_SCHEME
= u
"hid:"_ustr
;
47 #define URL_PREFIX_PRIV_SOFFICE "private:"
50 enum class INetProtocol
84 /** The supported notations for file system paths.
88 /** VOS notation (e.g., "//server/dir/file").
92 /** Unix notation (e.g., "/dir/file").
96 /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
100 /** Detect the used notation.
102 @descr For the following descriptions, please note that
103 whereas FSYS_DEFAULT includes all style bits, combinations of only
104 a few style bits are also possible, and are also described.
106 @descr When used to translate a file system path to a file URL,
107 the subset of the following productions for which the appropriate
108 style bit is set are checked in order (using the conventions of
109 RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4
112 Production T1 (VOS local; FSysStyle::Vos only):
117 Production T2 (VOS host; FSysStyle::Vos only):
118 "//" [host] ["/" *UCS4]
120 "file://" host "/" *UCS4
122 Production T3 (UNC; FSysStyle::Dos only):
123 "\\" [host] ["\" *UCS4]
125 "file://" host "/" *UCS4
126 replacing "\" by "/" within <*UCS4>
128 Production T4 (Unix-like DOS; FSysStyle::Dos only):
129 ALPHA ":" ["/" *UCS4]
131 "file:///" ALPHA ":/" *UCS4
132 replacing "\" by "/" within <*UCS4>
134 Production T5 (DOS; FSysStyle::Dos only):
135 ALPHA ":" ["\" *UCS4]
137 "file:///" ALPHA ":/" *UCS4
138 replacing "\" by "/" within <*UCS4>
144 replacing the delimiter by "/" within <*UCS4>. The delimiter is
145 that character from the set { "/", "\" } which appears most
146 often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/"
147 is removed from the set; if FSysStyle::Dos is not among the style
148 bits, "\" is removed from the set). If two or more
149 characters appear the same number of times, the character
150 mentioned first in that set is chosen. If the first character
151 of <*UCS4> is the delimiter, that character is not copied.
153 @descr When used to translate a file URL to a file system path,
154 the following productions are checked in order (using the
155 conventions of RFC 2234, RFC 2396, and RFC 2732):
157 Production F1 (VOS; FSysStyle::Vos):
158 "file://" host "/" fpath ["#" fragment]
162 Production F2 (DOS; FSysStyle::Dos):
163 "file:///" ALPHA ":" ["/" fpath] ["#" fragment]
165 ALPHA ":" ["\" fpath]
166 replacing "/" by "\" in <fpath>
168 Production F3 (Unix; FSysStyle::Unix):
169 "file:///" fpath ["#" fragment]
173 Detect
= Vos
| Unix
| Dos
/** Specialization of typed_flags for FSysStyle, deriving from
    is_typed_flags<FSysStyle, 0x07>.
    NOTE(review): 0x07 is presumably the mask of all valid FSysStyle bits
    (Vos | Unix | Dos, i.e. Detect) -- confirm against the enumerator values,
    which are not visible here. */
176 template<> struct typed_flags
<FSysStyle
> : is_typed_flags
<FSysStyle
, 0x07> {};
179 class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject
182 // Get- and Set-Methods:
184 /** The way input strings that represent (parts of) URIs are interpreted
187 @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by
188 sequences of escape sequences, representing the UTF-8 coded characters.
190 @descr Along with an EncodeMechanism parameter, the set-methods all
191 take an rtl_TextEncoding parameter, which is ignored unless the
192 EncodeMechanism is EncodeMechanism::WasEncoded.
194 enum class EncodeMechanism
196 /** All escape sequences that are already present are ignored, and are
197 interpreted as literal sequences of three characters.
201 /** Sequences of escape sequences, that represent characters from the
202 specified character set and that can be converted to UTF-32
203 characters, are first decoded. If they have to be encoded, they
204 are converted to UTF-8 characters and are then translated into
205 (sequences of) escape sequences. Other escape sequences are
206 copied verbatim (but using upper case hex digits).
210 /** All escape sequences that are already present are copied verbatim
211 (but using upper case hex digits).
216 /** The way strings that represent (parts of) URIs are returned from get-
219 @descr Along with a DecodeMechanism parameter, the get-methods all
220 take an rtl_TextEncoding parameter, which is ignored unless the
221 DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous.
223 enum class DecodeMechanism
225 /** The (part of the) URI is returned unchanged. Since URIs are
226 written using a subset of US-ASCII, the returned string is
227 guaranteed to contain only US-ASCII characters.
231 /** All sequences of escape sequences that represent UTF-8 coded
232 UTF-32 characters with a numerical value greater than 0x7F, are
233 replaced by the respective UTF-16 characters. All other escape
234 sequences are not decoded.
238 /** All (sequences of) escape sequences that represent characters from
239 the specified character set, and that can be converted to UTF-32,
240 are replaced by the respective UTF-16 characters. All other
241 escape sequences are not decoded.
245 /** All (sequences of) escape sequences that represent characters from
246 the specified character set, that can be converted to UTF-32, and
247 that (in the case of ASCII characters) can safely be decoded
248 without altering the meaning of the (part of the) URI, are
249 replaced by the respective UTF-16 characters. All other escape
250 sequences are not decoded.
255 // General Structure:
258 m_aAbsURIRef(256), m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
) {}
/** @return True if the stored scheme (m_eScheme) is INetProtocol::NotValid,
    false otherwise. */
260 bool HasError() const { return m_eScheme
== INetProtocol::NotValid
; }
/** Return the complete stored URI reference (m_aAbsURIRef), passed through
    decode().

    @param eMechanism  See the general discussion for get-methods.  This
    parameter has no default value and must always be supplied.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aAbsURIRef, eMechanism, eCharset). */
262 OUString
GetMainURL(DecodeMechanism eMechanism
,
263 rtl_TextEncoding eCharset
264 = RTL_TEXTENCODING_UTF8
) const
265 { return decode(m_aAbsURIRef
, eMechanism
, eCharset
); }
267 OUString
GetURLNoPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
268 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
271 OUString
GetURLNoMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
272 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
276 getAbbreviated(css::uno::Reference
< css::util::XStringWidth
> const & rStringWidth
,
278 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
279 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
282 bool operator ==(INetURLObject
const & rObject
) const;
/** Inequality, defined as the exact negation of operator ==.

    @return True if (*this == rObject) is false. */
284 bool operator !=(INetURLObject
const & rObject
) const
285 { return !(*this == rObject
); }
289 inline explicit INetURLObject(
290 std::u16string_view rTheAbsURIRef
,
291 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
292 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
294 inline bool SetURL(std::u16string_view rTheAbsURIRef
,
295 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
296 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
298 bool ConcatData(INetProtocol eTheScheme
, std::u16string_view rTheUser
,
299 std::u16string_view rThePassword
,
300 std::u16string_view rTheHost
, sal_uInt32 nThePort
,
301 std::u16string_view rThePath
);
305 inline INetURLObject(std::u16string_view rTheAbsURIRef
,
306 INetProtocol eTheSmartScheme
,
307 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
308 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
309 FSysStyle eStyle
= FSysStyle::Detect
);
/** Store eTheSmartScheme in m_eSmartScheme.  Only the member is assigned;
    nothing else of the object is touched by this call.
    NOTE(review): presumably the stored value is consulted by later
    SetSmartURL() calls -- confirm in the implementation file. */
311 void SetSmartProtocol(INetProtocol eTheSmartScheme
)
312 { m_eSmartScheme
= eTheSmartScheme
; }
315 SetSmartURL(std::u16string_view rTheAbsURIRef
,
316 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
317 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
318 FSysStyle eStyle
= FSysStyle::Detect
);
321 smartRel2Abs(OUString
const & rTheRelURIRef
,
323 bool bIgnoreFragment
= false,
324 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
325 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
326 bool bRelativeNonURIs
= false,
327 FSysStyle eStyle
= FSysStyle::Detect
) const;
332 GetNewAbsURL(OUString
const & rTheRelURIRef
,
333 INetURLObject
* pTheAbsURIRef
)
336 /** @descr If rTheRelURIRef cannot be converted to an absolute URL
337 (because of syntactic reasons), either rTheRelURIRef or an empty
338 string is returned: If all of the parameters eEncodeMechanism,
339 eDecodeMechanism and eCharset have their respective default values,
340 then rTheRelURIRef is returned unmodified; otherwise, an empty string
344 GetAbsURL(std::u16string_view rTheBaseURIRef
,
345 OUString
const & rTheRelURIRef
,
346 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
347 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
348 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
350 static inline OUString
351 GetRelURL(std::u16string_view rTheBaseURIRef
,
352 OUString
const & rTheAbsURIRef
,
353 EncodeMechanism eEncodeMechanism
= EncodeMechanism::WasEncoded
,
354 DecodeMechanism eDecodeMechanism
= DecodeMechanism::ToIUri
,
355 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
,
356 FSysStyle eStyle
= FSysStyle::Detect
);
360 OUString
getExternalURL() const;
362 static inline bool translateToExternal(std::u16string_view rTheIntURIRef
,
363 OUString
& rTheExtURIRef
,
364 DecodeMechanism eDecodeMechanism
365 = DecodeMechanism::ToIUri
,
366 rtl_TextEncoding eCharset
367 = RTL_TEXTENCODING_UTF8
);
369 static inline bool translateToInternal(std::u16string_view rTheExtURIRef
,
370 OUString
& rTheIntURIRef
,
371 DecodeMechanism eDecodeMechanism
372 = DecodeMechanism::ToIUri
,
373 rtl_TextEncoding eCharset
374 = RTL_TEXTENCODING_UTF8
);
/** @return The stored scheme (m_eScheme). */
380 INetProtocol
GetProtocol() const { return m_eScheme
; }
/** Compare the stored scheme with a given INetProtocol value.

    @param scheme  The scheme to compare against.

    @return True if scheme equals m_eScheme. */
382 bool isSchemeEqualTo(INetProtocol scheme
) const { return scheme
== m_eScheme
; }
384 bool isSchemeEqualTo(std::u16string_view scheme
) const;
386 /** Check if the scheme is one of the WebDAV schemes
389 * @return true if it is one of those schemes, either a public scheme or a private scheme.
391 bool isAnyKnownWebDAVScheme() const;
393 /** Return the URL 'prefix' for a given scheme.
395 @param eTheScheme One of the supported URL schemes.
397 @return The 'prefix' of URLs of the given scheme.
399 static const OUString
& GetScheme(INetProtocol eTheScheme
);
401 /** Return the human-readable name for a given scheme.
403 @param eTheScheme One of the supported URL schemes.
405 @return The protocol name of URLs of the given scheme.
407 static const OUString
& GetSchemeName(INetProtocol eTheScheme
);
409 static INetProtocol
CompareProtocolScheme(std::u16string_view aTheAbsURIRef
);
/** @return The result of m_aUser.isPresent(). */
413 bool HasUserData() const { return m_aUser
.isPresent(); }
/** Return the m_aUser part of the URI reference.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aUser, eMechanism, eCharset). */
415 OUString
GetUser(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
416 rtl_TextEncoding eCharset
417 = RTL_TEXTENCODING_UTF8
) const
418 { return decode(m_aUser
, eMechanism
, eCharset
); }
/** Return the m_aAuth part of the URI reference (the part this getter
    exposes as the password).

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aAuth, eMechanism, eCharset). */
420 OUString
GetPass(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
421 rtl_TextEncoding eCharset
422 = RTL_TEXTENCODING_UTF8
) const
423 { return decode(m_aAuth
, eMechanism
, eCharset
); }
/** Set the user part by forwarding to setUser() with the charset fixed to
    RTL_TEXTENCODING_UTF8.

    @param rTheUser  The new user text.

    @return Whatever setUser(rTheUser, RTL_TEXTENCODING_UTF8) returns. */
425 bool SetUser(std::u16string_view rTheUser
)
426 { return setUser(rTheUser
, RTL_TEXTENCODING_UTF8
); }
428 inline bool SetPass(std::u16string_view rThePassword
);
/** @return The result of m_aPort.isPresent(). */
432 bool HasPort() const { return m_aPort
.isPresent(); }
/** Return the m_aHost part of the URI reference.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aHost, eMechanism, eCharset). */
434 OUString
GetHost(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
435 rtl_TextEncoding eCharset
436 = RTL_TEXTENCODING_UTF8
) const
437 { return decode(m_aHost
, eMechanism
, eCharset
); }
439 OUString
GetHostPort(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
440 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
) const;
442 sal_uInt32
GetPort() const;
/** Set the host part by forwarding to setHost() with the charset fixed to
    RTL_TEXTENCODING_UTF8.

    @param rTheHost  The new host text.

    @return Whatever setHost(rTheHost, RTL_TEXTENCODING_UTF8) returns. */
444 bool SetHost(std::u16string_view rTheHost
)
445 { return setHost(rTheHost
, RTL_TEXTENCODING_UTF8
); }
447 bool SetPort(sal_uInt32 nThePort
);
/** @return True if m_aPath is not empty.  Note that this tests isEmpty()
    (length), not isPresent(). */
451 bool HasURLPath() const { return !m_aPath
.isEmpty(); }
/** Return the m_aPath part of the URI reference.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aPath, eMechanism, eCharset). */
453 OUString
GetURLPath(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
454 rtl_TextEncoding eCharset
455 = RTL_TEXTENCODING_UTF8
) const
456 { return decode(m_aPath
, eMechanism
, eCharset
); }
/** Set the path part by forwarding all arguments unchanged to setPath().

    @param rThePath  The new path text.

    @param eMechanism  See the general discussion for set-methods.

    @param eCharset  See the general discussion for set-methods.

    @return Whatever setPath(rThePath, eMechanism, eCharset) returns. */
458 bool SetURLPath(std::u16string_view rThePath
,
459 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
460 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
461 { return setPath(rThePath
, eMechanism
, eCharset
); }
463 // Hierarchical Path:
465 /** A constant to address the last segment in various methods dealing with
468 @descr It is often more efficient to address the last segment using
469 this constant, than to determine its ordinal value using
472 enum { LAST_SEGMENT
= -1 };
474 /** The number of segments in the hierarchical path.
476 @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the
479 hierarchical-path = 1*("/" segment)
481 segment = name *(";" param)
483 name = [base ["." extension]]
487 extension = *<any pchar except ".">
491 @param bIgnoreFinalSlash If true, a final slash at the end of the
492 hierarchical path does not denote an empty segment, but is ignored.
494 @return The number of segments in the hierarchical path. If the path
495 is not hierarchical, 0 is returned.
497 sal_Int32
getSegmentCount(bool bIgnoreFinalSlash
= true) const;
499 /** Remove a segment from the hierarchical path.
501 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
502 if addressing the last segment.
504 @param bIgnoreFinalSlash If true, a final slash at the end of the
505 hierarchical path does not denote an empty segment, but is ignored.
507 @return True if the segment has successfully been removed (and the
508 resulting URI is still valid). If the path is not hierarchical, or
509 the specified segment does not exist, false is returned. If false is
510 returned, the object is not modified.
512 bool removeSegment(sal_Int32 nIndex
= LAST_SEGMENT
,
513 bool bIgnoreFinalSlash
= true);
515 /** Insert a new segment into the hierarchical path.
516 A final slash at the end of the
517 hierarchical path does not denote an empty segment, but is ignored.
519 @param rTheName The name part of the new segment. The new segment
520 will contain no parameters.
522 @param bAppendFinalSlash If the new segment is appended at the end of
523 the hierarchical path, this parameter specifies whether to add a final
524 slash after it or not.
526 @param nIndex The non-negative index of the segment before which
527 to insert the new segment. LAST_SEGMENT or an nIndex that equals
528 getSegmentCount() inserts the new segment at the end of the
531 @param eMechanism See the general discussion for set-methods.
533 @param eCharset See the general discussion for set-methods.
535 @return True if the segment has successfully been inserted (and the
536 resulting URI is still valid). If the path is not hierarchical, or
537 the specified place to insert the new segment does not exist, false is
538 returned. If false is returned, the object is not modified.
540 bool insertName(std::u16string_view rTheName
,
541 bool bAppendFinalSlash
= false,
542 sal_Int32 nIndex
= LAST_SEGMENT
,
543 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
544 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
546 /** Get the name of a segment of the hierarchical path.
548 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
549 if addressing the last segment.
551 @param bIgnoreFinalSlash If true, a final slash at the end of the
552 hierarchical path does not denote an empty segment, but is ignored.
554 @param eMechanism See the general discussion for get-methods.
556 @param eCharset See the general discussion for get-methods.
558 @return The name part of the specified segment. If the path is not
559 hierarchical, or the specified segment does not exist, an empty string
562 OUString
getName(sal_Int32 nIndex
= LAST_SEGMENT
,
563 bool bIgnoreFinalSlash
= true,
564 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
565 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
568 /** Set the name of the last segment (preserving any parameters and any query or
571 @param rTheName The new name.
573 @param eMechanism See the general discussion for set-methods.
575 @param eCharset See the general discussion for set-methods.
577 @return True if the name has successfully been modified (and the
578 resulting URI is still valid). If the path is not hierarchical, or
579 a last segment does not exist, false is returned. If false is
580 returned, the object is not modified.
582 bool setName(std::u16string_view rTheName
,
583 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
584 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
586 /** Get the base of the name of a segment.
588 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
589 if addressing the last segment.
591 @param bIgnoreFinalSlash If true, a final slash at the end of the
592 hierarchical path does not denote an empty segment, but is ignored.
594 @param eMechanism See the general discussion for get-methods.
596 @param eCharset See the general discussion for get-methods.
598 @return The base part of the specified segment. If the path is
599 not hierarchical, or the specified segment does not exist, an empty
602 OUString
getBase(sal_Int32 nIndex
= LAST_SEGMENT
,
603 bool bIgnoreFinalSlash
= true,
604 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
605 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
608 /** Set the base of the name of a segment (preserving the extension).
609 A final slash at the end of the
610 hierarchical path does not denote an empty segment, but is ignored.
612 @param rTheBase The new base.
614 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
615 if addressing the last segment.
617 @param eMechanism See the general discussion for set-methods.
619 @param eCharset See the general discussion for set-methods.
621 @return True if the base has successfully been modified (and the
622 resulting URI is still valid). If the path is not hierarchical, or
623 the specified segment does not exist, false is returned. If false is
624 returned, the object is not modified.
626 bool setBase(std::u16string_view rTheBase
,
627 sal_Int32 nIndex
= LAST_SEGMENT
,
628 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
629 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
631 /** Determine whether the name of the last segment has an extension.
633 @return True if the name of the last segment has an extension.
634 If the path is not hierarchical, or a last segment does not
635 exist, false is returned.
637 bool hasExtension() const;
639 /** Get the extension of the name of a segment.
641 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
642 if addressing the last segment.
644 @param bIgnoreFinalSlash If true, a final slash at the end of the
645 hierarchical path does not denote an empty segment, but is ignored.
647 @param eMechanism See the general discussion for get-methods.
649 @param eCharset See the general discussion for get-methods.
651 @return The extension part of the specified segment. If the path is
652 not hierarchical, or the specified segment does not exist, an empty
655 OUString
getExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
656 bool bIgnoreFinalSlash
= true,
657 DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
658 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
661 /** Set the extension of the name of a segment (replacing an already
664 @param rTheExtension The new extension.
666 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
667 if addressing the last segment.
669 @param bIgnoreFinalSlash If true, a final slash at the end of the
670 hierarchical path does not denote an empty segment, but is ignored.
672 @param eCharset See the general discussion for set-methods.
674 @return True if the extension has successfully been modified (and the
675 resulting URI is still valid). If the path is not hierarchical, or
676 the specified segment does not exist, false is returned. If false is
677 returned, the object is not modified.
679 bool setExtension(std::u16string_view rTheExtension
,
680 sal_Int32 nIndex
= LAST_SEGMENT
,
681 bool bIgnoreFinalSlash
= true,
682 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
684 /** Remove the extension of the name of a segment.
686 @param nIndex The non-negative index of the segment, or LAST_SEGMENT
687 if addressing the last segment.
689 @param bIgnoreFinalSlash If true, a final slash at the end of the
690 hierarchical path does not denote an empty segment, but is ignored.
692 @return True if the extension has successfully been removed (and the
693 resulting URI is still valid), or if the name did not have an
694 extension. If the path is not hierarchical, or the specified segment
695 does not exist, false is returned. If false is returned, the object
698 bool removeExtension(sal_Int32 nIndex
= LAST_SEGMENT
,
699 bool bIgnoreFinalSlash
= true);
701 /** Determine whether the hierarchical path ends in a final slash.
703 @return True if the hierarchical path ends in a final slash. If the
704 path is not hierarchical, false is returned.
706 bool hasFinalSlash() const;
708 /** Make the hierarchical path end in a final slash (if it does not
711 @return True if a final slash has successfully been appended (and the
712 resulting URI is still valid), or if the hierarchical path already
713 ended in a final slash. If the path is not hierarchical, false is
714 returned. If false is returned, the object is not modified.
716 bool setFinalSlash();
718 /** Remove a final slash from the hierarchical path.
720 @return True if a final slash has successfully been removed (and the
721 resulting URI is still valid), or if the hierarchical path already did
722 not end in a final slash. If the path is not hierarchical, false is
723 returned. If false is returned, the object is not modified.
725 bool removeFinalSlash();
/** @return The result of m_aQuery.isPresent(). */
729 bool HasParam() const { return m_aQuery
.isPresent(); }
/** Return the m_aQuery part of the URI reference.  The decode mechanism is
    hard-wired to DecodeMechanism::NONE; the caller cannot choose it.

    @param eCharset  Passed through to decode().
    NOTE(review): per the general discussion for get-methods the charset is
    ignored unless the mechanism is WithCharset or Unambiguous, so this
    parameter presumably has no effect here -- confirm.

    @return The result of decode(m_aQuery, DecodeMechanism::NONE, eCharset). */
731 OUString
GetParam(rtl_TextEncoding eCharset
732 = RTL_TEXTENCODING_UTF8
) const
733 { return decode(m_aQuery
, DecodeMechanism::NONE
, eCharset
); }
735 inline bool SetParam(std::u16string_view rTheQuery
,
736 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
737 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
/** @return The result of m_aFragment.isPresent(). */
741 bool HasMark() const { return m_aFragment
.isPresent(); }
/** Return the m_aFragment part of the URI reference.

    @param eMechanism  See the general discussion for get-methods.

    @param eCharset  See the general discussion for get-methods.

    @return The result of decode(m_aFragment, eMechanism, eCharset). */
743 OUString
GetMark(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
744 rtl_TextEncoding eCharset
745 = RTL_TEXTENCODING_UTF8
) const
746 { return decode(m_aFragment
, eMechanism
, eCharset
); }
748 inline bool SetMark(std::u16string_view rTheFragment
,
749 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
750 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
754 /** Return the file system path represented by a file URL (ignoring any
757 @param eStyle The notation of the returned file system path.
759 @param pDelimiter Upon successful return, this parameter can return
760 the character that is the 'main' delimiter within the returned file
761 system path (e.g., "/" for Unix, "\" for DOS). This is
762 especially useful for routines that later try to shorten the returned
763 file system path at a 'good' position, e.g. to fit it into some
764 limited display space.
766 @return The file system path represented by this file URL. If this
767 file URL does not represent a file system path according to the
768 specified notation, or if this is not a file URL at all, an empty
771 OUString
getFSysPath(FSysStyle eStyle
, sal_Unicode
* pDelimiter
= nullptr)
775 std::unique_ptr
<SvMemoryStream
> getData() const;
781 PART_USER_PASSWORD
= 0x00001,
782 PART_FPATH
= 0x00008,
783 PART_AUTHORITY
= 0x00010,
784 PART_REL_SEGMENT_EXTRA
= 0x00020,
786 PART_HTTP_PATH
= 0x00080,
787 PART_MESSAGE_ID_PATH
= 0x00100,
788 PART_MAILTO
= 0x00200,
789 PART_PATH_BEFORE_QUERY
= 0x00400,
790 PART_PCHAR
= 0x00800,
791 PART_VISIBLE
= 0x01000,
792 PART_VISIBLE_NONSPECIAL
= 0x02000,
793 PART_UNO_PARAM_VALUE
= 0x04000,
794 PART_UNAMBIGUOUS
= 0x08000,
795 PART_URIC_NO_SLASH
= 0x10000,
796 PART_HTTP_QUERY
= 0x20000, //TODO! unused?
799 enum class EscapeType
806 /** Encode some text as part of a URI.
808 @param rText Some text (for its interpretation, see the general
809 discussion for set-methods).
811 @param ePart The part says which characters are 'forbidden' and must
812 be encoded (replaced by escape sequences). Characters outside the US-
813 ASCII range are always 'forbidden.'
815 @param eMechanism See the general discussion for set-methods.
817 @param eCharset See the general discussion for set-methods.
819 @return The text, encoded according to the given mechanism and
820 charset ('forbidden' characters replaced by escape sequences).
822 static OUString
encode( std::u16string_view rText
, Part ePart
,
823 EncodeMechanism eMechanism
,
824 rtl_TextEncoding eCharset
825 = RTL_TEXTENCODING_UTF8
);
828 /** Decode some text.
830 @param rText Some (encoded) text.
832 @param eMechanism See the general discussion for get-methods.
834 @param eCharset See the general discussion for get-methods.
836 @return The text, decoded according to the given mechanism and
837 charset (escape sequences replaced by 'raw' characters).
839 static inline OUString
decode(std::u16string_view rText
,
840 DecodeMechanism eMechanism
,
841 rtl_TextEncoding eCharset
842 = RTL_TEXTENCODING_UTF8
);
844 static void appendUCS4Escape(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
);
846 static void appendUCS4(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
,
847 EscapeType eEscapeType
, Part ePart
,
848 rtl_TextEncoding eCharset
, bool bKeepVisibleEscapes
);
850 static sal_uInt32
getUTF32(sal_Unicode
const *& rBegin
,
851 sal_Unicode
const * pEnd
,
852 EncodeMechanism eMechanism
,
853 rtl_TextEncoding eCharset
,
854 EscapeType
& rEscapeType
);
856 // Specialized helpers:
858 static sal_uInt32
scanDomain(sal_Unicode
const *& rBegin
,
859 sal_Unicode
const * pEnd
,
862 // OBSOLETE Hierarchical Path:
864 OUString
GetPartBeforeLastName() const;
866 /** Get the last segment in the path.
868 @param eMechanism See the general discussion for get-methods.
870 @param eCharset See the general discussion for get-methods.
872 @return For a hierarchical URL, the last segment (everything after
873 the last unencoded '/'). Note that this last segment may be empty. If
874 the URL is not hierarchical, an empty string is returned.
876 OUString
GetLastName(DecodeMechanism eMechanism
= DecodeMechanism::ToIUri
,
877 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
)
880 /** Get the 'extension' of the last segment in the path.
882 @return For a hierarchical URL, everything after the first unencoded
883 '.' in the last segment of the path. Note that this 'extension' may
884 be empty. If the URL is not hierarchical, or if the last segment does
885 not contain an unencoded '.', an empty string is returned.
887 OUString
GetFileExtension() const;
889 bool Append(std::u16string_view rTheSegment
,
890 EncodeMechanism eMechanism
= EncodeMechanism::WasEncoded
,
891 rtl_TextEncoding eCharset
= RTL_TEXTENCODING_UTF8
);
895 // OBSOLETE File URLs:
897 OUString
PathToFileName() const;
899 OUString
GetFull() const;
901 OUString
GetPath() const;
903 void SetBase(std::u16string_view rTheBase
);
905 OUString
GetBase() const;
907 void SetExtension(std::u16string_view rTheExtension
);
909 OUString
CutExtension();
/** @return Always true; the result does not depend on any object state or
    argument. */
911 static bool IsCaseSensitive() { return true; }
913 void changeScheme(INetProtocol eTargetScheme
);
915 // INetProtocol::Macro, INetProtocol::Uno, INetProtocol::Slot,
916 // vnd.sun.star.script, etc. All the types of URLs which shouldn't
917 // be accepted from an outside controlled source
918 bool IsExoticProtocol() const;
921 // General Structure:
923 class SAL_DLLPRIVATE SubString
/** Construct a sub-string descriptor from a begin index and a length.

    @param nTheBegin  Stored in m_nBegin.  The default, -1, is the value
    that isPresent() treats as "not present".

    @param nTheLength  Stored in m_nLength.  Defaults to 0. */
929 explicit SubString(sal_Int32 nTheBegin
= -1,
930 sal_Int32 nTheLength
= 0):
931 m_nBegin(nTheBegin
), m_nLength(nTheLength
) {}
/** @return True if m_nBegin differs from -1. */
933 bool isPresent() const { return m_nBegin
!= -1; }
/** @return True if m_nLength is 0.  m_nBegin is not consulted. */
935 bool isEmpty() const { return m_nLength
== 0; }
/** @return The stored begin index (m_nBegin). */
937 sal_Int32
getBegin() const { return m_nBegin
; }
/** @return The stored length (m_nLength). */
939 sal_Int32
getLength() const { return m_nLength
; }
/** @return m_nBegin + m_nLength, i.e. the index one past the last covered
    position.  No check is made that the sub-string is present. */
941 sal_Int32
getEnd() const { return m_nBegin
+ m_nLength
; }
945 sal_Int32
set(OUStringBuffer
& rString
,
946 std::u16string_view rSubString
,
947 sal_Int32 nTheBegin
);
949 sal_Int32
set(OUString
& rString
,
950 std::u16string_view rSubString
);
952 sal_Int32
set(OUStringBuffer
& rString
,
953 std::u16string_view rSubString
);
955 inline void operator +=(sal_Int32 nDelta
);
957 int compare(SubString
const & rOther
,
958 OUStringBuffer
const & rThisString
,
959 OUStringBuffer
const & rOtherString
) const;
962 OUStringBuffer m_aAbsURIRef
;
970 SubString m_aFragment
;
971 INetProtocol m_eScheme
;
972 INetProtocol m_eSmartScheme
;
974 TOOLS_DLLPRIVATE
void setInvalid();
977 std::u16string_view rTheAbsURIRef
,
978 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
, bool bSmart
,
983 bool convertRelToAbs(
984 OUString
const & rTheRelURIRef
,
985 INetURLObject
& rTheAbsURIRef
, bool & rWasAbsolute
,
986 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
987 bool bIgnoreFragment
, bool bSmart
, bool bRelativeNonURIs
,
988 FSysStyle eStyle
) const;
990 bool convertAbsToRel(
991 OUString
const & rTheAbsURIRef
,
992 OUString
& rTheRelURIRef
, EncodeMechanism eEncodeMechanism
,
993 DecodeMechanism eDecodeMechanism
, rtl_TextEncoding eCharset
,
994 FSysStyle eStyle
) const;
998 static bool convertIntToExt(
999 std::u16string_view rTheIntURIRef
,
1000 OUString
& rTheExtURIRef
, DecodeMechanism eDecodeMechanism
,
1001 rtl_TextEncoding eCharset
);
1003 static bool convertExtToInt(
1004 std::u16string_view rTheExtURIRef
,
1005 OUString
& rTheIntURIRef
, DecodeMechanism eDecodeMechanism
,
1006 rtl_TextEncoding eCharset
);
1012 TOOLS_DLLPRIVATE
static inline SchemeInfo
const & getSchemeInfo(
1013 INetProtocol eTheScheme
);
1015 TOOLS_DLLPRIVATE
inline SchemeInfo
const & getSchemeInfo() const;
1017 TOOLS_DLLPRIVATE
static PrefixInfo
const * getPrefix(
1018 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1022 TOOLS_DLLPRIVATE sal_Int32
getAuthorityBegin() const;
1024 TOOLS_DLLPRIVATE SubString
getAuthority() const;
1029 std::u16string_view rTheUser
,
1030 rtl_TextEncoding eCharset
);
1032 bool clearPassword();
1035 std::u16string_view rThePassword
,
1036 rtl_TextEncoding eCharset
);
1040 TOOLS_DLLPRIVATE
static bool parseHost(
1041 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
,
1042 OUStringBuffer
* pCanonic
);
1044 TOOLS_DLLPRIVATE
static bool parseHostOrNetBiosName(
1045 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1046 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1047 bool bNetBiosName
, OUStringBuffer
* pCanonic
);
1050 std::u16string_view rTheHost
,
1051 rtl_TextEncoding eCharset
);
1055 TOOLS_DLLPRIVATE
static bool parsePath(
1056 INetProtocol eScheme
, sal_Unicode
const ** pBegin
,
1057 sal_Unicode
const * pEnd
, EncodeMechanism eMechanism
,
1058 rtl_TextEncoding eCharset
, bool bSkippedInitialSlash
,
1059 sal_uInt32 nSegmentDelimiter
, sal_uInt32 nAltSegmentDelimiter
,
1060 sal_uInt32 nQueryDelimiter
, sal_uInt32 nFragmentDelimiter
,
1061 OUStringBuffer
&rSynPath
);
1064 std::u16string_view rThePath
,
1065 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1067 // Hierarchical Path:
1069 TOOLS_DLLPRIVATE
bool checkHierarchical() const;
1071 TOOLS_DLLPRIVATE SubString
getSegment(
1072 sal_Int32 nIndex
, bool bIgnoreFinalSlash
) const;
1079 std::u16string_view rTheQuery
,
1080 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
/** Non-const member taking no arguments and returning bool; by its name
    it drops the fragment component (TODO confirm; defined elsewhere). */
bool clearFragment();
1087 std::u16string_view rTheMark
,
1088 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
);
1092 TOOLS_DLLPRIVATE
bool hasDosVolume(FSysStyle eStyle
) const;
1096 TOOLS_DLLPRIVATE
static inline void appendEscape(
1097 OUStringBuffer
& rTheText
, sal_uInt32 nOctet
);
1099 static void encodeText(
1100 OUStringBuffer
& rOutputBuffer
,
1101 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1102 Part ePart
, EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1103 bool bKeepVisibleEscapes
);
1105 static inline void encodeText(
1106 OUStringBuffer
& rOutputBuffer
,
1107 std::u16string_view rTheText
, Part ePart
,
1108 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
,
1109 bool bKeepVisibleEscapes
);
1111 static OUString
decode(
1112 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
1113 DecodeMechanism
, rtl_TextEncoding eCharset
);
1115 inline OUString
decode(
1116 SubString
const & rSubString
,
1117 DecodeMechanism eMechanism
, rtl_TextEncoding eCharset
) const;
1119 // Specialized helpers:
1121 TOOLS_DLLPRIVATE
static bool scanIPv6reference(
1122 sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
);
1126 inline void INetURLObject::encodeText( OUStringBuffer
& rOutputBuffer
,
1127 std::u16string_view rTheText
,
1129 EncodeMechanism eMechanism
,
1130 rtl_TextEncoding eCharset
,
1131 bool bKeepVisibleEscapes
)
1133 encodeText(rOutputBuffer
,
1135 rTheText
.data() + rTheText
.size(), ePart
,
1136 eMechanism
, eCharset
, bKeepVisibleEscapes
);
// Decodes the part of m_aAbsURIRef described by rSubString: when
// rSubString.isPresent() holds, the range from getBegin() to getEnd()
// within the buffer is handed to the pointer-range decode() overload
// together with eMechanism and eCharset, and that result is returned.
// NOTE(review): the alternative returned when the substring is absent,
// and the function's braces, are not visible in this extract -- confirm
// against the full header before editing.
1139 inline OUString
INetURLObject::decode(SubString
const & rSubString
,
1140 DecodeMechanism eMechanism
,
1141 rtl_TextEncoding eCharset
) const
1143 return rSubString
.isPresent() ?
1144 decode(m_aAbsURIRef
.getStr() + rSubString
.getBegin(),
1145 m_aAbsURIRef
.getStr() + rSubString
.getEnd(),
1146 eMechanism
, eCharset
) :
// Constructor from an absolute URI reference.  The member initialisers
// construct m_aAbsURIRef from rTheAbsURIRef.size() * 2 (presumably an
// initial buffer capacity -- TODO confirm), set m_eScheme to
// INetProtocol::NotValid and m_eSmartScheme to INetProtocol::Http; the
// body then calls setAbsURIRef() with false as its fourth argument (the
// smart-scheme constructor passes true there).
// NOTE(review): the final setAbsURIRef() argument and the braces are not
// visible in this extract -- confirm against the full header.
1150 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef
,
1151 EncodeMechanism eMechanism
,
1152 rtl_TextEncoding eCharset
):
1153 m_aAbsURIRef(rTheAbsURIRef
.size() * 2), m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(INetProtocol::Http
)
1155 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
// Replaces this object's URL: forwards rTheAbsURIRef, eMechanism and
// eCharset to setAbsURIRef() with false as the fourth argument and
// returns that call's bool result.
// NOTE(review): the final setAbsURIRef() argument and the braces are not
// visible in this extract -- confirm against the full header.
1159 inline bool INetURLObject::SetURL(std::u16string_view rTheAbsURIRef
,
1160 EncodeMechanism eMechanism
,
1161 rtl_TextEncoding eCharset
)
1163 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, false,
1167 inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef
,
1168 INetProtocol eTheSmartScheme
,
1169 EncodeMechanism eMechanism
,
1170 rtl_TextEncoding eCharset
,
1172 m_eScheme(INetProtocol::NotValid
), m_eSmartScheme(eTheSmartScheme
)
1174 setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true, eStyle
);
// Smart-mode counterpart of SetURL(): forwards rTheAbsURIRef, eMechanism
// and eCharset to setAbsURIRef() with true as the fourth argument and
// returns that call's bool result.
// NOTE(review): the last parameter of this function, the final
// setAbsURIRef() argument and the braces are not visible in this
// extract -- confirm against the full header.
1177 inline bool INetURLObject::SetSmartURL(std::u16string_view rTheAbsURIRef
,
1178 EncodeMechanism eMechanism
,
1179 rtl_TextEncoding eCharset
,
1182 return setAbsURIRef(rTheAbsURIRef
, eMechanism
, eCharset
, true,
1186 inline INetURLObject
1187 INetURLObject::smartRel2Abs(OUString
const & rTheRelURIRef
,
1188 bool & rWasAbsolute
,
1189 bool bIgnoreFragment
,
1190 EncodeMechanism eMechanism
,
1191 rtl_TextEncoding eCharset
,
1192 bool bRelativeNonURIs
,
1193 FSysStyle eStyle
) const
1195 INetURLObject aTheAbsURIRef
;
1196 convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, rWasAbsolute
,
1197 eMechanism
, eCharset
, bIgnoreFragment
, true,
1198 bRelativeNonURIs
, eStyle
);
1199 return aTheAbsURIRef
;
// Calls convertRelToAbs() on rTheRelURIRef with
// EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8 and false for
// bIgnoreFragment, bSmart and bRelativeNonURIs, writing into the local
// aTheAbsURIRef; a later statement move-assigns that local into
// *pTheAbsURIRef.
// NOTE(review): the declaration of bWasAbsolute, the final
// convertRelToAbs() argument, the statements around the negated test and
// the move-assignment (including the return statements), any trailing
// qualifier and the braces are not visible in this extract -- confirm
// against the full header.
1202 inline bool INetURLObject::GetNewAbsURL(OUString
const & rTheRelURIRef
,
1203 INetURLObject
* pTheAbsURIRef
)
1206 INetURLObject aTheAbsURIRef
;
1208 if (!convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
, bWasAbsolute
,
1209 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false/*bIgnoreFragment*/, false, false,
1213 *pTheAbsURIRef
= std::move(aTheAbsURIRef
);
1218 inline OUString
INetURLObject::GetRelURL(std::u16string_view rTheBaseURIRef
,
1219 OUString
const & rTheAbsURIRef
,
1220 EncodeMechanism eEncodeMechanism
,
1221 DecodeMechanism eDecodeMechanism
,
1222 rtl_TextEncoding eCharset
,
1225 OUString aTheRelURIRef
;
1226 INetURLObject(rTheBaseURIRef
, eEncodeMechanism
, eCharset
).
1227 convertAbsToRel(rTheAbsURIRef
, aTheRelURIRef
, eEncodeMechanism
,
1228 eDecodeMechanism
, eCharset
, eStyle
);
1229 return aTheRelURIRef
;
1233 inline bool INetURLObject::translateToExternal(std::u16string_view
1235 OUString
& rTheExtURIRef
,
1238 rtl_TextEncoding eCharset
)
1240 return convertIntToExt(rTheIntURIRef
, rTheExtURIRef
,
1241 eDecodeMechanism
, eCharset
);
1245 inline bool INetURLObject::translateToInternal(std::u16string_view
1247 OUString
& rTheIntURIRef
,
1250 rtl_TextEncoding eCharset
)
1252 return convertExtToInt(rTheExtURIRef
, rTheIntURIRef
,
1253 eDecodeMechanism
, eCharset
);
// Chooses between two actions depending on rThePassword.empty(); the
// visible (non-empty) alternative forwards the text to setPassword()
// with RTL_TEXTENCODING_UTF8, and the selected call's result is
// returned.
// NOTE(review): the alternative taken for an empty password and the
// braces are not visible in this extract -- confirm against the full
// header.
1256 inline bool INetURLObject::SetPass(std::u16string_view rThePassword
)
1258 return rThePassword
.empty() ?
1260 setPassword(rThePassword
, RTL_TEXTENCODING_UTF8
);
// Tests rTheQuery.empty() first; the visible return statement forwards
// rTheQuery, eMechanism and eCharset to setQuery() and returns its bool
// result.
// NOTE(review): the statements controlled by the empty() test and the
// braces are not visible in this extract, so what happens for an empty
// query cannot be told from here -- confirm against the full header.
1263 inline bool INetURLObject::SetParam(std::u16string_view rTheQuery
,
1264 EncodeMechanism eMechanism
,
1265 rtl_TextEncoding eCharset
)
1267 if (rTheQuery
.empty())
1272 return setQuery(rTheQuery
, eMechanism
, eCharset
);
// Chooses between two actions depending on rTheFragment.empty(); the
// visible (non-empty) alternative forwards rTheFragment, eMechanism and
// eCharset to setFragment(), and the selected call's result is returned.
// NOTE(review): the alternative taken for an empty fragment and the
// braces are not visible in this extract -- confirm against the full
// header.
1275 inline bool INetURLObject::SetMark(std::u16string_view rTheFragment
,
1276 EncodeMechanism eMechanism
,
1277 rtl_TextEncoding eCharset
)
1279 return rTheFragment
.empty() ?
1281 setFragment(rTheFragment
, eMechanism
, eCharset
);
1285 inline OUString
INetURLObject::encode(std::u16string_view rText
, Part ePart
,
1286 EncodeMechanism eMechanism
,
1287 rtl_TextEncoding eCharset
)
1289 OUStringBuffer aBuf
;
1290 encodeText(aBuf
, rText
, ePart
, eMechanism
, eCharset
, false);
1291 return aBuf
.makeStringAndClear();
1295 inline OUString
INetURLObject::decode(std::u16string_view rText
,
1296 DecodeMechanism eMechanism
,
1297 rtl_TextEncoding eCharset
)
1299 return decode(rText
.data(), rText
.data() + rText
.size(),
1300 eMechanism
, eCharset
);
1305 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */