1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <tools/urlobj.hxx>
23 #include <tools/debug.hxx>
24 #include <tools/inetmime.hxx>
25 #include <tools/stream.hxx>
26 #include <com/sun/star/uno/Reference.hxx>
27 #include <com/sun/star/util/XStringWidth.hpp>
28 #include <o3tl/enumarray.hxx>
29 #include <osl/diagnose.h>
30 #include <osl/file.hxx>
31 #include <rtl/character.hxx>
32 #include <rtl/string.h>
33 #include <rtl/textenc.h>
34 #include <rtl/ustring.hxx>
35 #include <sal/log.hxx>
36 #include <sal/types.h>
42 #include <string_view>
46 #include <com/sun/star/uno/Sequence.hxx>
47 #include <comphelper/base64.hxx>
53 /* The URI grammar (using RFC 2234 conventions).
55 Constructs of the form
56 {reference <rule1> using rule2}
57 stand for a rule matching the given rule1 specified in the given reference,
58 encoded to URI syntax using rule2 (as specified in this URI grammar).
61 ; RFC 1738, RFC 2396, RFC 2732, private
62 login = [user [":" password] "@"] hostport
63 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
64 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
65 hostport = host [":" port]
66 host = incomplete-hostname / hostname / IPv4address / IPv6reference
67 incomplete-hostname = *(domainlabel ".") domainlabel
68 hostname = *(domainlabel ".") toplabel ["."]
69 domainlabel = alphanum [*(alphanum / "-") alphanum]
70 toplabel = ALPHA [*(alphanum / "-") alphanum]
71 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
72 IPv6reference = "[" hexpart [":" IPv4address] "]"
73 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
74 hexseq = hex4 *(":" hex4)
77 escaped = "%" HEXDIG HEXDIG
78 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
79 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
80 alphanum = ALPHA / DIGIT
81 unreserved = alphanum / mark
82 uric = escaped / reserved / unreserved
83 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
87 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
92 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
93 segment = *(pchar / ";")
96 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
97 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
99 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
103 mailto-url = "MAILTO:" [to] [headers]
104 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
105 headers = "?" header *("&" header)
106 header = hname "=" hvalue
107 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
108 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
111 ; private (see RFC 1738, RFC 2396)
112 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
113 segment = *(pchar / ";")
117 private-url = "PRIVATE:" path ["?" *uric]
118 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
122 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
123 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
124 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
128 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
129 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
133 slot-url = "SLOT:" path ["?" *uric]
134 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
138 macro-url = "MACRO:" path ["?" *uric]
139 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
143 javascript-url = "JAVASCRIPT:" *uric
147 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
148 mediatype = [type "/" subtype] *(";" attribute "=" value)
149 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
150 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
151 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
152 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
156 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
160 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
161 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
165 uno-url = ".UNO:" path ["?" *uric]
166 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
170 component-url = ".COMPONENT:" path ["?" *uric]
171 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
175 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
176 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
180 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
181 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
182 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
183 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
184 extension = ["!"] ["X-"] extoken ["=" exvalue]
185 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
186 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
194 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
195 opaque_part = uric_no_slash *uric
196 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
200 telnet-url = "TELNET://" login ["/"]
204 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
205 opaque_part = uric_no_slash *uric
206 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
210 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
215 unknown-url = scheme ":" 1*uric
216 scheme = ALPHA *(alphanum / "+" / "-" / ".")
219 ; private (http://ubiqx.org/cifs/Appendix-D.html):
220 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
221 segment = *(pchar / ";")
// SubString::clear(): nDelta is initialised to the negation of the
// substring's current length (m_nLength).
// NOTE(review): presumably this is the length change reported back to the
// caller once the substring has been emptied -- confirm against the full body.
224 inline sal_Int32
INetURLObject::SubString::clear()
226 sal_Int32 nDelta
= -m_nLength
;
// SubString::set (OUStringBuffer overload): replaces the range described by
// this SubString (m_nBegin, m_nLength) inside rString with rSubString.
//   rString    - buffer that is modified in place
//   rSubString - replacement text
232 inline sal_Int32
INetURLObject::SubString::set(OUStringBuffer
& rString
,
233 OUString
const & rSubString
)
// Length difference between the replacement text and the old range.
// NOTE(review): presumably the function's return value -- confirm.
235 sal_Int32 nDelta
= rSubString
.getLength() - m_nLength
;
// Drop the old range, then insert the replacement at the same offset.
237 rString
.remove(m_nBegin
, m_nLength
);
238 rString
.insert(m_nBegin
, rSubString
);
// The substring now spans exactly the inserted text; m_nBegin is unchanged.
240 m_nLength
= rSubString
.getLength();
// SubString::set (OUString overload): same operation as the OUStringBuffer
// overload, but rString is an OUString, so the result of replaceAt() is
// assigned back to rString.
//   rString    - string that receives the updated text
//   rSubString - replacement text
244 inline sal_Int32
INetURLObject::SubString::set(OUString
& rString
,
245 OUString
const & rSubString
)
// Length difference between the replacement text and the old range.
// NOTE(review): presumably the function's return value -- confirm.
247 sal_Int32 nDelta
= rSubString
.getLength() - m_nLength
;
// Replace m_nLength characters starting at m_nBegin with rSubString.
249 rString
= rString
.replaceAt(m_nBegin
, m_nLength
, rSubString
);
// Record the new length; m_nBegin is unchanged.
251 m_nLength
= rSubString
.getLength();
// SubString::set (repositioning overload): first moves the start of this
// SubString to nTheBegin, then delegates to the two-argument
// set(OUStringBuffer &, OUString const &) and returns its result.
255 inline sal_Int32
INetURLObject::SubString::set(OUStringBuffer
& rString
,
256 OUString
const & rSubString
,
259 m_nBegin
= nTheBegin
;
260 return set(rString
, rSubString
);
// SubString::operator +=: shifts the start offset (m_nBegin) by nDelta.
// The visible statement does not touch m_nLength.
263 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta
)
266 m_nBegin
= m_nBegin
+ nDelta
;
// SubString::compare: three-way comparison of the text this SubString
// denotes in rThisString with the text rOther denotes in rOtherString.
//   rOther       - the other substring descriptor
//   rThisString  - buffer that this SubString indexes into
//   rOtherString - buffer that rOther indexes into
269 int INetURLObject::SubString::compare(SubString
const & rOther
,
270 OUStringBuffer
const & rThisString
,
271 OUStringBuffer
const & rOtherString
) const
// Only the common prefix (the shorter of the two lengths) is walked
// character by character.
273 sal_Int32 len
= std::min(m_nLength
, rOther
.m_nLength
);
// p1 / end delimit the common-prefix range in this string; p2 is the
// matching start position in the other string.
274 sal_Unicode
const * p1
= rThisString
.getStr() + m_nBegin
;
275 sal_Unicode
const * end
= p1
+ len
;
276 sal_Unicode
const * p2
= rOtherString
.getStr() + rOther
.m_nBegin
;
280 } else if (*p1
> *p2
) {
// Length tie-break: -1 if this substring is shorter, 1 if it is longer.
286 return m_nLength
< rOther
.m_nLength
? -1
287 : m_nLength
> rOther
.m_nLength
? 1
// Static description of one URL scheme; instances are the entries of the
// table in getSchemeInfo(INetProtocol).
291 struct INetURLObject::SchemeInfo
// Scheme name; the first string of each table entry, e.g. "ftp".
293 char const * m_pScheme
;
// Scheme prefix; the second string of each table entry, e.g. "ftp://".
294 char const * m_pPrefix
;
// NOTE(review): presumably marks schemes with a hierarchical path -- confirm.
301 bool m_bHierarchical
;
// Description of one recognised URL prefix, as returned by getPrefix() and
// consumed by setAbsURIRef().
305 struct INetURLObject::PrefixInfo
// Kind values are compared with >= : setAbsURIRef() emits m_pTranslatedPrefix
// instead of m_pPrefix when the kind is >= Kind::External, so the enumerator
// order must not change.
307 enum class Kind
{ Official
, Internal
, External
}; // order is important!
// Prefix text emitted by setAbsURIRef() for kinds below Kind::External.
309 char const * m_pPrefix
;
// Prefix text emitted by setAbsURIRef() for kinds >= Kind::External.
310 char const * m_pTranslatedPrefix
;
// Protocol that setAbsURIRef() stores into INetURLObject::m_eScheme when this
// prefix is found.
311 INetProtocol m_eScheme
;
// getSchemeInfo(eTheScheme): returns a reference to the SchemeInfo entry for
// the given protocol. The entries live in a function-local static
// o3tl::enumarray indexed by INetProtocol. Each entry starts with the scheme
// name and its prefix, followed by boolean flags.
316 inline INetURLObject::SchemeInfo
const &
317 INetURLObject::getSchemeInfo(INetProtocol eTheScheme
)
319 static o3tl::enumarray
<INetProtocol
, SchemeInfo
> const map
= {
321 "", "", false, false, false, false, false, false, false, false},
323 "ftp", "ftp://", true, true, false, true, true, true, true,
326 "http", "http://", true, false, false, false, true, true, true,
329 "file", "file://", true, false, false, false, true, false, true,
332 "mailto", "mailto:", false, false, false, false, false, false,
335 "vnd.sun.star.webdav", "vnd.sun.star.webdav://", true, false,
336 false, false, true, true, true, true},
338 "private", "private:", false, false, false, false, false, false,
341 "vnd.sun.star.help", "vnd.sun.star.help://", true, false, false,
342 false, false, false, true, true},
344 "https", "https://", true, false, false, false, true, true,
347 "slot", "slot:", false, false, false, false, false, false, false,
350 "macro", "macro:", false, false, false, false, false, false,
353 "javascript", "javascript:", false, false, false, false, false,
354 false, false, false},
356 "data", "data:", false, false, false, false, false, false, false,
359 "cid", "cid:", false, false, false, false, false, false, false,
362 "vnd.sun.star.hier", "vnd.sun.star.hier:", true, false, false,
363 false, false, false, true, false},
365 ".uno", ".uno:", false, false, false, false, false, false, false,
368 ".component", ".component:", false, false, false, false, false,
371 "vnd.sun.star.pkg", "vnd.sun.star.pkg://", true, false, false,
372 false, false, false, true, true},
374 "ldap", "ldap://", true, false, false, false, true, true,
377 "db", "db:", false, false, false, false, false, false, false,
380 "vnd.sun.star.cmd", "vnd.sun.star.cmd:", false, false, false,
381 false, false, false, false, false},
383 "telnet", "telnet://", true, true, false, true, true, true,
386 "vnd.sun.star.expand", "vnd.sun.star.expand:", false, false,
387 false, false, false, false, false, false},
389 "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", false, false, false,
390 false, false, false, true, false},
392 "", "", false, false, false, false, true, true, true, false },
394 "smb", "smb://", true, true, false, true, true, true, true,
397 "hid", "hid:", false, false, false, false, false, false, false,
400 "sftp", "sftp://", true, true, false, true, true, true, true,
403 "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", true, true,
404 false, false, true, false, true, true} };
// Direct lookup by protocol enumerator.
405 return map
[eTheScheme
];
// getSchemeInfo(): convenience overload that looks up the SchemeInfo for this
// object's own scheme (m_eScheme).
408 inline INetURLObject::SchemeInfo
const & INetURLObject::getSchemeInfo() const
410 return getSchemeInfo(m_eScheme
);
// getHexDigit(nWeight): returns the hexadecimal digit character for a value
// in the range 0..15, using upper-case letters. The range is enforced by an
// assert only; the function then indexes a 16-entry static table.
415 sal_Unicode
getHexDigit(sal_uInt32 nWeight
)
417 assert(nWeight
< 16);
418 static const sal_Unicode aDigits
[16]
419 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
421 return aDigits
[nWeight
];
// appendEscape: appends a percent escape for nOctet to rTheText, i.e. '%'
// followed by two hexadecimal digits obtained from getHexDigit().
427 inline void INetURLObject::appendEscape(OUStringBuffer
& rTheText
,
430 rTheText
.append( '%' );
// High nibble first ...
431 rTheText
.append( getHexDigit(nOctet
>> 4) );
// ... then the low nibble.
432 rTheText
.append( getHexDigit(nOctet
& 15) );
// Two-letter aliases for the INetURLObject::PART_* constants. They keep the
// rows of aMustEncodeMap compact, where they are added together per character.
439 PA
= INetURLObject::PART_USER_PASSWORD
,
440 PD
= INetURLObject::PART_FPATH
,
441 PE
= INetURLObject::PART_AUTHORITY
,
442 PF
= INetURLObject::PART_REL_SEGMENT_EXTRA
,
443 PG
= INetURLObject::PART_URIC
,
444 PH
= INetURLObject::PART_HTTP_PATH
,
445 PI
= INetURLObject::PART_MESSAGE_ID_PATH
,
446 PJ
= INetURLObject::PART_MAILTO
,
447 PK
= INetURLObject::PART_PATH_BEFORE_QUERY
,
448 PL
= INetURLObject::PART_PCHAR
,
449 PM
= INetURLObject::PART_VISIBLE
,
450 PN
= INetURLObject::PART_VISIBLE_NONSPECIAL
,
451 PO
= INetURLObject::PART_UNO_PARAM_VALUE
,
452 PP
= INetURLObject::PART_UNAMBIGUOUS
,
453 PQ
= INetURLObject::PART_URIC_NO_SLASH
,
454 PR
= INetURLObject::PART_HTTP_QUERY
,
// aMustEncodeMap: lookup table with one entry per ASCII code point (128
// entries). Each entry is the sum of the part aliases (PA..PR) for the URL
// parts listed for that character. mustEncode() reports that a character
// needs encoding in a part when `aMustEncodeMap[c] & ePart` is zero, so a
// listed part means "may appear unencoded in that part".
// The first 32 entries (code points 0x00..0x1F) are 0.
// NOTE(review): adding the aliases only works as a flag set if the PART_*
// constants are distinct bits -- confirm against their declaration.
457 sal_uInt32
const aMustEncodeMap
[128]
458 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 /* ! */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
464 /* $ */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
466 /* & */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PK
+PL
+PM
+PN
+PO
+PQ
+PR
,
467 /* ' */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
468 /* ( */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
469 /* ) */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
470 /* * */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
471 /* + */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PQ
+PR
,
472 /* , */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PQ
+PR
,
473 /* - */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
474 /* . */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
475 /* / */ +PD
+PG
+PH
+PI
+PJ
+PK
+PM
+PN
+PO
,
476 /* 0 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
477 /* 1 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
478 /* 2 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
479 /* 3 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
480 /* 4 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
481 /* 5 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
482 /* 6 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
483 /* 7 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
484 /* 8 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
485 /* 9 */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
486 /* : */ +PD
+PE
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PQ
+PR
,
487 /* ; */ PA
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PM
+PQ
+PR
,
488 /* < */ +PI
+PM
+PN
+PP
,
489 /* = */ PA
+PD
+PE
+PF
+PG
+PH
+PK
+PL
+PM
+PN
+PQ
+PR
,
490 /* > */ +PI
+PM
+PN
+PP
,
491 /* ? */ +PG
+PM
+PO
+PQ
,
492 /* @ */ +PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
493 /* A */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
494 /* B */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
495 /* C */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
496 /* D */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
497 /* E */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
498 /* F */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
499 /* G */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
500 /* H */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
501 /* I */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
502 /* J */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
503 /* K */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
504 /* L */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
505 /* M */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
506 /* N */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
507 /* O */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
508 /* P */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
509 /* Q */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
510 /* R */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
511 /* S */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
512 /* T */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
513 /* U */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
514 /* V */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
515 /* W */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
516 /* X */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
517 /* Y */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
518 /* Z */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
519 /* [ */ PG
+PM
+PN
+PO
,
521 /* ] */ PG
+PM
+PN
+PO
,
523 /* _ */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
525 /* a */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
526 /* b */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
527 /* c */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
528 /* d */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
529 /* e */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
530 /* f */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
531 /* g */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
532 /* h */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
533 /* i */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
534 /* j */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
535 /* k */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
536 /* l */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
537 /* m */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
538 /* n */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
539 /* o */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
540 /* p */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
541 /* q */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
542 /* r */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
543 /* s */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
544 /* t */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
545 /* u */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
546 /* v */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
547 /* w */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
548 /* x */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
549 /* y */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
550 /* z */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
+PR
,
554 /* ~ */ PA
+PD
+PE
+PF
+PG
+PH
+PI
+PJ
+PK
+PL
+PM
+PN
+PO
+PP
+PQ
,
// mustEncode(nUTF32, ePart): returns true when the character has to be
// encoded in the given URL part. That is the case for every non-ASCII
// character, and for every ASCII character whose aMustEncodeMap entry does
// not contain the ePart flag.
//   nUTF32 - code point to test; it is used as a table index only after the
//            rtl::isAscii() check, because of || short-circuiting
//   ePart  - part flag tested against the table entry
557 bool mustEncode(sal_uInt32 nUTF32
, INetURLObject::Part ePart
)
559 return !rtl::isAscii(nUTF32
) || !(aMustEncodeMap
[nUTF32
] & ePart
);
// setInvalid(): empties the stored absolute URI reference (m_aAbsURIRef) and
// sets the scheme to INetProtocol::NotValid.
564 void INetURLObject::setInvalid()
566 m_aAbsURIRef
.setLength(0);
567 m_eScheme
= INetProtocol::NotValid
;
// memoryStream(data, length): builds a read-only SvMemoryStream over a
// private copy of the given bytes.
//   data   - source bytes; copied with memcpy, so the stream does not alias
//            the caller's buffer
//   length - number of bytes to copy and expose through the stream
580 std::unique_ptr
<SvMemoryStream
> memoryStream(
581 void const * data
, sal_Int32 length
)
// Temporary owner of the copy.
583 std::unique_ptr
<char[]> b(
585 memcpy(b
.get(), data
, length
);
586 std::unique_ptr
<SvMemoryStream
> s(
587 new SvMemoryStream(b
.get(), length
, StreamMode::READ
));
// Tell the stream that it owns the buffer it was constructed over.
// NOTE(review): b must give up ownership afterwards to avoid a double free;
// that statement is not among the lines shown here -- confirm.
588 s
->ObjectOwnsMemory(true);
// getData(): decodes the payload of a "data:" URL into a memory stream.
// Two payload forms are handled after the optional media type:
//   ","        - the rest of the path is converted to ISO-8859-1 bytes
//   ";base64," - the rest of the path is base64-decoded (prefix matched
//                ignoring ASCII case)
// NOTE(review): the return statements are not among the lines shown here;
// presumably a null pointer is returned for non-data URLs and for payloads
// that fail to decode -- confirm.
595 std::unique_ptr
<SvMemoryStream
> INetURLObject::getData() const
// Only the data: protocol carries an inline payload.
597 if( GetProtocol() != INetProtocol::Data
)
// Decode the URL path with ISO-8859-1 as charset.
602 OUString sURLPath
= GetURLPath( DecodeMechanism::WithCharset
, RTL_TEXTENCODING_ISO_8859_1
);
// Skip a leading media type, if scanContentType() recognises one.
603 sal_Unicode
const * pSkippedMediatype
= INetMIME::scanContentType( sURLPath
);
// Number of characters consumed by the media type; 0 if none was found.
604 sal_Int32 nCharactersSkipped
= pSkippedMediatype
== nullptr
605 ? 0 : pSkippedMediatype
-sURLPath
.getStr();
// Plain payload: everything after the comma.
606 if (sURLPath
.match(",", nCharactersSkipped
))
608 nCharactersSkipped
+= strlen(",");
609 OString
sURLEncodedData(
610 sURLPath
.getStr() + nCharactersSkipped
,
611 sURLPath
.getLength() - nCharactersSkipped
,
612 RTL_TEXTENCODING_ISO_8859_1
, OUSTRING_TO_OSTRING_CVTFLAGS
);
614 sURLEncodedData
.getStr(), sURLEncodedData
.getLength());
// Base64 payload: everything after ";base64,".
616 else if (sURLPath
.matchIgnoreAsciiCase(";base64,", nCharactersSkipped
))
618 nCharactersSkipped
+= strlen(";base64,");
619 OUString sBase64Data
= sURLPath
.copy( nCharactersSkipped
);
620 css::uno::Sequence
< sal_Int8
> aDecodedData
;
// Accept the result only if the decoder consumed the whole input.
621 if (comphelper::Base64::decodeSomeChars(aDecodedData
, sBase64Data
)
622 == sBase64Data
.getLength())
625 aDecodedData
.getArray(), aDecodedData
.getLength());
// guessFSysStyleByCounting: chooses between FSysStyle::Unix and
// FSysStyle::Dos for the text in [pBegin, pEnd) by comparing two counters,
// nSlashCount and nBackslashCount.
// NOTE(review): the loop body is not among the lines shown here; presumably
// it counts '/' and '\\' characters -- confirm.
633 FSysStyle
guessFSysStyleByCounting(sal_Unicode
const * pBegin
,
634 sal_Unicode
const * pEnd
,
640 "guessFSysStyleByCounting(): Bad style");
// Debug-only check that the range length fits into sal_Int32.
641 DBG_ASSERT(std::numeric_limits
< sal_Int32
>::min() < pBegin
- pEnd
642 && pEnd
- pBegin
<= std::numeric_limits
< sal_Int32
>::max(),
643 "guessFSysStyleByCounting(): Too big");
// A counter starts at 0 if its style is enabled in eStyle, otherwise at the
// smallest sal_Int32 value.
644 sal_Int32 nSlashCount
645 = (eStyle
& FSysStyle::Unix
) ?
646 0 : std::numeric_limits
< sal_Int32
>::min();
647 sal_Int32 nBackslashCount
648 = (eStyle
& FSysStyle::Dos
) ?
649 0 : std::numeric_limits
< sal_Int32
>::min();
650 while (pBegin
!= pEnd
)
// Ties go to Unix because of the >= comparison.
661 return nSlashCount
>= nBackslashCount
?
662 FSysStyle::Unix
: FSysStyle::Dos
;
// parseScheme: tries to recognise a generic URI scheme at the start of
// [*begin, end).
//   begin             - in/out pointer to the start of the text to scan
//   end               - end of the text
//   fragmentDelimiter - the character right after the ':' must differ from
//                       this value for the scheme to be accepted
// A scheme starts with an ASCII letter and continues with ASCII
// alphanumerics, '+' or '-'. It must be followed by ':' and at least one
// further character. The matched text is converted to lower case.
// NOTE(review): further accepted scheme characters, the update of *begin and
// the return value for "no scheme" are not among the lines shown here.
// Callers test the result with isEmpty(), so an empty string is presumably
// returned on failure -- confirm.
665 OUString
parseScheme(
666 sal_Unicode
const ** begin
, sal_Unicode
const * end
,
667 sal_uInt32 fragmentDelimiter
)
669 sal_Unicode
const * p
= *begin
;
// First character must be an ASCII letter.
670 if (p
!= end
&& rtl::isAsciiAlpha(*p
)) {
674 && (rtl::isAsciiAlphanumeric(*p
) || *p
== '+' || *p
== '-'
676 // #i34835# To avoid problems with Windows file paths like "C:\foo",
677 // do not accept generic schemes that are only one character long:
678 if (end
- p
> 1 && p
[0] == ':' && p
[1] != fragmentDelimiter
// Scheme text is everything between *begin and the ':' at p, lower-cased.
682 OUString(*begin
, p
- *begin
).toAsciiLowerCase());
692 bool INetURLObject::setAbsURIRef(OUString
const & rTheAbsURIRef
,
693 EncodeMechanism eMechanism
,
694 rtl_TextEncoding eCharset
,
698 sal_Unicode
const * pPos
= rTheAbsURIRef
.getStr();
699 sal_Unicode
const * pEnd
= pPos
+ rTheAbsURIRef
.getLength();
703 sal_uInt32 nFragmentDelimiter
= '#';
705 OUStringBuffer
aSynAbsURIRef(rTheAbsURIRef
.getLength()*2);
708 sal_Unicode
const * p
= pPos
;
709 PrefixInfo
const * pPrefix
= getPrefix(p
, pEnd
);
713 m_eScheme
= pPrefix
->m_eScheme
;
715 OUString
sTemp(OUString::createFromAscii(pPrefix
->m_eKind
716 >= PrefixInfo::Kind::External
?
717 pPrefix
->m_pTranslatedPrefix
:
718 pPrefix
->m_pPrefix
));
719 aSynAbsURIRef
.append(sTemp
);
720 m_aScheme
= SubString( 0, sTemp
.indexOf(':') );
726 // For scheme detection, the first (if any) of the following
727 // productions that matches the input string (and for which the
728 // appropriate style bit is set in eStyle, if applicable)
729 // determines the scheme. The productions use the auxiliary rules
731 // domain = label *("." label)
732 // label = alphanum [*(alphanum / "-") alphanum]
733 // alphanum = ALPHA / DIGIT
734 // IPv6reference = "[" IPv6address "]"
735 // IPv6address = hexpart [":" IPv4address]
736 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
737 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
738 // hexseq = hex4 *(":" hex4)
740 // UCS4 = <any UCS4 character>
742 // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
743 // <one of the known schemes, ignoring case> ":" *UCS4
744 // 2nd Production (mailto):
746 // 3rd Production (ftp):
747 // "FTP" 2*("." label) ["/" *UCS4]
748 // 4th Production (http):
749 // label 2*("." label) ["/" *UCS4]
750 // 5th Production (file):
751 // "//" (domain / IPv6reference) ["/" *UCS4]
752 // 6th Production (Unix file):
754 // 7th Production (UNC file; FSysStyle::Dos only):
755 // "\\" domain ["\" *UCS4]
756 // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
757 // ALPHA ":" ["/" *UCS4]
758 // 9th Production (DOS file; FSysStyle::Dos only):
759 // ALPHA ":" ["\" *UCS4]
760 // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
761 // after this else branch):
762 // <any scheme> ":" *UCS4
764 // For the 'non URL' file productions 6--9, the interpretation of
765 // the input as a (degenerate) URI is turned off, i.e., escape
766 // sequences and fragments are never detected as such, but are
767 // taken as literal characters.
769 sal_Unicode
const * p1
= pPos
;
770 if (eStyle
& FSysStyle::Dos
772 && rtl::isAsciiAlpha(p1
[0])
774 && (pEnd
- p1
== 2 || p1
[2] == '/' || p1
[2] == '\\'))
776 m_eScheme
= INetProtocol::File
; // 8th, 9th
777 eMechanism
= EncodeMechanism::All
;
778 nFragmentDelimiter
= 0x80000000;
780 else if (pEnd
- p1
>= 2 && p1
[0] == '/' && p1
[1] == '/')
783 if ((scanDomain(p1
, pEnd
) > 0 || scanIPv6reference(p1
, pEnd
))
784 && (p1
== pEnd
|| *p1
== '/'))
785 m_eScheme
= INetProtocol::File
; // 5th
787 else if (p1
!= pEnd
&& *p1
== '/')
789 m_eScheme
= INetProtocol::File
; // 6th
790 eMechanism
= EncodeMechanism::All
;
791 nFragmentDelimiter
= 0x80000000;
793 else if (eStyle
& FSysStyle::Dos
799 sal_Int32 n
= rtl_ustr_indexOfChar_WithLength(
800 p1
, pEnd
- p1
, '\\');
801 sal_Unicode
const * pe
= n
== -1 ? pEnd
: p1
+ n
;
803 parseHostOrNetBiosName(
804 p1
, pe
, EncodeMechanism::All
, RTL_TEXTENCODING_DONTKNOW
,
806 (scanDomain(p1
, pe
) > 0 && p1
== pe
)
809 m_eScheme
= INetProtocol::File
; // 7th
810 eMechanism
= EncodeMechanism::All
;
811 nFragmentDelimiter
= 0x80000000;
816 sal_Unicode
const * pDomainEnd
= p1
;
817 sal_uInt32 nLabels
= scanDomain(pDomainEnd
, pEnd
);
818 if (nLabels
> 0 && pDomainEnd
!= pEnd
&& *pDomainEnd
== '@')
821 if (scanDomain(pDomainEnd
, pEnd
) > 0
822 && pDomainEnd
== pEnd
)
823 m_eScheme
= INetProtocol::Mailto
; // 2nd
825 else if (nLabels
>= 3
826 && (pDomainEnd
== pEnd
|| *pDomainEnd
== '/'))
828 = pDomainEnd
- p1
>= 4
829 && (p1
[0] == 'f' || p1
[0] == 'F')
830 && (p1
[1] == 't' || p1
[1] == 'T')
831 && (p1
[2] == 'p' || p1
[2] == 'P')
833 INetProtocol::Ftp
: INetProtocol::Http
; // 3rd, 4th
838 if (m_eScheme
== INetProtocol::NotValid
) {
839 sal_Unicode
const * p1
= pPos
;
840 aSynScheme
= parseScheme(&p1
, pEnd
, nFragmentDelimiter
);
841 if (!aSynScheme
.isEmpty())
843 m_eScheme
= INetProtocol::Generic
;
848 if (bSmart
&& m_eScheme
== INetProtocol::NotValid
&& pPos
!= pEnd
849 && *pPos
!= nFragmentDelimiter
)
851 m_eScheme
= m_eSmartScheme
;
854 if (m_eScheme
== INetProtocol::NotValid
)
860 if (m_eScheme
!= INetProtocol::Generic
) {
861 aSynScheme
= OUString::createFromAscii(getSchemeInfo().m_pScheme
);
863 m_aScheme
.set(aSynAbsURIRef
, aSynScheme
, aSynAbsURIRef
.getLength());
864 aSynAbsURIRef
.append(':');
867 sal_uInt32 nSegmentDelimiter
= '/';
868 sal_uInt32 nAltSegmentDelimiter
= 0x80000000;
869 bool bSkippedInitialSlash
= false;
871 // Parse //<user>;AUTH=<auth>@<host>:<port> or
872 // //<user>:<password>@<host>:<port> or
874 if (getSchemeInfo().m_bAuthority
)
876 sal_Unicode
const * pUserInfoBegin
= nullptr;
877 sal_Unicode
const * pUserInfoEnd
= nullptr;
878 sal_Unicode
const * pHostPortBegin
= nullptr;
879 sal_Unicode
const * pHostPortEnd
= nullptr;
883 case INetProtocol::VndSunStarHelp
:
885 if (pEnd
- pPos
< 2 || *pPos
++ != '/' || *pPos
++ != '/')
890 aSynAbsURIRef
.append("//");
891 OUStringBuffer aSynAuthority
;
893 && *pPos
!= '/' && *pPos
!= '?'
894 && *pPos
!= nFragmentDelimiter
)
896 EscapeType eEscapeType
;
897 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
899 eCharset
, eEscapeType
);
900 appendUCS4(aSynAuthority
, nUTF32
, eEscapeType
,
901 PART_AUTHORITY
, eCharset
, false);
903 m_aHost
.set(aSynAbsURIRef
,
904 aSynAuthority
.makeStringAndClear(),
905 aSynAbsURIRef
.getLength());
906 // misusing m_aHost to store the authority
910 case INetProtocol::VndSunStarHier
:
912 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
915 aSynAbsURIRef
.append("//");
916 OUStringBuffer aSynAuthority
;
918 && *pPos
!= '/' && *pPos
!= '?'
919 && *pPos
!= nFragmentDelimiter
)
921 EscapeType eEscapeType
;
922 sal_uInt32 nUTF32
= getUTF32(pPos
,
927 appendUCS4(aSynAuthority
,
934 if (aSynAuthority
.isEmpty())
939 m_aHost
.set(aSynAbsURIRef
,
940 aSynAuthority
.makeStringAndClear(),
941 aSynAbsURIRef
.getLength());
942 // misusing m_aHost to store the authority
947 case INetProtocol::VndSunStarPkg
:
948 case INetProtocol::Cmis
:
950 if (pEnd
- pPos
< 2 || *pPos
++ != '/' || *pPos
++ != '/')
955 aSynAbsURIRef
.append("//");
956 OUStringBuffer
aSynUser(128);
958 bool bHasUser
= false;
959 while (pPos
< pEnd
&& *pPos
!= '@'
960 && *pPos
!= '/' && *pPos
!= '?'
961 && *pPos
!= nFragmentDelimiter
)
963 EscapeType eEscapeType
;
964 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
966 eCharset
, eEscapeType
);
967 appendUCS4(aSynUser
, nUTF32
, eEscapeType
,
968 PART_USER_PASSWORD
, eCharset
, false);
970 bHasUser
= *pPos
== '@';
973 OUStringBuffer
aSynAuthority(64);
976 aSynAuthority
= aSynUser
;
980 m_aUser
.set(aSynAbsURIRef
,
981 aSynUser
.makeStringAndClear(),
982 aSynAbsURIRef
.getLength());
983 aSynAbsURIRef
.append("@");
987 && *pPos
!= '/' && *pPos
!= '?'
988 && *pPos
!= nFragmentDelimiter
)
990 EscapeType eEscapeType
;
991 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
993 eCharset
, eEscapeType
);
994 appendUCS4(aSynAuthority
, nUTF32
, eEscapeType
,
995 PART_AUTHORITY
, eCharset
, false);
998 if (aSynAuthority
.isEmpty())
1003 m_aHost
.set(aSynAbsURIRef
,
1004 aSynAuthority
.makeStringAndClear(),
1005 aSynAbsURIRef
.getLength());
1006 // misusing m_aHost to store the authority
1010 case INetProtocol::File
:
1013 // The first of the following seven productions that
1014 // matches the rest of the input string (and for which the
1015 // appropriate style bit is set in eStyle, if applicable)
1016 // determines the used notation. The productions use the
1019 // domain = label *("." label)
1020 // label = alphanum [*(alphanum / "-") alphanum]
1021 // alphanum = ALPHA / DIGIT
1022 // IPv6reference = "[" IPv6address "]"
1023 // IPv6address = hexpart [":" IPv4address]
1024 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1025 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1026 // hexseq = hex4 *(":" hex4)
1028 // path = <any UCS4 character except "#">
1029 // UCS4 = <any UCS4 character>
1031 // 1st Production (URL):
1032 // "//" [domain / IPv6reference] ["/" *path]
1035 // "file://" domain "/" *path ["#" *UCS4]
1036 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
1038 sal_Unicode
const * p1
= pPos
+ 2;
1039 while (p1
!= pEnd
&& *p1
!= '/' &&
1040 *p1
!= nFragmentDelimiter
)
1044 if (parseHostOrNetBiosName(
1045 pPos
+ 2, p1
, EncodeMechanism::All
,
1046 RTL_TEXTENCODING_DONTKNOW
, true, nullptr))
1048 aSynAbsURIRef
.append("//");
1049 pHostPortBegin
= pPos
+ 2;
1056 // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
1057 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1059 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1060 // replacing "\" by "/" within <*path>
1061 // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
1062 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1064 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1065 // replacing "\" by "/" within <*path>
1066 // 4th Production (miscounted slashes):
1067 // "//" *path ["#" *UCS4]
1069 // "file:///" *path ["#" *UCS4]
1070 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
1072 aSynAbsURIRef
.append("//");
1074 bSkippedInitialSlash
= true;
1075 if ((eStyle
& FSysStyle::Dos
)
1077 && rtl::isAsciiAlpha(pPos
[0])
1079 && (pEnd
- pPos
== 2
1080 || pPos
[2] == '/' || pPos
[2] == '\\'))
1081 nAltSegmentDelimiter
= '\\';
1085 // 5th Production (Unix):
1086 // "/" *path ["#" *UCS4]
1088 // "file:///" *path ["#" *UCS4]
1089 if (pPos
< pEnd
&& *pPos
== '/')
1091 aSynAbsURIRef
.append("//");
1095 // 6th Production (UNC; FSysStyle::Dos only):
1096 // "\\" domain ["\" *path] ["#" *UCS4]
1098 // "file://" domain "/" *path ["#" *UCS4]
1099 // replacing "\" by "/" within <*path>
1100 if (eStyle
& FSysStyle::Dos
1105 sal_Unicode
const * p1
= pPos
+ 2;
1106 sal_Unicode
const * pe
= p1
;
1107 while (pe
< pEnd
&& *pe
!= '\\' &&
1108 *pe
!= nFragmentDelimiter
)
1113 parseHostOrNetBiosName(
1114 p1
, pe
, EncodeMechanism::All
,
1115 RTL_TEXTENCODING_DONTKNOW
, true, nullptr) ||
1116 (scanDomain(p1
, pe
) > 0 && p1
== pe
)
1119 aSynAbsURIRef
.append("//");
1120 pHostPortBegin
= pPos
+ 2;
1123 nSegmentDelimiter
= '\\';
1128 // 7th Production (Unix-like DOS; FSysStyle::Dos only):
1129 // ALPHA ":" ["/" *path] ["#" *UCS4]
1131 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1132 // replacing "\" by "/" within <*path>
1133 // 8th Production (DOS; FSysStyle::Dos only):
1134 // ALPHA ":" ["\" *path] ["#" *UCS4]
1136 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1137 // replacing "\" by "/" within <*path>
1138 if (eStyle
& FSysStyle::Dos
1140 && rtl::isAsciiAlpha(pPos
[0])
1142 && (pEnd
- pPos
== 2
1144 || pPos
[2] == '\\'))
1146 aSynAbsURIRef
.append("//");
1147 nAltSegmentDelimiter
= '\\';
1148 bSkippedInitialSlash
= true;
1152 // 9th Production (any):
1153 // *path ["#" *UCS4]
1155 // "file:///" *path ["#" *UCS4]
1156 // replacing the delimiter by "/" within <*path>. The
1157 // delimiter is that character from the set { "/", "\"}
1158 // which appears most often in <*path> (if FSysStyle::Unix
1159 // is not among the style bits, "/" is removed from the
1160 // set; if FSysStyle::Dos is not among the style bits, "\" is
1161 // removed from the set). If two or
1162 // more characters appear the same number of times, the
1163 // character mentioned first in that set is chosen. If
1164 // the first character of <*path> is the delimiter, that
1165 // character is not copied
1166 if (eStyle
& (FSysStyle::Unix
| FSysStyle::Dos
))
1168 aSynAbsURIRef
.append("//");
1169 switch (guessFSysStyleByCounting(pPos
, pEnd
, eStyle
))
1171 case FSysStyle::Unix
:
1172 nSegmentDelimiter
= '/';
1175 case FSysStyle::Dos
:
1176 nSegmentDelimiter
= '\\';
1181 "INetURLObject::setAbsURIRef():"
1182 " Bad guessFSysStyleByCounting");
1185 bSkippedInitialSlash
1186 = pPos
!= pEnd
&& *pPos
!= nSegmentDelimiter
;
1193 // For INetProtocol::File, allow an empty authority ("//") to be
1194 // missing if the following path starts with an explicit "/"
1195 // (Java is notorious in generating such file URLs, so be
1197 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
1200 && !(m_eScheme
== INetProtocol::File
1201 && pPos
!= pEnd
&& *pPos
== '/'))
1206 aSynAbsURIRef
.append("//");
1208 sal_Unicode
const * pAuthority
= pPos
;
1209 sal_uInt32 c
= getSchemeInfo().m_bQuery
? '?' : 0x80000000;
1210 while (pPos
< pEnd
&& *pPos
!= '/' && *pPos
!= c
1211 && *pPos
!= nFragmentDelimiter
)
1213 if (getSchemeInfo().m_bUser
)
1214 if (getSchemeInfo().m_bHost
)
1216 sal_Unicode
const * p1
= pAuthority
;
1217 while (p1
< pPos
&& *p1
!= '@')
1221 pHostPortBegin
= pAuthority
;
1222 pHostPortEnd
= pPos
;
1226 pUserInfoBegin
= pAuthority
;
1228 pHostPortBegin
= p1
+ 1;
1229 pHostPortEnd
= pPos
;
1234 pUserInfoBegin
= pAuthority
;
1235 pUserInfoEnd
= pPos
;
1237 else if (getSchemeInfo().m_bHost
)
1239 pHostPortBegin
= pAuthority
;
1240 pHostPortEnd
= pPos
;
1242 else if (pPos
!= pAuthority
)
1253 Part ePart
= PART_USER_PASSWORD
;
1254 bool bSupportsPassword
= getSchemeInfo().m_bPassword
;
1256 = !bSupportsPassword
&& getSchemeInfo().m_bAuth
;
1257 bool bHasAuth
= false;
1258 OUStringBuffer aSynUser
;
1259 sal_Unicode
const * p1
= pUserInfoBegin
;
1260 while (p1
< pUserInfoEnd
)
1262 EscapeType eEscapeType
;
1263 sal_uInt32 nUTF32
= getUTF32(p1
, pUserInfoEnd
,
1264 eMechanism
, eCharset
, eEscapeType
);
1265 if (eEscapeType
== EscapeType::NONE
)
1267 if (nUTF32
== ':' && bSupportsPassword
)
1272 else if (nUTF32
== ';' && bSupportsAuth
1273 && pUserInfoEnd
- p1
1274 > RTL_CONSTASCII_LENGTH("auth=")
1275 && INetMIME::equalIgnoreCase(
1277 p1
+ RTL_CONSTASCII_LENGTH("auth="),
1280 p1
+= RTL_CONSTASCII_LENGTH("auth=");
1285 appendUCS4(aSynUser
, nUTF32
, eEscapeType
, ePart
,
1288 m_aUser
.set(aSynAbsURIRef
, aSynUser
.makeStringAndClear(),
1289 aSynAbsURIRef
.getLength());
1292 if (bSupportsPassword
)
1294 aSynAbsURIRef
.append(':');
1295 OUStringBuffer aSynAuth
;
1296 while (p1
< pUserInfoEnd
)
1298 EscapeType eEscapeType
;
1299 sal_uInt32 nUTF32
= getUTF32(p1
, pUserInfoEnd
,
1300 eMechanism
, eCharset
,
1302 appendUCS4(aSynAuth
, nUTF32
, eEscapeType
,
1303 ePart
, eCharset
, false);
1305 m_aAuth
.set(aSynAbsURIRef
, aSynAuth
.makeStringAndClear(),
1306 aSynAbsURIRef
.getLength());
1310 aSynAbsURIRef
.append(";AUTH=");
1311 OUStringBuffer aSynAuth
;
1312 while (p1
< pUserInfoEnd
)
1314 EscapeType eEscapeType
;
1315 sal_uInt32 nUTF32
= getUTF32(p1
, pUserInfoEnd
,
1316 eMechanism
, eCharset
,
1318 if (!INetMIME::isIMAPAtomChar(nUTF32
))
1323 appendUCS4(aSynAuth
, nUTF32
, eEscapeType
,
1324 ePart
, eCharset
, false);
1326 m_aAuth
.set(aSynAbsURIRef
, aSynAuth
.makeStringAndClear(),
1327 aSynAbsURIRef
.getLength());
1331 aSynAbsURIRef
.append('@');
1336 sal_Unicode
const * pPort
= pHostPortEnd
;
1337 if ( getSchemeInfo().m_bPort
&& pHostPortBegin
< pHostPortEnd
)
1339 sal_Unicode
const * p1
= pHostPortEnd
- 1;
1340 while (p1
> pHostPortBegin
&& rtl::isAsciiDigit(*p1
))
1345 bool bNetBiosName
= false;
1348 case INetProtocol::File
:
1349 // If the host equals "LOCALHOST" (unencoded and ignoring
1350 // case), turn it into an empty host:
1351 if (INetMIME::equalIgnoreCase(pHostPortBegin
, pPort
,
1353 pHostPortBegin
= pPort
;
1354 bNetBiosName
= true;
1357 case INetProtocol::Ldap
:
1358 case INetProtocol::Smb
:
1359 if (pHostPortBegin
== pPort
&& pPort
!= pHostPortEnd
)
1366 if (pHostPortBegin
== pPort
)
1373 OUStringBuffer
aSynHost(64);
1374 if (!parseHostOrNetBiosName(
1375 pHostPortBegin
, pPort
, eMechanism
, eCharset
,
1376 bNetBiosName
, &aSynHost
))
1381 m_aHost
.set(aSynAbsURIRef
, aSynHost
.makeStringAndClear(),
1382 aSynAbsURIRef
.getLength());
1383 if (pPort
!= pHostPortEnd
)
1385 aSynAbsURIRef
.append(':');
1386 m_aPort
.set(aSynAbsURIRef
,
1387 OUString(pPort
+ 1, pHostPortEnd
- (pPort
+ 1)),
1388 aSynAbsURIRef
.getLength());
1394 OUStringBuffer aSynPath
;
1395 if (!parsePath(m_eScheme
, &pPos
, pEnd
, eMechanism
, eCharset
,
1396 bSkippedInitialSlash
, nSegmentDelimiter
,
1397 nAltSegmentDelimiter
,
1398 getSchemeInfo().m_bQuery
? '?' : 0x80000000,
1399 nFragmentDelimiter
, aSynPath
))
1404 m_aPath
.set(aSynAbsURIRef
, aSynPath
.makeStringAndClear(),
1405 aSynAbsURIRef
.getLength());
1408 if (getSchemeInfo().m_bQuery
&& pPos
< pEnd
&& *pPos
== '?')
1410 aSynAbsURIRef
.append('?');
1411 OUStringBuffer aSynQuery
;
1412 for (++pPos
; pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
;)
1414 EscapeType eEscapeType
;
1415 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
1416 eMechanism
, eCharset
, eEscapeType
);
1417 appendUCS4(aSynQuery
, nUTF32
, eEscapeType
,
1418 PART_URIC
, eCharset
, true);
1420 m_aQuery
.set(aSynAbsURIRef
, aSynQuery
.makeStringAndClear(),
1421 aSynAbsURIRef
.getLength());
1424 // Parse #<fragment>
1425 if (pPos
< pEnd
&& *pPos
== nFragmentDelimiter
)
1427 aSynAbsURIRef
.append(sal_Unicode(nFragmentDelimiter
));
1428 OUStringBuffer aSynFragment
;
1429 for (++pPos
; pPos
< pEnd
;)
1431 EscapeType eEscapeType
;
1432 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
1433 eMechanism
, eCharset
, eEscapeType
);
1434 appendUCS4(aSynFragment
, nUTF32
, eEscapeType
, PART_URIC
,
1437 m_aFragment
.set(aSynAbsURIRef
, aSynFragment
.makeStringAndClear(),
1438 aSynAbsURIRef
.getLength());
1447 m_aAbsURIRef
= aSynAbsURIRef
;
// Switches this URL object to the scheme eTargetScheme by rebuilding
// m_aAbsURIRef: the old scheme prefix is cut off the front of the stored URI
// text and the m_pScheme string reported by getSchemeInfo() after the switch
// is put in its place.
// NOTE(review): only the part of the function up to the computation of
// `delta` is visible in this excerpt; the statements that consume `delta`
// are not shown here.
1452 void INetURLObject::changeScheme(INetProtocol eTargetScheme
) {
// Keep a copy of the current URI text, then empty the buffer so that it can
// be rebuilt below.
1453 OUString aTmpStr
=m_aAbsURIRef
.toString();
1454 m_aAbsURIRef
.setLength(0);
// Length of the scheme text currently at the front of the URI: taken from
// m_aScheme for INetProtocol::Generic, otherwise from the scheme table entry.
1455 int oldSchemeLen
= 0;
1456 if (m_eScheme
== INetProtocol::Generic
)
1457 oldSchemeLen
= m_aScheme
.getLength();
1459 oldSchemeLen
= strlen(getSchemeInfo().m_pScheme
);
// m_eScheme is switched first; the getSchemeInfo() calls below are made after
// that switch (presumably they then describe eTargetScheme -- confirm).
1460 m_eScheme
=eTargetScheme
;
1461 int newSchemeLen
=strlen(getSchemeInfo().m_pScheme
);
// New scheme text followed by everything of the old URI after the old scheme.
1462 m_aAbsURIRef
.appendAscii(getSchemeInfo().m_pScheme
);
1463 m_aAbsURIRef
.append(aTmpStr
.getStr()+oldSchemeLen
);
// Number of characters by which the text after the scheme has moved.
1464 int delta
=newSchemeLen
-oldSchemeLen
;
// Resolves the (possibly relative) URI reference rTheRelURIRef against this
// object as base and assigns the result to rTheAbsURIRef.
//
// rTheRelURIRef     the reference to resolve
// rTheAbsURIRef     receives the resulting URL object on the visible success
//                   paths
// rWasAbsolute      set to true when the input is taken as an absolute file
//                   system path, to hasScheme in the general case, and to
//                   false on the visible error paths
// eMechanism        encoding mechanism handed to getUTF32() / the
//                   INetURLObject constructor; forced to EncodeMechanism::All
//                   when bRelativeNonURIs is set
// eCharset          charset handed to getUTF32() / appendUCS4()
// bIgnoreFragment   when set, a fragment of the input is not carried over
// bSmart            enables the file-system-path heuristics below
// bRelativeNonURIs  treat the input as a non-URI path: no query or fragment
//                   delimiter, PART_VISIBLE_NONSPECIAL encoding
// eStyle            file system styles accepted by the heuristics
//
// NOTE(review): this excerpt has lost a number of lines of the function
// (braces, several conditions and all return statements are not visible), so
// the returned value and some guards cannot be documented from here.
1474 bool INetURLObject::convertRelToAbs(OUString
const & rTheRelURIRef
,
1475 INetURLObject
& rTheAbsURIRef
,
1476 bool & rWasAbsolute
,
1477 EncodeMechanism eMechanism
,
1478 rtl_TextEncoding eCharset
,
1479 bool bIgnoreFragment
, bool bSmart
,
1480 bool bRelativeNonURIs
, FSysStyle eStyle
)
// p is the read position within the input, pEnd its end.
1483 sal_Unicode
const * p
= rTheRelURIRef
.getStr();
1484 sal_Unicode
const * pEnd
= p
+ rTheRelURIRef
.getLength();
// hasScheme starts out as "getPrefix() recognized a known prefix"; the
// parseScheme() assignment below can overwrite it (its guarding condition is
// not visible in this excerpt).
1486 sal_Unicode
const * pPrefixBegin
= p
;
1487 PrefixInfo
const * pPrefix
= getPrefix(pPrefixBegin
, pEnd
);
1488 bool hasScheme
= pPrefix
!= nullptr;
1491 hasScheme
= !parseScheme(&pPrefixBegin
, pEnd
, '#').isEmpty();
// Delimiters used while scanning the input. 0x80000000 is the value used
// where a delimiter is to be switched off (presumably because no sal_Unicode
// can compare equal to it -- confirm).
1494 sal_uInt32 nSegmentDelimiter
= '/';
1495 sal_uInt32 nQueryDelimiter
1496 = !bSmart
|| getSchemeInfo().m_bQuery
? '?' : 0x80000000;
1497 sal_uInt32 nFragmentDelimiter
= '#';
1498 Part ePart
= PART_VISIBLE
;
1500 if (!hasScheme
&& bSmart
)
1502 // If the input matches any of the following productions (for which
1503 // the appropriate style bit is set in eStyle), it is assumed to be an
1504 // absolute file system path, rather than a relative URI reference.
1505 // (This is only a subset of the productions used for scheme detection
1506 // in INetURLObject::setAbsURIRef(), because most of those productions
1507 // interfere with the syntax of relative URI references.) The
1508 // productions use the auxiliary rules
1510 // domain = label *("." label)
1511 // label = alphanum [*(alphanum / "-") alphanum]
1512 // alphanum = ALPHA / DIGIT
1513 // UCS4 = <any UCS4 character>
1515 // 1st Production (UNC file; FSysStyle::Dos only):
1516 // "\\" domain ["\" *UCS4]
1517 // 2nd Production (Unix-like DOS file; FSysStyle::Dos only):
1518 // ALPHA ":" ["/" *UCS4]
1519 // 3rd Production (DOS file; FSysStyle::Dos only):
1520 // ALPHA ":" ["\" *UCS4]
1521 if (eStyle
& FSysStyle::Dos
)
1524 sal_Unicode
const * q
= p
;
1526 && rtl::isAsciiAlpha(q
[0])
1528 && (pEnd
- q
== 2 || q
[2] == '/' || q
[2] == '\\'))
1529 bFSys
= true; // 2nd, 3rd
1530 else if (pEnd
- q
>= 2 && q
[0] == '\\' && q
[1] == '\\')
1533 sal_Int32 n
= rtl_ustr_indexOfChar_WithLength(
1535 sal_Unicode
const * qe
= n
== -1 ? pEnd
: q
+ n
;
1536 if (parseHostOrNetBiosName(
1537 q
, qe
, EncodeMechanism::All
, RTL_TEXTENCODING_DONTKNOW
,
1540 bFSys
= true; // 1st
// The input is parsed as an absolute reference in its own right; if that
// succeeds it becomes the result and is reported as having been absolute.
1545 INetURLObject aNewURI
;
1546 aNewURI
.setAbsURIRef(rTheRelURIRef
, eMechanism
,
1547 eCharset
, true, eStyle
);
1548 if (!aNewURI
.HasError())
1550 rTheAbsURIRef
= aNewURI
;
1551 rWasAbsolute
= true;
1557 // When the base URL is a file URL, accept relative file system paths
1558 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1559 // and "#"), as well as relative URIs using "/" as delimiter:
1560 if (m_eScheme
== INetProtocol::File
)
1561 switch (guessFSysStyleByCounting(p
, pEnd
, eStyle
))
1563 case FSysStyle::Unix
:
1564 nSegmentDelimiter
= '/';
1567 case FSysStyle::Dos
:
1568 nSegmentDelimiter
= '\\';
1569 bRelativeNonURIs
= true;
1573 OSL_FAIL("INetURLObject::convertRelToAbs():"
1574 " Bad guessFSysStyleByCounting");
// Non-URI mode: everything is encoded, and neither a query nor a fragment
// delimiter is recognized.
1578 if (bRelativeNonURIs
)
1580 eMechanism
= EncodeMechanism::All
;
1581 nQueryDelimiter
= 0x80000000;
1582 nFragmentDelimiter
= 0x80000000;
1583 ePart
= PART_VISIBLE_NONSPECIAL
;
1587 // If the relative URI has the same scheme as the base URI, and that
1588 // scheme is hierarchical, then ignore its presence in the relative
1589 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1591 if (pPrefix
&& pPrefix
->m_eScheme
== m_eScheme
1592 && getSchemeInfo().m_bHierarchical
)
// Advance p past the first ':' of the input (or to pEnd if there is none).
1595 while (p
!= pEnd
&& *p
++ != ':') ;
1597 rWasAbsolute
= hasScheme
;
1599 // Fast solution for non-relative URIs:
1602 INetURLObject
aNewURI(rTheRelURIRef
, eMechanism
, eCharset
);
1603 if (aNewURI
.HasError())
1605 rWasAbsolute
= false;
1609 if (bIgnoreFragment
)
1610 aNewURI
.clearFragment();
1611 rTheAbsURIRef
= aNewURI
;
// States of the scanner that assembles the absolute reference below.
1615 enum State
{ STATE_AUTH
, STATE_ABS_PATH
, STATE_REL_PATH
, STATE_FRAGMENT
,
// Buffer in which the text of the absolute result is assembled.
1618 OUStringBuffer
aSynAbsURIRef(128);
1619 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1620 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1621 if (m_eScheme
!= INetProtocol::Generic
)
1623 aSynAbsURIRef
.appendAscii(getSchemeInfo().m_pScheme
);
1627 sal_Unicode
const * pSchemeBegin
1628 = m_aAbsURIRef
.getStr();
1629 sal_Unicode
const * pSchemeEnd
= pSchemeBegin
;
1630 while (pSchemeEnd
[0] != ':')
1634 aSynAbsURIRef
.append(pSchemeBegin
, pSchemeEnd
- pSchemeBegin
);
1636 aSynAbsURIRef
.append(':');
1638 State eState
= STATE_AUTH
;
1639 bool bSameDoc
= true;
// Authority: taken from the input when it starts with "//", otherwise copied
// from the base (see the getAuthority() use below).
1641 if (getSchemeInfo().m_bAuthority
)
1643 if (pEnd
- p
>= 2 && p
[0] == '/' && p
[1] == '/')
1645 aSynAbsURIRef
.append("//");
1647 eState
= STATE_ABS_PATH
;
1651 EscapeType eEscapeType
;
1653 = getUTF32(p
, pEnd
, eMechanism
,
1654 eCharset
, eEscapeType
);
1655 if (eEscapeType
== EscapeType::NONE
)
1657 if (nUTF32
== nSegmentDelimiter
)
1659 else if (nUTF32
== nFragmentDelimiter
)
1661 eState
= STATE_FRAGMENT
;
1665 appendUCS4(aSynAbsURIRef
, nUTF32
, eEscapeType
,
1666 PART_VISIBLE
, eCharset
, true);
1671 SubString
aAuthority(getAuthority());
1672 aSynAbsURIRef
.append(m_aAbsURIRef
.getStr()
1673 + aAuthority
.getBegin(),
1674 aAuthority
.getLength());
// Decide from the next input character how the rest is to be read.
1678 if (eState
== STATE_AUTH
)
1681 eState
= STATE_DONE
;
1682 else if (*p
== nFragmentDelimiter
)
1685 eState
= STATE_FRAGMENT
;
1687 else if (*p
== nSegmentDelimiter
)
1690 eState
= STATE_ABS_PATH
;
1695 eState
= STATE_REL_PATH
;
// Absolute path in the input: copied character by character, with the input's
// segment delimiter compared against nSegmentDelimiter.
1700 if (eState
== STATE_ABS_PATH
)
1702 aSynAbsURIRef
.append('/');
1703 eState
= STATE_DONE
;
1706 EscapeType eEscapeType
;
1708 = getUTF32(p
, pEnd
, eMechanism
, eCharset
, eEscapeType
);
1709 if (eEscapeType
== EscapeType::NONE
)
1711 if (nUTF32
== nFragmentDelimiter
)
1713 eState
= STATE_FRAGMENT
;
1716 else if (nUTF32
== nSegmentDelimiter
)
1719 appendUCS4(aSynAbsURIRef
, nUTF32
, eEscapeType
, ePart
,
// Relative path in the input: merged with the directory part of the base path.
1723 else if (eState
== STATE_REL_PATH
)
1725 if (!getSchemeInfo().m_bHierarchical
)
1727 // Detect cases where a relative input could not be made absolute
1728 // because the given base URL is broken (most probably because it is
1731 HasError(), "tools.urlobj",
1732 "cannot make <" << rTheRelURIRef
1733 << "> absolute against broken base <"
1734 << GetMainURL(DecodeMechanism::NONE
) << ">");
1735 rWasAbsolute
= false;
// Scan the base path backwards for a '/'.
1739 sal_Unicode
const * pBasePathBegin
1740 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
1741 sal_Unicode
const * pBasePathEnd
1742 = pBasePathBegin
+ m_aPath
.getLength();
1743 while (pBasePathEnd
!= pBasePathBegin
)
1744 if (*(--pBasePathEnd
) == '/')
// nPathBegin marks where the path starts inside the result buffer; it is the
// lower bound used when segments are removed again further down.
1750 sal_Int32 nPathBegin
= aSynAbsURIRef
.getLength();
1751 aSynAbsURIRef
.append(pBasePathBegin
, pBasePathEnd
- pBasePathBegin
);
1752 DBG_ASSERT(aSynAbsURIRef
.getLength() > nPathBegin
1753 && aSynAbsURIRef
[aSynAbsURIRef
.getLength() - 1] == '/',
1754 "INetURLObject::convertRelToAbs(): Bad base path");
1756 while (p
!= pEnd
&& *p
!= nQueryDelimiter
&& *p
!= nFragmentDelimiter
)
1761 || p
[1] == nSegmentDelimiter
1762 || p
[1] == nQueryDelimiter
1763 || p
[1] == nFragmentDelimiter
)
1766 if (p
!= pEnd
&& *p
== nSegmentDelimiter
)
1770 else if (pEnd
- p
>= 2
1773 || p
[2] == nSegmentDelimiter
1774 || p
[2] == nQueryDelimiter
1775 || p
[2] == nFragmentDelimiter
)
1776 && aSynAbsURIRef
.getLength() - nPathBegin
> 1)
1779 if (p
!= pEnd
&& *p
== nSegmentDelimiter
)
// Truncate the result after the previous '/' (searching backwards, but not
// beyond nPathBegin).
1782 sal_Int32 i
= aSynAbsURIRef
.getLength() - 2;
1783 while (i
> nPathBegin
&& aSynAbsURIRef
[i
] != '/')
1785 aSynAbsURIRef
.setLength(i
+ 1);
1787 aSynAbsURIRef
.getLength() > nPathBegin
1788 && aSynAbsURIRef
[aSynAbsURIRef
.getLength() - 1] == '/',
1789 "INetURLObject::convertRelToAbs(): Bad base path");
1795 && *p
!= nSegmentDelimiter
1796 && *p
!= nQueryDelimiter
1797 && *p
!= nFragmentDelimiter
)
1799 EscapeType eEscapeType
;
1801 = getUTF32(p
, pEnd
, eMechanism
,
1802 eCharset
, eEscapeType
);
1803 appendUCS4(aSynAbsURIRef
, nUTF32
, eEscapeType
, ePart
,
1806 if (p
!= pEnd
&& *p
== nSegmentDelimiter
)
1808 aSynAbsURIRef
.append('/');
// Copy whatever is left of the input up to the fragment delimiter.
1813 while (p
!= pEnd
&& *p
!= nFragmentDelimiter
)
1815 EscapeType eEscapeType
;
1817 = getUTF32(p
, pEnd
, eMechanism
, eCharset
, eEscapeType
);
1818 appendUCS4(aSynAbsURIRef
, nUTF32
, eEscapeType
, ePart
,
1823 eState
= STATE_DONE
;
1827 eState
= STATE_FRAGMENT
;
// Path and, if present, query of the base are copied unchanged. The query is
// copied together with the one character preceding it in m_aAbsURIRef.
1832 aSynAbsURIRef
.append(m_aAbsURIRef
.getStr() + m_aPath
.getBegin(),
1833 m_aPath
.getLength());
1834 if (m_aQuery
.isPresent())
1835 aSynAbsURIRef
.append(m_aAbsURIRef
.getStr()
1836 + m_aQuery
.getBegin() - 1,
1837 m_aQuery
.getLength() + 1);
// Fragment of the input, unless the caller asked for it to be ignored.
1840 if (eState
== STATE_FRAGMENT
&& !bIgnoreFragment
)
1842 aSynAbsURIRef
.append('#');
1845 EscapeType eEscapeType
;
1847 = getUTF32(p
, pEnd
, eMechanism
, eCharset
, eEscapeType
);
1848 appendUCS4(aSynAbsURIRef
, nUTF32
, eEscapeType
,
1849 PART_VISIBLE
, eCharset
, true);
// The assembled text is parsed into a fresh URL object; a parse error means
// the conversion failed.
1853 INetURLObject
aNewURI(aSynAbsURIRef
.makeStringAndClear());
1854 if (aNewURI
.HasError())
1856 // Detect cases where a relative input could not be made absolute
1857 // because the given base URL is broken (most probably because it is
1860 HasError(), "tools.urlobj",
1861 "cannot make <" << rTheRelURIRef
1862 << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE
)
1864 rWasAbsolute
= false;
1868 rTheAbsURIRef
= aNewURI
;
// Expresses rTheAbsURIRef relative to this object (the base) and stores the
// resulting text in rTheRelURIRef.
//
// rTheAbsURIRef     the reference to make relative (absolute or relative; it
//                   is first resolved against this object)
// rTheRelURIRef     receives the result; on the visible fallback paths this
//                   is either the decoded input or the subject's main URL
// eEncodeMechanism  passed on to convertRelToAbs()
// eDecodeMechanism  used for every decode()/GetMainURL() call that produces
//                   output text
// eCharset          charset for encoding and decoding
// eStyle            used for the DOS volume check on file URLs
//
// NOTE(review): this excerpt has lost several lines of the function (braces,
// parts of conditions, loop bodies and all return statements), so the
// returned value cannot be documented from here.
1872 bool INetURLObject::convertAbsToRel(OUString
const & rTheAbsURIRef
,
1873 OUString
& rTheRelURIRef
,
1874 EncodeMechanism eEncodeMechanism
,
1875 DecodeMechanism eDecodeMechanism
,
1876 rtl_TextEncoding eCharset
,
1877 FSysStyle eStyle
) const
1879 // Check for hierarchical base URL:
1880 if (!getSchemeInfo().m_bHierarchical
)
1882 rTheRelURIRef
= decode(rTheAbsURIRef
, eDecodeMechanism
, eCharset
);
1886 // Convert the input (absolute or relative URI ref) to an absolute URI
1888 INetURLObject aSubject
;
1890 if (!convertRelToAbs(rTheAbsURIRef
, aSubject
, bWasAbsolute
,
1891 eEncodeMechanism
, eCharset
, false, false, false,
1894 rTheRelURIRef
= decode(rTheAbsURIRef
, eDecodeMechanism
, eCharset
);
1898 // Check for differing scheme or authority parts:
1899 if ((m_aScheme
.compare(
1900 aSubject
.m_aScheme
, m_aAbsURIRef
, aSubject
.m_aAbsURIRef
)
1902 || (m_aUser
.compare(
1903 aSubject
.m_aUser
, m_aAbsURIRef
, aSubject
.m_aAbsURIRef
)
1905 || (m_aAuth
.compare(
1906 aSubject
.m_aAuth
, m_aAbsURIRef
, aSubject
.m_aAbsURIRef
)
1908 || (m_aHost
.compare(
1909 aSubject
.m_aHost
, m_aAbsURIRef
, aSubject
.m_aAbsURIRef
)
1911 || (m_aPort
.compare(
1912 aSubject
.m_aPort
, m_aAbsURIRef
, aSubject
.m_aAbsURIRef
)
1915 rTheRelURIRef
= aSubject
.GetMainURL(eDecodeMechanism
, eCharset
);
// [pBasePathBegin, pBasePathEnd) and [pSubjectPathBegin, pSubjectPathEnd) are
// the path parts of the base and of the subject within their URI texts.
1919 sal_Unicode
const * pBasePathBegin
1920 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
1921 sal_Unicode
const * pBasePathEnd
= pBasePathBegin
+ m_aPath
.getLength();
1922 sal_Unicode
const * pSubjectPathBegin
1923 = aSubject
.m_aAbsURIRef
.getStr() + aSubject
.m_aPath
.getBegin();
1924 sal_Unicode
const * pSubjectPathEnd
1925 = pSubjectPathBegin
+ aSubject
.m_aPath
.getLength();
1927 // Make nMatch point past the last matching slash, or past the end of the
1928 // paths, in case they are equal:
1929 sal_Unicode
const * pSlash
= nullptr;
1930 sal_Unicode
const * p1
= pBasePathBegin
;
1931 sal_Unicode
const * p2
= pSubjectPathBegin
;
1934 if (p1
== pBasePathEnd
|| p2
== pSubjectPathEnd
)
1936 if (p1
== pBasePathEnd
&& p2
== pSubjectPathEnd
)
1941 sal_Unicode c
= *p1
++;
1949 // One of the paths does not start with '/':
1950 rTheRelURIRef
= aSubject
.GetMainURL(eDecodeMechanism
, eCharset
);
// nMatch is an offset relative to the start of the path; it is applied to both
// the base path and the subject path below.
1953 sal_Int32 nMatch
= pSlash
- pBasePathBegin
;
1955 // If the two URLs are DOS file URLs starting with different volumes
1956 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1957 // relative (it could be, but some people do not like that):
1958 if (m_eScheme
== INetProtocol::File
1960 && hasDosVolume(eStyle
)
1961 && aSubject
.hasDosVolume(eStyle
)) //TODO! ok to use eStyle for these?
1963 rTheRelURIRef
= aSubject
.GetMainURL(eDecodeMechanism
, eCharset
);
1967 // For every slash in the base path after nMatch, a prefix of "../" is
1968 // added to the new relative URL (if the common prefix of the two paths is
1969 // only "/"---but see handling of file URLs above---, the complete subject
1970 // path could go into the new relative URL instead, but some people don't
1972 OUStringBuffer aSynRelURIRef
;
1973 for (sal_Unicode
const * p
= pBasePathBegin
+ nMatch
; p
!= pBasePathEnd
;
1977 aSynRelURIRef
.append("../");
1980 // If the new relative URL would start with "//" (i.e., it would be
1981 // mistaken for a relative URL starting with an authority part), or if the
1982 // new relative URL would neither be empty nor start with <"/"> nor start
1983 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1984 // with a scheme part), then the new relative URL is prefixed with "./":
1985 if (aSynRelURIRef
.isEmpty())
1987 if (pSubjectPathEnd
- pSubjectPathBegin
>= nMatch
+ 2
1988 && pSubjectPathBegin
[nMatch
] == '/'
1989 && pSubjectPathBegin
[nMatch
+ 1] == '/')
1991 aSynRelURIRef
.append("./");
1995 for (sal_Unicode
const * p
= pSubjectPathBegin
+ nMatch
;
1996 p
!= pSubjectPathEnd
&& *p
!= '/'; ++p
)
1998 if (mustEncode(*p
, PART_REL_SEGMENT_EXTRA
))
2000 aSynRelURIRef
.append("./");
2007 // The remainder of the subject path, starting at nMatch, is appended to
2008 // the new relative URL:
2009 aSynRelURIRef
.append(decode(pSubjectPathBegin
+ nMatch
, pSubjectPathEnd
,
2010 eDecodeMechanism
, eCharset
));
2012 // If the subject has defined query or fragment parts, they are appended
2013 // to the new relative URL:
2014 if (aSubject
.m_aQuery
.isPresent())
2016 aSynRelURIRef
.append('?');
2017 aSynRelURIRef
.append(aSubject
.decode(aSubject
.m_aQuery
,
2018 eDecodeMechanism
, eCharset
));
2020 if (aSubject
.m_aFragment
.isPresent())
2022 aSynRelURIRef
.append('#');
2023 aSynRelURIRef
.append(aSubject
.decode(aSubject
.m_aFragment
,
2024 eDecodeMechanism
, eCharset
));
2027 rTheRelURIRef
= aSynRelURIRef
.makeStringAndClear();
// Translates a URI reference in "internal" notation to "external" notation.
// The input is encoded, its prefix is looked up with getPrefix(), and the
// decoded text is stored in rTheExtURIRef.
//
// rTheIntURIRef     the reference to translate
// rTheExtURIRef     receives the decoded result
// eDecodeMechanism  decode mechanism for the output
// eCharset          charset for encoding and decoding
//
// NOTE(review): the statement guarding the prefix replacement and the return
// statement are not visible in this excerpt; presumably the replacement is
// only done when bConvert is set and bConvert is returned -- confirm.
2032 bool INetURLObject::convertIntToExt(OUString
const & rTheIntURIRef
,
2033 OUString
& rTheExtURIRef
,
2034 DecodeMechanism eDecodeMechanism
,
2035 rtl_TextEncoding eCharset
)
2037 OUString
aSynExtURIRef(encodeText(rTheIntURIRef
, PART_VISIBLE
,
2038 EncodeMechanism::NotCanonical
, eCharset
, true));
2039 sal_Unicode
const * pBegin
= aSynExtURIRef
.getStr();
2040 sal_Unicode
const * pEnd
= pBegin
+ aSynExtURIRef
.getLength();
// p is handed to getPrefix() by reference; p - pBegin is used below as the
// length of the text to replace.
2041 sal_Unicode
const * p
= pBegin
;
2042 PrefixInfo
const * pPrefix
= getPrefix(p
, pEnd
);
// Only prefixes classified as Kind::Internal qualify for translation.
2043 bool bConvert
= pPrefix
&& pPrefix
->m_eKind
== PrefixInfo::Kind::Internal
;
// The leading p - pBegin characters are replaced by the translated prefix from
// the table entry.
2047 aSynExtURIRef
.replaceAt(0, p
- pBegin
,
2048 OUString::createFromAscii(pPrefix
->m_pTranslatedPrefix
));
2050 rTheExtURIRef
= decode(aSynExtURIRef
, eDecodeMechanism
, eCharset
);
// Translates a URI reference in "external" notation to "internal" notation.
// The input is encoded, its prefix is looked up with getPrefix(), and the
// decoded text is stored in rTheIntURIRef.
//
// rTheExtURIRef     the reference to translate
// rTheIntURIRef     receives the decoded result
// eDecodeMechanism  decode mechanism for the output
// eCharset          charset for encoding and decoding
//
// NOTE(review): the statement guarding the prefix replacement and the return
// statement are not visible in this excerpt; presumably the replacement is
// only done when bConvert is set and bConvert is returned -- confirm.
2055 bool INetURLObject::convertExtToInt(OUString
const & rTheExtURIRef
,
2056 OUString
& rTheIntURIRef
,
2057 DecodeMechanism eDecodeMechanism
,
2058 rtl_TextEncoding eCharset
)
2060 OUString
aSynIntURIRef(encodeText(rTheExtURIRef
, PART_VISIBLE
,
2061 EncodeMechanism::NotCanonical
, eCharset
, true));
2062 sal_Unicode
const * pBegin
= aSynIntURIRef
.getStr();
2063 sal_Unicode
const * pEnd
= pBegin
+ aSynIntURIRef
.getLength();
// p is handed to getPrefix() by reference; p - pBegin is used below as the
// length of the text to replace.
2064 sal_Unicode
const * p
= pBegin
;
2065 PrefixInfo
const * pPrefix
= getPrefix(p
, pEnd
);
// Only prefixes classified as Kind::External qualify for translation.
2066 bool bConvert
= pPrefix
&& pPrefix
->m_eKind
== PrefixInfo::Kind::External
;
// The leading p - pBegin characters are replaced by the translated prefix from
// the table entry.
2070 aSynIntURIRef
.replaceAt(0, p
- pBegin
,
2071 OUString::createFromAscii(pPrefix
->m_pTranslatedPrefix
));
2073 rTheIntURIRef
= decode(aSynIntURIRef
, eDecodeMechanism
, eCharset
);
// Looks up the scheme prefix at the start of the text [rBegin, pEnd) in a
// static, sorted table of known prefixes and yields a pointer to a PrefixInfo
// entry. Input characters are lower-cased with rtl::toAsciiLowerCase() before
// they are compared with the table.
//
// rBegin  start of the text; passed by reference (the statement updating it
//         is not visible in this excerpt -- presumably it is moved past the
//         matched prefix; confirm)
// pEnd    end of the text
//
// Each table entry holds: the prefix, its translated counterpart (nullptr for
// entries without one), the scheme, and the kind of the prefix (Official,
// Internal or External).
//
// NOTE(review): several lines of the search loop (loop bodies, the
// declaration of i, the return statements) are not visible in this excerpt.
2078 INetURLObject::PrefixInfo
const * INetURLObject::getPrefix(sal_Unicode
const *& rBegin
,
2079 sal_Unicode
const * pEnd
)
2081 static PrefixInfo
const aMap
[]
2082 = { // dummy entry at front needed, because pLast may point here:
2083 { nullptr, nullptr, INetProtocol::NotValid
, PrefixInfo::Kind::Internal
},
2084 { ".component:", "staroffice.component:", INetProtocol::Component
,
2085 PrefixInfo::Kind::Internal
},
2086 { ".uno:", "staroffice.uno:", INetProtocol::Uno
,
2087 PrefixInfo::Kind::Internal
},
2088 { "cid:", nullptr, INetProtocol::Cid
, PrefixInfo::Kind::Official
},
2089 { "data:", nullptr, INetProtocol::Data
, PrefixInfo::Kind::Official
},
2090 { "db:", "staroffice.db:", INetProtocol::Db
, PrefixInfo::Kind::Internal
},
2091 { "file:", nullptr, INetProtocol::File
, PrefixInfo::Kind::Official
},
2092 { "ftp:", nullptr, INetProtocol::Ftp
, PrefixInfo::Kind::Official
},
2093 { "hid:", "staroffice.hid:", INetProtocol::Hid
,
2094 PrefixInfo::Kind::Internal
},
2095 { "http:", nullptr, INetProtocol::Http
, PrefixInfo::Kind::Official
},
2096 { "https:", nullptr, INetProtocol::Https
, PrefixInfo::Kind::Official
},
2097 { "javascript:", nullptr, INetProtocol::Javascript
, PrefixInfo::Kind::Official
},
2098 { "ldap:", nullptr, INetProtocol::Ldap
, PrefixInfo::Kind::Official
},
2099 { "macro:", "staroffice.macro:", INetProtocol::Macro
,
2100 PrefixInfo::Kind::Internal
},
2101 { "mailto:", nullptr, INetProtocol::Mailto
, PrefixInfo::Kind::Official
},
2102 { "private:", "staroffice.private:", INetProtocol::PrivSoffice
,
2103 PrefixInfo::Kind::Internal
},
2104 { "private:factory/", "staroffice.factory:",
2105 INetProtocol::PrivSoffice
, PrefixInfo::Kind::Internal
},
2106 { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice
,
2107 PrefixInfo::Kind::Internal
},
2108 { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice
,
2109 PrefixInfo::Kind::Internal
},
2110 { "private:searchfolder:", "staroffice.searchfolder:",
2111 INetProtocol::PrivSoffice
, PrefixInfo::Kind::Internal
},
2112 { "private:trashcan:", "staroffice.trashcan:",
2113 INetProtocol::PrivSoffice
, PrefixInfo::Kind::Internal
},
2114 { "sftp:", nullptr, INetProtocol::Sftp
, PrefixInfo::Kind::Official
},
2115 { "slot:", "staroffice.slot:", INetProtocol::Slot
,
2116 PrefixInfo::Kind::Internal
},
2117 { "smb:", nullptr, INetProtocol::Smb
, PrefixInfo::Kind::Official
},
2118 { "staroffice.component:", ".component:", INetProtocol::Component
,
2119 PrefixInfo::Kind::External
},
2120 { "staroffice.db:", "db:", INetProtocol::Db
, PrefixInfo::Kind::External
},
2121 { "staroffice.factory:", "private:factory/",
2122 INetProtocol::PrivSoffice
, PrefixInfo::Kind::External
},
2123 { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice
,
2124 PrefixInfo::Kind::External
},
2125 { "staroffice.hid:", "hid:", INetProtocol::Hid
,
2126 PrefixInfo::Kind::External
},
2127 { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice
,
2128 PrefixInfo::Kind::External
},
2129 { "staroffice.macro:", "macro:", INetProtocol::Macro
,
2130 PrefixInfo::Kind::External
},
2131 { "staroffice.private:", "private:", INetProtocol::PrivSoffice
,
2132 PrefixInfo::Kind::External
},
2133 { "staroffice.searchfolder:", "private:searchfolder:",
2134 INetProtocol::PrivSoffice
, PrefixInfo::Kind::External
},
2135 { "staroffice.slot:", "slot:", INetProtocol::Slot
,
2136 PrefixInfo::Kind::External
},
2137 { "staroffice.trashcan:", "private:trashcan:",
2138 INetProtocol::PrivSoffice
, PrefixInfo::Kind::External
},
2139 { "staroffice.uno:", ".uno:", INetProtocol::Uno
,
2140 PrefixInfo::Kind::External
},
2141 { "staroffice:", "private:", INetProtocol::PrivSoffice
,
2142 PrefixInfo::Kind::External
},
2143 { "telnet:", nullptr, INetProtocol::Telnet
, PrefixInfo::Kind::Official
},
2144 { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis
, PrefixInfo::Kind::Internal
},
2145 { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd
,
2146 PrefixInfo::Kind::Official
},
2147 { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand
,
2148 PrefixInfo::Kind::Official
},
2149 { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp
,
2150 PrefixInfo::Kind::Official
},
2151 { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier
,
2152 PrefixInfo::Kind::Official
},
2153 { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg
,
2154 PrefixInfo::Kind::Official
},
2155 { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc
,
2156 PrefixInfo::Kind::Official
},
2157 { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav
,
2158 PrefixInfo::Kind::Official
}
2160 /* This list needs to be sorted, or you'll introduce serious bugs */
// [pFirst, pLast] is the range of table entries that are still candidates; it
// starts as the whole table without the dummy entry at index 0.
2162 PrefixInfo
const * pFirst
= aMap
+ 1;
2163 PrefixInfo
const * pLast
= aMap
+ sizeof aMap
/ sizeof (PrefixInfo
) - 1;
2164 PrefixInfo
const * pMatch
= nullptr;
2165 sal_Unicode
const * pMatched
= rBegin
;
2166 sal_Unicode
const * p
= rBegin
;
// i is the index of the prefix character currently being compared.
2168 for (; pFirst
< pLast
; ++i
)
// pFirst's prefix ends at position i.
2170 if (pFirst
->m_pPrefix
[i
] == '\0')
// Narrow the candidate range from both ends, using the fact that the table is
// sorted: entries whose i-th character is smaller than the input character are
// tested at the lower end, larger ones at the upper end.
2177 sal_uInt32 nChar
= rtl::toAsciiLowerCase(*p
++);
2178 while (pFirst
<= pLast
&& static_cast<unsigned char>(pFirst
->m_pPrefix
[i
]) < nChar
)
2180 while (pFirst
<= pLast
&& static_cast<unsigned char>(pLast
->m_pPrefix
[i
]) > nChar
)
// A single candidate is left: compare the rest of its prefix with the input.
2183 if (pFirst
== pLast
)
2185 char const * q
= pFirst
->m_pPrefix
+ i
;
2186 while (p
< pEnd
&& *q
!= '\0'
2187 && rtl::toAsciiLowerCase(*p
) == static_cast<unsigned char>(*q
))
// Computes the index within m_aAbsURIRef at which the authority part starts,
// i.e. the position of its leading "//".
// NOTE(review): the declaration of nBegin, an `else` and the return statement
// are not visible in this excerpt; presumably nBegin is returned -- confirm.
2202 sal_Int32
INetURLObject::getAuthorityBegin() const
// Only meaningful for schemes that have an authority part.
2204 DBG_ASSERT(getSchemeInfo().m_bAuthority
,
2205 "INetURLObject::getAuthority(): Bad scheme");
// Start from the first component that is present: user, else host, with the
// path as the remaining candidate.
2207 if (m_aUser
.isPresent())
2208 nBegin
= m_aUser
.getBegin();
2209 else if (m_aHost
.isPresent())
2210 nBegin
= m_aHost
.getBegin();
2212 nBegin
= m_aPath
.getBegin();
// Step back over the "//" that precedes that component.
2213 nBegin
-= RTL_CONSTASCII_LENGTH("//");
2214 DBG_ASSERT(m_aAbsURIRef
[nBegin
] == '/' && m_aAbsURIRef
[nBegin
+ 1] == '/',
2215 "INetURLObject::getAuthority(): Bad authority");
2219 INetURLObject::SubString
INetURLObject::getAuthority() const
2221 sal_Int32 nBegin
= getAuthorityBegin();
2222 sal_Int32 nEnd
= m_aPort
.isPresent() ? m_aPort
.getEnd() :
2223 m_aHost
.isPresent() ? m_aHost
.getEnd() :
2224 m_aAuth
.isPresent() ? m_aAuth
.getEnd() :
2225 m_aUser
.isPresent() ? m_aUser
.getEnd() :
2226 nBegin
+ RTL_CONSTASCII_LENGTH("//");
2227 return SubString(nBegin
, nEnd
- nBegin
);
// Sets the user part of this URL to rTheUser (encoded as PART_USER_PASSWORD
// using eCharset) and rewrites m_aAbsURIRef accordingly.
//
// rTheUser  the new user name
// eCharset  charset used when encoding the user name
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the complete scheme check around m_bUser, the declaration of
// nDelta, the last fallback branch, most of the offset adjustments and the
// return statements), so the returned value cannot be documented from here.
2230 bool INetURLObject::setUser(OUString
const & rTheUser
,
2231 rtl_TextEncoding eCharset
)
2234 !getSchemeInfo().m_bUser
2240 OUString
aNewUser(encodeText(rTheUser
, PART_USER_PASSWORD
,
2241 EncodeMechanism::WasEncoded
, eCharset
, false));
// Existing user part: replace it in place.
2243 if (m_aUser
.isPresent())
2244 nDelta
= m_aUser
.set(m_aAbsURIRef
, aNewUser
);
// No user but a host: insert an '@' in front of the host and put the user in
// front of it; the extra 1 accounts for the inserted '@'.
2245 else if (m_aHost
.isPresent())
2247 m_aAbsURIRef
.insert(m_aHost
.getBegin(), u
'@');
2248 nDelta
= m_aUser
.set(m_aAbsURIRef
, aNewUser
, m_aHost
.getBegin()) + 1;
// Neither user nor host present, but the scheme knows a host part: the user is
// placed where the path begins.
2250 else if (getSchemeInfo().m_bHost
)
2253 nDelta
= m_aUser
.set(m_aAbsURIRef
, aNewUser
, m_aPath
.getBegin());
// Shift the fragment by the number of characters the URI text has changed.
2259 m_aFragment
+= nDelta
;
2265 void lcl_Erase(OUStringBuffer
&rBuf
, sal_Int32 index
, sal_Int32 count
)
2267 OUString
sTemp(rBuf
.makeStringAndClear());
2268 rBuf
.append(sTemp
.replaceAt(index
, count
, OUString()));
2272 bool INetURLObject::clearPassword()
2274 if (!getSchemeInfo().m_bPassword
)
2276 if (m_aAuth
.isPresent())
2278 lcl_Erase(m_aAbsURIRef
, m_aAuth
.getBegin() - 1,
2279 m_aAuth
.getLength() + 1);
2280 sal_Int32 nDelta
= m_aAuth
.clear() - 1;
2285 m_aFragment
+= nDelta
;
2290 bool INetURLObject::setPassword(OUString
const & rThePassword
,
2291 rtl_TextEncoding eCharset
)
2293 if (!getSchemeInfo().m_bPassword
)
2295 OUString
aNewAuth(encodeText(rThePassword
, PART_USER_PASSWORD
,
2296 EncodeMechanism::WasEncoded
, eCharset
, false));
2298 if (m_aAuth
.isPresent())
2299 nDelta
= m_aAuth
.set(m_aAbsURIRef
, aNewAuth
);
2300 else if (m_aUser
.isPresent())
2302 m_aAbsURIRef
.insert(m_aUser
.getEnd(), u
':');
2304 = m_aAuth
.set(m_aAbsURIRef
, aNewAuth
, m_aUser
.getEnd() + 1) + 1;
2306 else if (m_aHost
.isPresent())
2308 m_aAbsURIRef
.insert(m_aHost
.getBegin(), ":@" );
2309 m_aUser
.set(m_aAbsURIRef
, OUString(), m_aHost
.getBegin());
2311 = m_aAuth
.set(m_aAbsURIRef
, aNewAuth
, m_aHost
.getBegin() + 1) + 2;
2313 else if (getSchemeInfo().m_bHost
)
2317 m_aAbsURIRef
.insert(m_aPath
.getBegin(), u
':');
2318 m_aUser
.set(m_aAbsURIRef
, OUString(), m_aPath
.getBegin());
2320 = m_aAuth
.set(m_aAbsURIRef
, aNewAuth
, m_aPath
.getBegin() + 1) + 1;
2326 m_aFragment
+= nDelta
;
2331 bool INetURLObject::parseHost(sal_Unicode
const *& rBegin
, sal_Unicode
const * pEnd
,
2332 OUString
& rCanonic
)
2334 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2335 // IPv4 address directly follows the abbreviating "::". The ABNF in
2336 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2337 // mentions "::13:1.68.3". This algorithm accepts both variants:
2338 enum State
{ STATE_INITIAL
, STATE_LABEL
, STATE_LABEL_HYPHEN
,
2339 STATE_LABEL_DOT
, STATE_TOPLABEL
, STATE_TOPLABEL_HYPHEN
,
2340 STATE_TOPLABEL_DOT
, STATE_IP4
, STATE_IP4_DOT
, STATE_IP6
,
2341 STATE_IP6_COLON
, STATE_IP6_2COLON
, STATE_IP6_3COLON
,
2342 STATE_IP6_HEXSEQ1
, STATE_IP6_HEXSEQ1_COLON
,
2343 STATE_IP6_HEXSEQ1_MAYBE_IP4
, STATE_IP6_HEXSEQ2
,
2344 STATE_IP6_HEXSEQ2_COLON
, STATE_IP6_HEXSEQ2_MAYBE_IP4
,
2345 STATE_IP6_IP4
, STATE_IP6_IP4_DOT
, STATE_IP6_DONE
};
2346 OUStringBuffer
aTheCanonic(32);
2347 sal_uInt32 nNumber
= 0;
2350 State eState
= STATE_INITIAL
;
2351 sal_Unicode
const * p
= rBegin
;
2352 for (; p
!= pEnd
; ++p
)
2358 aTheCanonic
.append('[');
2361 else if (rtl::isAsciiAlpha(*p
) || *p
== '_')
2362 eState
= STATE_TOPLABEL
;
2363 else if (rtl::isAsciiDigit(*p
))
2365 nNumber
= INetMIME::getWeight(*p
);
2376 eState
= STATE_LABEL_DOT
;
2378 eState
= STATE_LABEL_HYPHEN
;
2379 else if (!rtl::isAsciiAlphanumeric(*p
) && *p
!= '_')
2383 case STATE_LABEL_HYPHEN
:
2384 if (rtl::isAsciiAlphanumeric(*p
) || *p
== '_')
2385 eState
= STATE_LABEL
;
2390 case STATE_LABEL_DOT
:
2391 if (rtl::isAsciiAlpha(*p
) || *p
== '_')
2392 eState
= STATE_TOPLABEL
;
2393 else if (rtl::isAsciiDigit(*p
))
2394 eState
= STATE_LABEL
;
2399 case STATE_TOPLABEL
:
2401 eState
= STATE_TOPLABEL_DOT
;
2403 eState
= STATE_TOPLABEL_HYPHEN
;
2404 else if (!rtl::isAsciiAlphanumeric(*p
) && *p
!= '_')
2408 case STATE_TOPLABEL_HYPHEN
:
2409 if (rtl::isAsciiAlphanumeric(*p
) || *p
== '_')
2410 eState
= STATE_TOPLABEL
;
2415 case STATE_TOPLABEL_DOT
:
2416 if (rtl::isAsciiAlpha(*p
) || *p
== '_')
2417 eState
= STATE_TOPLABEL
;
2418 else if (rtl::isAsciiDigit(*p
))
2419 eState
= STATE_LABEL
;
2428 aTheCanonic
.append( OUString::number(nNumber
) );
2429 aTheCanonic
.append( '.' );
2431 eState
= STATE_IP4_DOT
;
2434 eState
= STATE_LABEL_DOT
;
2436 eState
= STATE_LABEL_HYPHEN
;
2437 else if (rtl::isAsciiAlpha(*p
) || *p
== '_')
2438 eState
= STATE_LABEL
;
2439 else if (rtl::isAsciiDigit(*p
))
2442 nNumber
= 10 * nNumber
+ INetMIME::getWeight(*p
);
2446 eState
= STATE_LABEL
;
2452 if (rtl::isAsciiAlpha(*p
) || *p
== '_')
2453 eState
= STATE_TOPLABEL
;
2454 else if (rtl::isAsciiDigit(*p
))
2456 nNumber
= INetMIME::getWeight(*p
);
2466 eState
= STATE_IP6_COLON
;
2467 else if (rtl::isAsciiHexDigit(*p
))
2469 nNumber
= INetMIME::getHexWeight(*p
);
2471 eState
= STATE_IP6_HEXSEQ1
;
2477 case STATE_IP6_COLON
:
2480 aTheCanonic
.append("::");
2481 eState
= STATE_IP6_2COLON
;
2487 case STATE_IP6_2COLON
:
2489 eState
= STATE_IP6_DONE
;
2492 aTheCanonic
.append(':');
2493 eState
= STATE_IP6_3COLON
;
2495 else if (rtl::isAsciiDigit(*p
))
2497 nNumber
= INetMIME::getWeight(*p
);
2499 eState
= STATE_IP6_HEXSEQ2_MAYBE_IP4
;
2501 else if (rtl::isAsciiHexDigit(*p
))
2503 nNumber
= INetMIME::getHexWeight(*p
);
2505 eState
= STATE_IP6_HEXSEQ2
;
2511 case STATE_IP6_3COLON
:
2512 if (rtl::isAsciiDigit(*p
))
2514 nNumber
= INetMIME::getWeight(*p
);
2517 eState
= STATE_IP6_IP4
;
2523 case STATE_IP6_HEXSEQ1
:
2527 OUString::number(nNumber
, 16));
2528 eState
= STATE_IP6_DONE
;
2533 OUString::number(nNumber
, 16));
2534 aTheCanonic
.append(':');
2535 eState
= STATE_IP6_HEXSEQ1_COLON
;
2537 else if (rtl::isAsciiHexDigit(*p
) && nDigits
< 4)
2539 nNumber
= 16 * nNumber
+ INetMIME::getHexWeight(*p
);
2546 case STATE_IP6_HEXSEQ1_COLON
:
2549 aTheCanonic
.append(':');
2550 eState
= STATE_IP6_2COLON
;
2552 else if (rtl::isAsciiDigit(*p
))
2554 nNumber
= INetMIME::getWeight(*p
);
2556 eState
= STATE_IP6_HEXSEQ1_MAYBE_IP4
;
2558 else if (rtl::isAsciiHexDigit(*p
))
2560 nNumber
= INetMIME::getHexWeight(*p
);
2562 eState
= STATE_IP6_HEXSEQ1
;
2568 case STATE_IP6_HEXSEQ1_MAYBE_IP4
:
2572 OUString::number(nNumber
, 16));
2573 eState
= STATE_IP6_DONE
;
2578 OUString::number(nNumber
, 16));
2579 aTheCanonic
.append(':');
2580 eState
= STATE_IP6_HEXSEQ1_COLON
;
2584 nNumber
= 100 * (nNumber
>> 8) + 10 * (nNumber
>> 4 & 15)
2587 OUString::number(nNumber
));
2588 aTheCanonic
.append('.');
2590 eState
= STATE_IP6_IP4_DOT
;
2592 else if (rtl::isAsciiDigit(*p
) && nDigits
< 3)
2594 nNumber
= 16 * nNumber
+ INetMIME::getWeight(*p
);
2597 else if (rtl::isAsciiHexDigit(*p
) && nDigits
< 4)
2599 nNumber
= 16 * nNumber
+ INetMIME::getHexWeight(*p
);
2601 eState
= STATE_IP6_HEXSEQ1
;
2607 case STATE_IP6_HEXSEQ2
:
2611 OUString::number(nNumber
, 16));
2612 eState
= STATE_IP6_DONE
;
2617 OUString::number(nNumber
, 16));
2618 aTheCanonic
.append(':');
2619 eState
= STATE_IP6_HEXSEQ2_COLON
;
2621 else if (rtl::isAsciiHexDigit(*p
) && nDigits
< 4)
2623 nNumber
= 16 * nNumber
+ INetMIME::getHexWeight(*p
);
2630 case STATE_IP6_HEXSEQ2_COLON
:
2631 if (rtl::isAsciiDigit(*p
))
2633 nNumber
= INetMIME::getWeight(*p
);
2635 eState
= STATE_IP6_HEXSEQ2_MAYBE_IP4
;
2637 else if (rtl::isAsciiHexDigit(*p
))
2639 nNumber
= INetMIME::getHexWeight(*p
);
2641 eState
= STATE_IP6_HEXSEQ2
;
2647 case STATE_IP6_HEXSEQ2_MAYBE_IP4
:
2651 OUString::number(nNumber
, 16));
2652 eState
= STATE_IP6_DONE
;
2657 OUString::number(nNumber
, 16));
2658 aTheCanonic
.append(':');
2659 eState
= STATE_IP6_HEXSEQ2_COLON
;
2663 nNumber
= 100 * (nNumber
>> 8) + 10 * (nNumber
>> 4 & 15)
2666 OUString::number(nNumber
));
2667 aTheCanonic
.append('.');
2669 eState
= STATE_IP6_IP4_DOT
;
2671 else if (rtl::isAsciiDigit(*p
) && nDigits
< 3)
2673 nNumber
= 16 * nNumber
+ INetMIME::getWeight(*p
);
2676 else if (rtl::isAsciiHexDigit(*p
) && nDigits
< 4)
2678 nNumber
= 16 * nNumber
+ INetMIME::getHexWeight(*p
);
2680 eState
= STATE_IP6_HEXSEQ2
;
2691 OUString::number(nNumber
));
2692 eState
= STATE_IP6_DONE
;
2700 OUString::number(nNumber
));
2701 aTheCanonic
.append('.');
2703 eState
= STATE_IP6_IP4_DOT
;
2707 else if (rtl::isAsciiDigit(*p
) && nDigits
< 3)
2709 nNumber
= 10 * nNumber
+ INetMIME::getWeight(*p
);
2716 case STATE_IP6_IP4_DOT
:
2717 if (rtl::isAsciiDigit(*p
))
2719 nNumber
= INetMIME::getWeight(*p
);
2721 eState
= STATE_IP6_IP4
;
2727 case STATE_IP6_DONE
:
2734 case STATE_TOPLABEL
:
2735 case STATE_TOPLABEL_DOT
:
2736 aTheCanonic
.setLength(0);
2737 aTheCanonic
.append(rBegin
, p
- rBegin
);
2739 rCanonic
= aTheCanonic
.makeStringAndClear();
2746 OUString::number(nNumber
));
2748 rCanonic
= aTheCanonic
.makeStringAndClear();
2753 case STATE_IP6_DONE
:
2754 aTheCanonic
.append(']');
2756 rCanonic
= aTheCanonic
.makeStringAndClear();
2765 bool INetURLObject::parseHostOrNetBiosName(
2766 sal_Unicode
const * pBegin
, sal_Unicode
const * pEnd
,
2767 EncodeMechanism eMechanism
, rtl_TextEncoding eCharset
, bool bNetBiosName
,
2768 OUStringBuffer
* pCanonic
)
2770 OUString aTheCanonic
;
2773 sal_Unicode
const * p
= pBegin
;
2774 if (!parseHost(p
, pEnd
, aTheCanonic
) || p
!= pEnd
)
2779 while (pBegin
< pEnd
)
2781 EscapeType eEscapeType
;
2782 sal_uInt32 nUTF32
= getUTF32(pBegin
, pEnd
,
2783 eMechanism
, eCharset
,
2785 if (!INetMIME::isVisible(nUTF32
))
2787 if (!rtl::isAsciiAlphanumeric(nUTF32
))
2808 if (pCanonic
!= nullptr) {
2810 buf
, nUTF32
, eEscapeType
, PART_URIC
,
2814 aTheCanonic
= buf
.makeStringAndClear();
2820 if (pCanonic
!= nullptr) {
2821 *pCanonic
= aTheCanonic
;
2826 bool INetURLObject::setHost(OUString
const & rTheHost
,
2827 rtl_TextEncoding eCharset
)
2829 if (!getSchemeInfo().m_bHost
)
2831 OUStringBuffer
aSynHost(rTheHost
);
2832 bool bNetBiosName
= false;
2835 case INetProtocol::File
:
2837 OUString
sTemp(aSynHost
.toString());
2838 if (sTemp
.equalsIgnoreAsciiCase("localhost"))
2840 aSynHost
.setLength(0);
2842 bNetBiosName
= true;
2845 case INetProtocol::Ldap
:
2846 if (aSynHost
.isEmpty() && m_aPort
.isPresent())
2851 if (aSynHost
.isEmpty())
2855 if (!parseHostOrNetBiosName(
2856 aSynHost
.getStr(), aSynHost
.getStr() + aSynHost
.getLength(),
2857 EncodeMechanism::WasEncoded
, eCharset
, bNetBiosName
, &aSynHost
))
2859 sal_Int32 nDelta
= m_aHost
.set(m_aAbsURIRef
, aSynHost
.makeStringAndClear());
2863 m_aFragment
+= nDelta
;
2868 bool INetURLObject::parsePath(INetProtocol eScheme
,
2869 sal_Unicode
const ** pBegin
,
2870 sal_Unicode
const * pEnd
,
2871 EncodeMechanism eMechanism
,
2872 rtl_TextEncoding eCharset
,
2873 bool bSkippedInitialSlash
,
2874 sal_uInt32 nSegmentDelimiter
,
2875 sal_uInt32 nAltSegmentDelimiter
,
2876 sal_uInt32 nQueryDelimiter
,
2877 sal_uInt32 nFragmentDelimiter
,
2878 OUStringBuffer
&rSynPath
)
2880 DBG_ASSERT(pBegin
, "INetURLObject::parsePath(): Null output param");
2882 sal_Unicode
const * pPos
= *pBegin
;
2883 OUStringBuffer
aTheSynPath(256);
2887 case INetProtocol::NotValid
:
2890 case INetProtocol::Ftp
:
2891 if (pPos
< pEnd
&& *pPos
!= '/' && *pPos
!= nFragmentDelimiter
)
2893 while (pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
)
2895 EscapeType eEscapeType
;
2896 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
2897 eCharset
, eEscapeType
);
2898 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
2899 PART_HTTP_PATH
, eCharset
, true);
2901 if (aTheSynPath
.isEmpty())
2902 aTheSynPath
.append('/');
2905 case INetProtocol::Http
:
2906 case INetProtocol::VndSunStarWebdav
:
2907 case INetProtocol::Https
:
2908 case INetProtocol::Smb
:
2909 case INetProtocol::Cmis
:
2910 if (pPos
< pEnd
&& *pPos
!= '/' && *pPos
!= nFragmentDelimiter
)
2912 while (pPos
< pEnd
&& *pPos
!= nQueryDelimiter
2913 && *pPos
!= nFragmentDelimiter
)
2915 EscapeType eEscapeType
;
2916 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
2917 eCharset
, eEscapeType
);
2918 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
2919 PART_HTTP_PATH
, eCharset
, true);
2921 if (aTheSynPath
.isEmpty())
2922 aTheSynPath
.append('/');
2925 case INetProtocol::File
:
2927 if (bSkippedInitialSlash
)
2928 aTheSynPath
.append('/');
2929 else if (pPos
< pEnd
2930 && *pPos
!= nSegmentDelimiter
2931 && *pPos
!= nAltSegmentDelimiter
)
2933 while (pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
)
2935 EscapeType eEscapeType
;
2936 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
2937 eCharset
, eEscapeType
);
2938 if (eEscapeType
== EscapeType::NONE
)
2940 if (nUTF32
== nSegmentDelimiter
2941 || nUTF32
== nAltSegmentDelimiter
)
2943 aTheSynPath
.append('/');
2946 else if (nUTF32
== '|'
2948 || *pPos
== nFragmentDelimiter
2949 || *pPos
== nSegmentDelimiter
2950 || *pPos
== nAltSegmentDelimiter
)
2951 && aTheSynPath
.getLength() == 2
2952 && rtl::isAsciiAlpha(aTheSynPath
[1]))
2954 // A first segment of <ALPHA "|"> is translated to
2956 aTheSynPath
.append(':');
2960 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
2961 PART_PCHAR
, eCharset
, true);
2963 if (aTheSynPath
.isEmpty())
2964 aTheSynPath
.append('/');
2968 case INetProtocol::Mailto
:
2969 while (pPos
< pEnd
&& *pPos
!= nQueryDelimiter
2970 && *pPos
!= nFragmentDelimiter
)
2972 EscapeType eEscapeType
;
2973 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
2974 eCharset
, eEscapeType
);
2975 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
2976 PART_MAILTO
, eCharset
, true);
2981 case INetProtocol::PrivSoffice
:
2982 case INetProtocol::Slot
:
2983 case INetProtocol::Hid
:
2984 case INetProtocol::Macro
:
2985 case INetProtocol::Uno
:
2986 case INetProtocol::Component
:
2987 case INetProtocol::Ldap
:
2988 while (pPos
< pEnd
&& *pPos
!= nQueryDelimiter
2989 && *pPos
!= nFragmentDelimiter
)
2991 EscapeType eEscapeType
;
2992 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
2993 eCharset
, eEscapeType
);
2994 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
2995 PART_PATH_BEFORE_QUERY
, eCharset
, true);
2999 case INetProtocol::VndSunStarHelp
:
3001 || *pPos
== nQueryDelimiter
3002 || *pPos
== nFragmentDelimiter
)
3003 aTheSynPath
.append('/');
3008 while (pPos
< pEnd
&& *pPos
!= nQueryDelimiter
3009 && *pPos
!= nFragmentDelimiter
)
3011 EscapeType eEscapeType
;
3012 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
,
3014 eCharset
, eEscapeType
);
3015 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
3016 PART_HTTP_PATH
, eCharset
, true);
3021 case INetProtocol::Javascript
:
3022 case INetProtocol::Data
:
3023 case INetProtocol::Cid
:
3024 case INetProtocol::Db
:
3025 while (pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
)
3027 EscapeType eEscapeType
;
3028 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
3029 eCharset
, eEscapeType
);
3030 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
3031 PART_URIC
, eCharset
, true);
3035 case INetProtocol::VndSunStarHier
:
3036 case INetProtocol::VndSunStarPkg
:
3037 if (pPos
< pEnd
&& *pPos
!= '/'
3038 && *pPos
!= nQueryDelimiter
&& *pPos
!= nFragmentDelimiter
)
3040 while (pPos
< pEnd
&& *pPos
!= nQueryDelimiter
3041 && *pPos
!= nFragmentDelimiter
)
3043 EscapeType eEscapeType
;
3044 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
3045 eCharset
, eEscapeType
);
3046 if (eEscapeType
== EscapeType::NONE
&& nUTF32
== '/')
3047 aTheSynPath
.append('/');
3049 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
3050 PART_PCHAR
, eCharset
, false);
3052 if (aTheSynPath
.isEmpty())
3053 aTheSynPath
.append('/');
3056 case INetProtocol::VndSunStarCmd
:
3057 case INetProtocol::VndSunStarExpand
:
3059 if (pPos
== pEnd
|| *pPos
== nFragmentDelimiter
)
3061 Part ePart
= PART_URIC_NO_SLASH
;
3062 while (pPos
!= pEnd
&& *pPos
!= nFragmentDelimiter
)
3064 EscapeType eEscapeType
;
3065 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
3066 eCharset
, eEscapeType
);
3067 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
, ePart
,
3074 case INetProtocol::Telnet
:
3077 if (*pPos
!= '/' || pEnd
- pPos
> 1)
3081 aTheSynPath
.append('/');
3084 case INetProtocol::VndSunStarTdoc
:
3085 if (pPos
== pEnd
|| *pPos
!= '/')
3087 while (pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
)
3089 EscapeType eEscapeType
;
3090 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
3091 eCharset
, eEscapeType
);
3092 if (eEscapeType
== EscapeType::NONE
&& nUTF32
== '/')
3093 aTheSynPath
.append('/');
3095 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
3096 PART_PCHAR
, eCharset
, false);
3100 case INetProtocol::Generic
:
3101 case INetProtocol::Sftp
:
3102 while (pPos
< pEnd
&& *pPos
!= nFragmentDelimiter
)
3104 EscapeType eEscapeType
;
3105 sal_uInt32 nUTF32
= getUTF32(pPos
, pEnd
, eMechanism
,
3106 eCharset
, eEscapeType
);
3107 appendUCS4(aTheSynPath
, nUTF32
, eEscapeType
,
3108 PART_URIC
, eCharset
, true);
3110 if (aTheSynPath
.isEmpty())
3119 rSynPath
= aTheSynPath
;
3123 bool INetURLObject::setPath(OUString
const & rThePath
,
3124 EncodeMechanism eMechanism
,
3125 rtl_TextEncoding eCharset
)
3127 OUStringBuffer aSynPath
;
3128 sal_Unicode
const * p
= rThePath
.getStr();
3129 sal_Unicode
const * pEnd
= p
+ rThePath
.getLength();
3130 if (!parsePath(m_eScheme
, &p
, pEnd
, eMechanism
, eCharset
, false,
3131 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath
)
3134 sal_Int32 nDelta
= m_aPath
.set(m_aAbsURIRef
, aSynPath
.makeStringAndClear());
3136 m_aFragment
+= nDelta
;
3140 bool INetURLObject::checkHierarchical() const {
3141 if (m_eScheme
== INetProtocol::VndSunStarExpand
) {
3143 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3146 return getSchemeInfo().m_bHierarchical
;
3150 bool INetURLObject::Append(OUString
const & rTheSegment
,
3151 EncodeMechanism eMechanism
,
3152 rtl_TextEncoding eCharset
)
3154 return insertName(rTheSegment
, false, LAST_SEGMENT
, eMechanism
, eCharset
);
3157 INetURLObject::SubString
INetURLObject::getSegment(sal_Int32 nIndex
,
3158 bool bIgnoreFinalSlash
)
3161 DBG_ASSERT(nIndex
>= 0 || nIndex
== LAST_SEGMENT
,
3162 "INetURLObject::getSegment(): Bad index");
3164 if (!checkHierarchical())
3167 sal_Unicode
const * pPathBegin
3168 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
3169 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
3170 sal_Unicode
const * pSegBegin
;
3171 sal_Unicode
const * pSegEnd
;
3172 if (nIndex
== LAST_SEGMENT
)
3175 if (bIgnoreFinalSlash
&& pSegEnd
> pPathBegin
&& pSegEnd
[-1] == '/')
3177 if (pSegEnd
<= pPathBegin
)
3179 pSegBegin
= pSegEnd
- 1;
3180 while (pSegBegin
> pPathBegin
&& *pSegBegin
!= '/')
3185 pSegBegin
= pPathBegin
;
3186 while (nIndex
-- > 0)
3190 if (pSegBegin
>= pPathEnd
)
3193 while (*pSegBegin
!= '/');
3194 pSegEnd
= pSegBegin
+ 1;
3195 while (pSegEnd
< pPathEnd
&& *pSegEnd
!= '/')
3199 return SubString(pSegBegin
- m_aAbsURIRef
.getStr(),
3200 pSegEnd
- pSegBegin
);
3203 bool INetURLObject::insertName(OUString
const & rTheName
,
3204 bool bAppendFinalSlash
, sal_Int32 nIndex
,
3205 EncodeMechanism eMechanism
,
3206 rtl_TextEncoding eCharset
)
3208 DBG_ASSERT(nIndex
>= 0 || nIndex
== LAST_SEGMENT
,
3209 "INetURLObject::insertName(): Bad index");
3211 if (!checkHierarchical())
3214 sal_Unicode
const * pPathBegin
3215 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
3216 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
3217 sal_Unicode
const * pPrefixEnd
;
3219 sal_Unicode
const * pSuffixBegin
;
3220 if (nIndex
== LAST_SEGMENT
)
3222 pPrefixEnd
= pPathEnd
;
3223 if (pPrefixEnd
> pPathBegin
&&
3224 pPrefixEnd
[-1] == '/')
3228 bInsertSlash
= bAppendFinalSlash
;
3229 pSuffixBegin
= pPathEnd
;
3231 else if (nIndex
== 0)
3233 pPrefixEnd
= pPathBegin
;
3235 (pPathBegin
< pPathEnd
&& *pPathBegin
!= '/') ||
3236 (pPathBegin
== pPathEnd
&& bAppendFinalSlash
);
3238 (pPathEnd
- pPathBegin
== 1 && *pPathBegin
== '/' &&
3240 ? pPathEnd
: pPathBegin
;
3244 pPrefixEnd
= pPathBegin
;
3245 sal_Unicode
const * pEnd
= pPathEnd
;
3246 if (pEnd
> pPathBegin
&& pEnd
[-1] == '/')
3248 bool bSkip
= pPrefixEnd
< pEnd
&& *pPrefixEnd
== '/';
3249 bInsertSlash
= false;
3250 pSuffixBegin
= pPathEnd
;
3251 while (nIndex
-- > 0)
3257 if (pPrefixEnd
>= pEnd
)
3261 bInsertSlash
= bAppendFinalSlash
;
3267 if (*pPrefixEnd
== '/')
3269 pSuffixBegin
= pPrefixEnd
;
3275 OUStringBuffer
aNewPath(256);
3276 aNewPath
.append(pPathBegin
, pPrefixEnd
- pPathBegin
);
3277 aNewPath
.append('/');
3278 aNewPath
.append(encodeText(rTheName
, PART_PCHAR
,
3279 eMechanism
, eCharset
, true));
3281 aNewPath
.append('/');
3283 aNewPath
.append(pSuffixBegin
, pPathEnd
- pSuffixBegin
);
3285 return setPath(aNewPath
.makeStringAndClear(), EncodeMechanism::NotCanonical
,
3286 RTL_TEXTENCODING_UTF8
);
3289 void INetURLObject::clearQuery()
3293 if (m_aQuery
.isPresent())
3295 lcl_Erase(m_aAbsURIRef
, m_aQuery
.getBegin() - 1,
3296 m_aQuery
.getLength() + 1);
3297 m_aFragment
+= m_aQuery
.clear() - 1;
3301 bool INetURLObject::setQuery(OUString
const & rTheQuery
,
3302 EncodeMechanism eMechanism
,
3303 rtl_TextEncoding eCharset
)
3305 if (!getSchemeInfo().m_bQuery
)
3307 OUString
aNewQuery(encodeText(rTheQuery
, PART_URIC
,
3308 eMechanism
, eCharset
, true));
3310 if (m_aQuery
.isPresent())
3311 nDelta
= m_aQuery
.set(m_aAbsURIRef
, aNewQuery
);
3314 m_aAbsURIRef
.insert(m_aPath
.getEnd(), u
'?');
3315 nDelta
= m_aQuery
.set(m_aAbsURIRef
, aNewQuery
, m_aPath
.getEnd() + 1)
3318 m_aFragment
+= nDelta
;
3322 bool INetURLObject::clearFragment()
3326 if (m_aFragment
.isPresent())
3328 m_aAbsURIRef
.setLength(m_aFragment
.getBegin() - 1);
3329 m_aFragment
.clear();
3334 bool INetURLObject::setFragment(OUString
const & rTheFragment
,
3335 EncodeMechanism eMechanism
,
3336 rtl_TextEncoding eCharset
)
3340 OUString
aNewFragment(encodeText(rTheFragment
, PART_URIC
,
3341 eMechanism
, eCharset
, true));
3342 if (m_aFragment
.isPresent())
3343 m_aFragment
.set(m_aAbsURIRef
, aNewFragment
);
3346 m_aAbsURIRef
.append('#');
3347 m_aFragment
.set(m_aAbsURIRef
, aNewFragment
, m_aAbsURIRef
.getLength());
3352 bool INetURLObject::hasDosVolume(FSysStyle eStyle
) const
3354 sal_Unicode
const * p
= m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
3355 return (eStyle
& FSysStyle::Dos
)
3356 && m_aPath
.getLength() >= 3
3358 && rtl::isAsciiAlpha(p
[1])
3360 && (m_aPath
.getLength() == 3 || p
[3] == '/');
3364 OUString
INetURLObject::encodeText(sal_Unicode
const * pBegin
,
3365 sal_Unicode
const * pEnd
,
3366 Part ePart
, EncodeMechanism eMechanism
,
3367 rtl_TextEncoding eCharset
,
3368 bool bKeepVisibleEscapes
)
3370 OUStringBuffer
aResult(256);
3371 while (pBegin
< pEnd
)
3373 EscapeType eEscapeType
;
3374 sal_uInt32 nUTF32
= getUTF32(pBegin
, pEnd
,
3375 eMechanism
, eCharset
, eEscapeType
);
3376 appendUCS4(aResult
, nUTF32
, eEscapeType
, ePart
,
3377 eCharset
, bKeepVisibleEscapes
);
3379 return aResult
.makeStringAndClear();
3383 OUString
INetURLObject::decode(sal_Unicode
const * pBegin
,
3384 sal_Unicode
const * pEnd
,
3385 DecodeMechanism eMechanism
,
3386 rtl_TextEncoding eCharset
)
3390 case DecodeMechanism::NONE
:
3391 return OUString(pBegin
, pEnd
- pBegin
);
3393 case DecodeMechanism::ToIUri
:
3394 eCharset
= RTL_TEXTENCODING_UTF8
;
3400 OUStringBuffer
aResult(static_cast<int>(pEnd
-pBegin
));
3401 while (pBegin
< pEnd
)
3403 EscapeType eEscapeType
;
3404 sal_uInt32 nUTF32
= getUTF32(pBegin
, pEnd
,
3405 EncodeMechanism::WasEncoded
, eCharset
, eEscapeType
);
3406 switch (eEscapeType
)
3408 case EscapeType::NONE
:
3409 aResult
.appendUtf32(nUTF32
);
3412 case EscapeType::Octet
:
3413 appendEscape(aResult
, nUTF32
);
3416 case EscapeType::Utf32
:
3418 rtl::isAscii(nUTF32
) &&
3420 eMechanism
== DecodeMechanism::ToIUri
||
3422 eMechanism
== DecodeMechanism::Unambiguous
&&
3423 mustEncode(nUTF32
, PART_UNAMBIGUOUS
)
3428 appendEscape(aResult
, nUTF32
);
3431 aResult
.appendUtf32(nUTF32
);
3435 return aResult
.makeStringAndClear();
3438 OUString
INetURLObject::GetURLNoPass(DecodeMechanism eMechanism
,
3439 rtl_TextEncoding eCharset
) const
3441 INetURLObject
aTemp(*this);
3442 aTemp
.clearPassword();
3443 return aTemp
.GetMainURL(eMechanism
, eCharset
);
3446 OUString
INetURLObject::GetURLNoMark(DecodeMechanism eMechanism
,
3447 rtl_TextEncoding eCharset
) const
3449 INetURLObject
aTemp(*this);
3450 aTemp
.clearFragment();
3451 return aTemp
.GetMainURL(eMechanism
, eCharset
);
3455 INetURLObject::getAbbreviated(
3456 uno::Reference
< util::XStringWidth
> const & rStringWidth
,
3458 DecodeMechanism eMechanism
,
3459 rtl_TextEncoding eCharset
)
3462 OSL_ENSURE(rStringWidth
.is(), "specification violation");
3463 OUStringBuffer aBuffer
;
3464 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3465 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3466 if (m_eScheme
!= INetProtocol::Generic
)
3468 aBuffer
.appendAscii(getSchemeInfo().m_pScheme
);
3472 if (!m_aAbsURIRef
.isEmpty())
3474 sal_Unicode
const * pSchemeBegin
3475 = m_aAbsURIRef
.getStr();
3476 sal_Unicode
const * pSchemeEnd
= pSchemeBegin
;
3478 while (pSchemeEnd
[0] != ':')
3482 aBuffer
.append(pSchemeBegin
, pSchemeEnd
- pSchemeBegin
);
3485 aBuffer
.append(':');
3486 bool bAuthority
= getSchemeInfo().m_bAuthority
;
3487 sal_Unicode
const * pCoreBegin
3488 = m_aAbsURIRef
.getStr() + (bAuthority
? getAuthorityBegin() :
3489 m_aPath
.getBegin());
3490 sal_Unicode
const * pCoreEnd
3491 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin() + m_aPath
.getLength();
3492 bool bSegment
= false;
3493 if (getSchemeInfo().m_bHierarchical
)
3496 if (m_aQuery
.isPresent())
3498 else if (m_aFragment
.isPresent())
3500 OUStringBuffer aTrailer
;
3501 sal_Unicode
const * pBegin
= pCoreBegin
;
3502 sal_Unicode
const * pEnd
= pCoreEnd
;
3503 sal_Unicode
const * pPrefixBegin
= pBegin
;
3504 sal_Unicode
const * pSuffixEnd
= pEnd
;
3505 bool bPrefix
= true;
3506 bool bSuffix
= true;
3511 sal_Unicode
const * p
= pSuffixEnd
- 1;
3512 if (pSuffixEnd
== pCoreEnd
&& *p
== '/')
3516 if (bAuthority
&& p
== pCoreBegin
+ 1)
3519 aSegment(decode(p
+ (p
== pBegin
&& pBegin
!= pCoreBegin
?
3525 OUStringBuffer
aResult(aBuffer
);
3526 if (pSuffixEnd
!= pBegin
)
3527 aResult
.append("...");
3528 aResult
.append(aSegment
);
3529 aResult
.append(aTrailer
.toString());
3530 aResult
.append(aRest
);
3532 queryStringWidth(aResult
.makeStringAndClear())
3535 aTrailer
.insert(0, aSegment
);
3541 if (pPrefixBegin
> pSuffixEnd
)
3542 pPrefixBegin
= pSuffixEnd
;
3548 sal_Unicode
const * p
3550 + (bAuthority
&& pPrefixBegin
== pCoreBegin
? 2 :
3552 OSL_ASSERT(p
<= pEnd
);
3553 while (p
< pEnd
&& *p
!= '/')
3555 if (p
== pCoreEnd
- 1 && *p
== '/')
3558 aSegment(decode(pPrefixBegin
3559 + (pPrefixBegin
== pCoreBegin
? 0 :
3561 p
== pEnd
? p
: p
+ 1,
3565 OUStringBuffer
aResult(aBuffer
);
3566 aResult
.append(aSegment
);
3567 if (pPrefixBegin
!= pEnd
)
3568 aResult
.append("...");
3569 aResult
.append(aTrailer
.toString());
3570 aResult
.append(aRest
);
3572 queryStringWidth(aResult
.makeStringAndClear())
3575 aBuffer
.append(aSegment
);
3577 pBegin
= pPrefixBegin
;
3581 if (pPrefixBegin
> pSuffixEnd
)
3582 pSuffixEnd
= pPrefixBegin
;
3587 while (bPrefix
|| bSuffix
);
3590 if (pPrefixBegin
!= pBegin
|| pSuffixEnd
!= pEnd
)
3591 aBuffer
.append("...");
3592 aBuffer
.append(aTrailer
.toString());
3596 aBuffer
.append(decode(pCoreBegin
,
3600 if (m_aQuery
.isPresent())
3602 aBuffer
.append('?');
3603 aBuffer
.append(decode(m_aQuery
, eMechanism
, eCharset
));
3605 if (m_aFragment
.isPresent())
3607 aBuffer
.append('#');
3608 aBuffer
.append(decode(m_aFragment
, eMechanism
, eCharset
));
3610 if (!aBuffer
.isEmpty())
3612 OUStringBuffer
aResult(aBuffer
);
3613 if (rStringWidth
->queryStringWidth(aResult
.makeStringAndClear())
3615 for (sal_Int32 i
= aBuffer
.getLength();;)
3619 aBuffer
.setLength(aBuffer
.getLength() - 1);
3620 if (aBuffer
.isEmpty())
3625 aBuffer
.setLength(--i
);
3626 aBuffer
.append("...");
3630 queryStringWidth(aResult
.makeStringAndClear())
3635 return aBuffer
.makeStringAndClear();
3638 bool INetURLObject::operator ==(INetURLObject
const & rObject
) const
3640 if (m_eScheme
!= rObject
.m_eScheme
)
3642 if (m_eScheme
== INetProtocol::NotValid
)
3643 return m_aAbsURIRef
.toString() == rObject
.m_aAbsURIRef
.toString();
3644 if ((m_aScheme
.compare(
3645 rObject
.m_aScheme
, m_aAbsURIRef
, rObject
.m_aAbsURIRef
)
3647 || GetUser(DecodeMechanism::NONE
) != rObject
.GetUser(DecodeMechanism::NONE
)
3648 || GetPass(DecodeMechanism::NONE
) != rObject
.GetPass(DecodeMechanism::NONE
)
3649 || !GetHost(DecodeMechanism::NONE
).equalsIgnoreAsciiCase(
3650 rObject
.GetHost(DecodeMechanism::NONE
))
3651 || GetPort() != rObject
.GetPort()
3652 || HasParam() != rObject
.HasParam()
3653 || GetParam() != rObject
.GetParam())
3655 OUString
aPath1(GetURLPath(DecodeMechanism::NONE
));
3656 OUString
aPath2(rObject
.GetURLPath(DecodeMechanism::NONE
));
3659 case INetProtocol::File
:
3661 // If the URL paths of two file URLs only differ in that one has a
3662 // final '/' and the other has not, take the two paths as
3663 // equivalent (this could be useful for other schemes, too):
3664 sal_Int32 nLength
= aPath1
.getLength();
3665 switch (nLength
- aPath2
.getLength())
3668 if (aPath2
[nLength
] != '/')
3676 if (aPath1
[--nLength
] != '/')
3683 return aPath1
.compareTo(aPath2
, nLength
) == 0;
3687 return aPath1
== aPath2
;
3691 bool INetURLObject::ConcatData(INetProtocol eTheScheme
,
3692 OUString
const & rTheUser
,
3693 OUString
const & rThePassword
,
3694 OUString
const & rTheHost
,
3695 sal_uInt32 nThePort
,
3696 OUString
const & rThePath
)
3699 m_eScheme
= eTheScheme
;
3700 if (HasError() || m_eScheme
== INetProtocol::Generic
)
3702 m_aAbsURIRef
.setLength(0);
3703 m_aAbsURIRef
.appendAscii(getSchemeInfo().m_pScheme
);
3704 m_aAbsURIRef
.append(':');
3705 if (getSchemeInfo().m_bAuthority
)
3707 m_aAbsURIRef
.append("//");
3708 bool bUserInfo
= false;
3709 if (getSchemeInfo().m_bUser
)
3711 if (!rTheUser
.isEmpty())
3713 m_aUser
.set(m_aAbsURIRef
,
3714 encodeText(rTheUser
, PART_USER_PASSWORD
,
3715 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false),
3716 m_aAbsURIRef
.getLength());
3720 else if (!rTheUser
.isEmpty())
3725 if (!rThePassword
.isEmpty())
3727 if (getSchemeInfo().m_bPassword
)
3729 m_aAbsURIRef
.append(':');
3730 m_aAuth
.set(m_aAbsURIRef
,
3731 encodeText(rThePassword
, PART_USER_PASSWORD
,
3732 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false),
3733 m_aAbsURIRef
.getLength());
3742 if (bUserInfo
&& getSchemeInfo().m_bHost
)
3743 m_aAbsURIRef
.append('@');
3744 if (getSchemeInfo().m_bHost
)
3746 OUStringBuffer
aSynHost(rTheHost
);
3747 bool bNetBiosName
= false;
3750 case INetProtocol::File
:
3752 OUString
sTemp(aSynHost
.toString());
3753 if (sTemp
.equalsIgnoreAsciiCase( "localhost" ))
3755 aSynHost
.setLength(0);
3757 bNetBiosName
= true;
3761 case INetProtocol::Ldap
:
3762 if (aSynHost
.isEmpty() && nThePort
!= 0)
3770 if (aSynHost
.isEmpty())
3777 if (!parseHostOrNetBiosName(
3778 aSynHost
.getStr(), aSynHost
.getStr() + aSynHost
.getLength(),
3779 EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, bNetBiosName
, &aSynHost
))
3784 m_aHost
.set(m_aAbsURIRef
, aSynHost
.makeStringAndClear(),
3785 m_aAbsURIRef
.getLength());
3788 if (getSchemeInfo().m_bPort
)
3790 m_aAbsURIRef
.append(':');
3791 m_aPort
.set(m_aAbsURIRef
,
3792 OUString::number(nThePort
),
3793 m_aAbsURIRef
.getLength());
3802 else if (!rTheHost
.isEmpty() || nThePort
!= 0)
3808 OUStringBuffer aSynPath
;
3809 sal_Unicode
const * p
= rThePath
.getStr();
3810 sal_Unicode
const * pEnd
= p
+ rThePath
.getLength();
3811 if (!parsePath(m_eScheme
, &p
, pEnd
, EncodeMechanism::WasEncoded
, RTL_TEXTENCODING_UTF8
, false, '/',
3812 0x80000000, 0x80000000, 0x80000000, aSynPath
)
3818 m_aPath
.set(m_aAbsURIRef
, aSynPath
.makeStringAndClear(),
3819 m_aAbsURIRef
.getLength());
// Resolves rTheRelURIRef against rTheBaseURIRef and returns the result as text.
//
// An empty reference, or one whose first character is '#', is handed back
// unchanged (the "backwards compatibility" shortcut below).  Otherwise a
// temporary INetURLObject is built from the base (eEncodeMechanism, eCharset)
// and its convertRelToAbs() is asked to fill aTheAbsURIRef.  The whole
// "||" chain forms the condition of the conditional operator:
// aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) is chosen when the
// conversion reports success, or when any of the mechanism/charset arguments
// differs from WasEncoded / ToIUri / UTF-8.
// NOTE(review): the declaration of bWasAbsolute and the alternative operand of
// the conditional operator are not visible in this listing - confirm against
// the complete file.
3824 OUString
INetURLObject::GetAbsURL(OUString
const & rTheBaseURIRef
,
3825 OUString
const & rTheRelURIRef
,
3826 EncodeMechanism eEncodeMechanism
,
3827 DecodeMechanism eDecodeMechanism
,
3828 rtl_TextEncoding eCharset
)
3830 // Backwards compatibility:
3831 if (rTheRelURIRef
.isEmpty() || rTheRelURIRef
[0] == '#')
3832 return rTheRelURIRef
;
3834 INetURLObject aTheAbsURIRef
;
3836 return INetURLObject(rTheBaseURIRef
, eEncodeMechanism
, eCharset
).
3837 convertRelToAbs(rTheRelURIRef
, aTheAbsURIRef
,
3838 bWasAbsolute
, eEncodeMechanism
,
3839 eCharset
, false, false,
3840 false, FSysStyle::Detect
)
3841 || eEncodeMechanism
!= EncodeMechanism::WasEncoded
3842 || eDecodeMechanism
!= DecodeMechanism::ToIUri
3843 || eCharset
!= RTL_TEXTENCODING_UTF8
?
3844 aTheAbsURIRef
.GetMainURL(eDecodeMechanism
, eCharset
) :
3848 OUString
INetURLObject::getExternalURL() const
3850 OUString aTheExtURIRef
;
3851 translateToExternal(
3852 m_aAbsURIRef
.toString(), aTheExtURIRef
);
3853 return aTheExtURIRef
;
3856 bool INetURLObject::isSchemeEqualTo(std::u16string_view scheme
) const {
3857 return m_aScheme
.isPresent()
3858 && (rtl_ustr_compareIgnoreAsciiCase_WithLength(
3859 scheme
.data(), scheme
.size(),
3860 m_aAbsURIRef
.getStr() + m_aScheme
.getBegin(),
3861 m_aScheme
.getLength())
3865 bool INetURLObject::isAnyKnownWebDAVScheme() const {
3866 return ( isSchemeEqualTo( INetProtocol::Http
) ||
3867 isSchemeEqualTo( INetProtocol::Https
) ||
3868 isSchemeEqualTo( INetProtocol::VndSunStarWebdav
) ||
3869 isSchemeEqualTo( u
"vnd.sun.star.webdavs" ) ||
3870 isSchemeEqualTo( u
"webdav" ) ||
3871 isSchemeEqualTo( u
"webdavs" ));
3875 OUString
INetURLObject::GetScheme(INetProtocol eTheScheme
)
3877 return OUString::createFromAscii(getSchemeInfo(eTheScheme
).m_pPrefix
);
3881 OUString
INetURLObject::GetSchemeName(INetProtocol eTheScheme
)
3883 return OUString::createFromAscii(getSchemeInfo(eTheScheme
).m_pScheme
);
3887 INetProtocol
INetURLObject::CompareProtocolScheme(OUString
const &
3890 sal_Unicode
const * p
= rTheAbsURIRef
.getStr();
3891 PrefixInfo
const * pPrefix
= getPrefix(p
, p
+ rTheAbsURIRef
.getLength());
3892 return pPrefix
? pPrefix
->m_eScheme
: INetProtocol::NotValid
;
3895 OUString
INetURLObject::GetHostPort(DecodeMechanism eMechanism
,
3896 rtl_TextEncoding eCharset
) const
3898 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3899 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3900 if (!getSchemeInfo().m_bHost
)
3902 OUStringBuffer
aHostPort(decode(m_aHost
, eMechanism
, eCharset
));
3903 if (m_aPort
.isPresent())
3905 aHostPort
.append(':');
3906 aHostPort
.append(decode(m_aPort
, eMechanism
, eCharset
));
3908 return aHostPort
.makeStringAndClear();
// Returns the port of this URL as a number.
//
// When a port part is present, its text inside m_aAbsURIRef is scanned with
// INetMIME::scanUnsigned() into nThePort; the scan only counts when it
// consumed the complete port substring (p == pEnd).
// NOTE(review): the return statements are not visible in this listing -
// presumably nThePort on success and 0 otherwise; confirm against the
// complete file.
3911 sal_uInt32
INetURLObject::GetPort() const
3913 if (m_aPort
.isPresent())
3915 sal_Unicode
const * p
= m_aAbsURIRef
.getStr() + m_aPort
.getBegin();
3916 sal_Unicode
const * pEnd
= p
+ m_aPort
.getLength();
3917 sal_uInt32 nThePort
;
3918 if (INetMIME::scanUnsigned(p
, pEnd
, true, nThePort
) && p
== pEnd
)
// Sets the port part of this URL to the decimal text of nThePort.
//
// Nothing is changed unless the scheme supports ports
// (getSchemeInfo().m_bPort) and a host part is present.  An existing port is
// overwritten through m_aPort.set(); otherwise a ':' is inserted directly
// behind the host and the port is set at the position after it.  nDelta is
// taken from the result of m_aPort.set() and added to m_aFragment.
// NOTE(review): the declaration of nDelta, the tail of the expression in the
// insert branch, any adjustment of other parts and the return values are not
// visible in this listing - confirm against the complete file.
3924 bool INetURLObject::SetPort(sal_uInt32 nThePort
)
3926 if (getSchemeInfo().m_bPort
&& m_aHost
.isPresent())
3928 OUString
aNewPort(OUString::number(nThePort
));
3930 if (m_aPort
.isPresent())
3931 nDelta
= m_aPort
.set(m_aAbsURIRef
, aNewPort
);
3934 m_aAbsURIRef
.insert(m_aHost
.getEnd(), u
':');
3935 nDelta
= m_aPort
.set(m_aAbsURIRef
, aNewPort
, m_aHost
.getEnd() + 1)
3940 m_aFragment
+= nDelta
;
// Counts the segments of the path of this URL.
//
// The work is guarded by checkHierarchical().  p and pEnd delimit the path
// inside m_aAbsURIRef.  When bIgnoreFinalSlash is set, a '/' as last path
// character is tested for (presumably to exclude it - TODO confirm).  The
// counter n starts at 0 when the path is empty or begins with '/', and at 1
// otherwise.
// NOTE(review): the counting loop and the return statements are not visible
// in this listing.
3946 sal_Int32
INetURLObject::getSegmentCount(bool bIgnoreFinalSlash
) const
3948 if (!checkHierarchical())
3951 sal_Unicode
const * p
= m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
3952 sal_Unicode
const * pEnd
= p
+ m_aPath
.getLength();
3953 if (bIgnoreFinalSlash
&& pEnd
> p
&& pEnd
[-1] == '/')
3955 sal_Int32 n
= p
== pEnd
|| *p
== '/' ? 0 : 1;
// Removes the path segment selected by nIndex / bIgnoreFinalSlash and installs
// the shortened path through setPath() (NotCanonical, UTF-8), whose result is
// returned.
//
// The new path starts with the part of the old path that precedes the
// segment.  A '/' is appended when bIgnoreFinalSlash is set and the segment
// ends exactly where the path ends; the part of the path behind the segment
// is appended as well (NOTE(review): presumably as the alternative branch of
// that test - the line between the two appends is not visible; confirm).
// If the result is empty although the removed segment was non-empty and
// started with '/', a single '/' is put back.
// NOTE(review): the handling of an absent segment is not visible here.
3962 bool INetURLObject::removeSegment(sal_Int32 nIndex
, bool bIgnoreFinalSlash
)
3964 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
3965 if (!aSegment
.isPresent())
3968 OUStringBuffer
aNewPath(m_aPath
.getLength());
3969 aNewPath
.append(m_aAbsURIRef
.getStr() + m_aPath
.getBegin(),
3970 aSegment
.getBegin() - m_aPath
.getBegin());
3971 if (bIgnoreFinalSlash
&& aSegment
.getEnd() == m_aPath
.getEnd())
3972 aNewPath
.append('/');
3974 aNewPath
.append(m_aAbsURIRef
.getStr() + aSegment
.getEnd(),
3975 m_aPath
.getEnd() - aSegment
.getEnd());
3976 if (aNewPath
.isEmpty() && !aSegment
.isEmpty() &&
3977 m_aAbsURIRef
[aSegment
.getBegin()] == '/')
3979 aNewPath
.append('/');
3982 return setPath(aNewPath
.makeStringAndClear(), EncodeMechanism::NotCanonical
,
3983 RTL_TEXTENCODING_UTF8
);
// Returns the decoded name of the path segment selected by nIndex /
// bIgnoreFinalSlash.
//
// pSegBegin / pSegEnd delimit the segment inside m_aAbsURIRef.  A leading '/'
// of the segment is tested for (presumably skipped - TODO confirm).  p then
// runs from pSegBegin under the condition "not at pSegEnd and not at ';'", so
// the decoded range [pSegBegin, p) stops in front of a ';'.
// NOTE(review): the result for an absent segment and the loop body are not
// visible in this listing.
3986 OUString
INetURLObject::getName(sal_Int32 nIndex
, bool bIgnoreFinalSlash
,
3987 DecodeMechanism eMechanism
,
3988 rtl_TextEncoding eCharset
) const
3990 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
3991 if (!aSegment
.isPresent())
3994 sal_Unicode
const * pSegBegin
3995 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
3996 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
3998 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4000 sal_Unicode
const * p
= pSegBegin
;
4001 while (p
!= pSegEnd
&& *p
!= ';')
4004 return decode(pSegBegin
, p
, eMechanism
, eCharset
);
// Replaces the name of the last path segment (getSegment(LAST_SEGMENT, true))
// with rTheName and installs the result through setPath() (NotCanonical,
// UTF-8), whose result is returned.
//
// The new path is assembled from three pieces: the old path up to pSegBegin,
// rTheName encoded with encodeText(PART_PCHAR, eMechanism, eCharset, true),
// and the old path from p to its end.  p is advanced from pSegBegin under the
// condition "not at pSegEnd and not at ';'", so text from a ';' onwards is
// carried over unchanged.  A leading '/' of the segment is tested for
// (presumably skipped so that it stays in the prefix - TODO confirm).
// NOTE(review): the handling of an absent segment and the loop body are not
// visible in this listing.
4007 bool INetURLObject::setName(OUString
const& rTheName
, EncodeMechanism eMechanism
,
4008 rtl_TextEncoding eCharset
)
4010 SubString
aSegment(getSegment(LAST_SEGMENT
, true));
4011 if (!aSegment
.isPresent())
4014 sal_Unicode
const * pPathBegin
4015 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4016 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4017 sal_Unicode
const * pSegBegin
4018 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4019 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4021 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4023 sal_Unicode
const * p
= pSegBegin
;
4024 while (p
!= pSegEnd
&& *p
!= ';')
4027 OUStringBuffer
aNewPath(256);
4028 aNewPath
.append(pPathBegin
, pSegBegin
- pPathBegin
);
4029 aNewPath
.append(encodeText(rTheName
, PART_PCHAR
, eMechanism
, eCharset
, true));
4030 aNewPath
.append(p
, pPathEnd
- p
);
4032 return setPath(aNewPath
.makeStringAndClear(), EncodeMechanism::NotCanonical
,
4033 RTL_TEXTENCODING_UTF8
);
// Tells whether the last path segment (final slash ignored) has an extension.
//
// The segment is scanned from pSegBegin up to pSegEnd or the first ';'.  A '.'
// qualifies only when it is not the very first scanned character, so a name
// that merely starts with a dot does not count.  A leading '/' of the segment
// is tested for beforehand (presumably skipped - TODO confirm).
// NOTE(review): the return statements and the rest of the signature are not
// visible in this listing.
4036 bool INetURLObject::hasExtension()
4039 SubString
aSegment(getSegment(LAST_SEGMENT
, true/*bIgnoreFinalSlash*/));
4040 if (!aSegment
.isPresent())
4043 sal_Unicode
const * pSegBegin
4044 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4045 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4047 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4049 for (sal_Unicode
const * p
= pSegBegin
; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4050 if (*p
== '.' && p
!= pSegBegin
)
// Returns the decoded base name (the name without its extension) of the
// segment selected by nIndex / bIgnoreFinalSlash.
//
// The segment is scanned up to pSegEnd or the first ';'; every '.' that is
// not the first scanned character is a candidate extension start
// (presumably the last one found is kept in pExtension - TODO confirm).  The
// result is decode(pSegBegin, pExtension, eMechanism, eCharset).
// NOTE(review): the result for an absent segment, the skipping of a leading
// '/' and the fallback when no '.' was found are not visible in this listing.
4055 OUString
INetURLObject::getBase(sal_Int32 nIndex
, bool bIgnoreFinalSlash
,
4056 DecodeMechanism eMechanism
,
4057 rtl_TextEncoding eCharset
) const
4059 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
4060 if (!aSegment
.isPresent())
4063 sal_Unicode
const * pSegBegin
4064 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4065 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4067 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4069 sal_Unicode
const * pExtension
= nullptr;
4070 sal_Unicode
const * p
= pSegBegin
;
4071 for (; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4072 if (*p
== '.' && p
!= pSegBegin
)
4077 return decode(pSegBegin
, pExtension
, eMechanism
, eCharset
);
// Replaces the base name (the name without its extension) of the segment
// selected by nIndex (final slash ignored) with rTheBase, and installs the
// result through setPath() (NotCanonical, UTF-8), whose result is returned.
//
// The new path is assembled from: the old path up to pSegBegin, rTheBase
// encoded with encodeText(PART_PCHAR, eMechanism, eCharset, true), and the
// old path from pExtension to its end - so an existing extension and
// anything behind it is carried over.  pExtension is searched for in the
// segment up to pSegEnd or the first ';', considering only a '.' that is not
// the first scanned character.
// NOTE(review): the handling of an absent segment, the skipping of a leading
// '/' and the fallback when no '.' was found are not visible in this listing.
4080 bool INetURLObject::setBase(OUString
const & rTheBase
, sal_Int32 nIndex
,
4081 EncodeMechanism eMechanism
,
4082 rtl_TextEncoding eCharset
)
4084 SubString
aSegment(getSegment(nIndex
, true/*bIgnoreFinalSlash*/));
4085 if (!aSegment
.isPresent())
4088 sal_Unicode
const * pPathBegin
4089 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4090 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4091 sal_Unicode
const * pSegBegin
4092 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4093 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4095 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4097 sal_Unicode
const * pExtension
= nullptr;
4098 sal_Unicode
const * p
= pSegBegin
;
4099 for (; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4100 if (*p
== '.' && p
!= pSegBegin
)
4105 OUStringBuffer aNewPath
;
4106 aNewPath
.append(pPathBegin
, pSegBegin
- pPathBegin
);
4107 aNewPath
.append(encodeText(rTheBase
, PART_PCHAR
,
4108 eMechanism
, eCharset
, true));
4109 aNewPath
.append(pExtension
, pPathEnd
- pExtension
);
4111 return setPath(aNewPath
.makeStringAndClear(), EncodeMechanism::NotCanonical
,
4112 RTL_TEXTENCODING_UTF8
);
// Returns the decoded extension of the segment selected by nIndex /
// bIgnoreFinalSlash, without the separating '.'.
//
// The segment is scanned up to pSegEnd or the first ';' (where p stops);
// only a '.' that is not the first scanned character is considered as the
// start of the extension.  The result is
// decode(pExtension + 1, p, eMechanism, eCharset).
// NOTE(review): the results for an absent segment or a missing '.' and the
// skipping of a leading '/' are not visible in this listing.
4115 OUString
INetURLObject::getExtension(sal_Int32 nIndex
,
4116 bool bIgnoreFinalSlash
,
4117 DecodeMechanism eMechanism
,
4118 rtl_TextEncoding eCharset
) const
4120 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
4121 if (!aSegment
.isPresent())
4124 sal_Unicode
const * pSegBegin
4125 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4126 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4128 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4130 sal_Unicode
const * pExtension
= nullptr;
4131 sal_Unicode
const * p
= pSegBegin
;
4132 for (; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4133 if (*p
== '.' && p
!= pSegBegin
)
4139 return decode(pExtension
+ 1, p
, eMechanism
, eCharset
);
// Sets the extension of the segment selected by nIndex / bIgnoreFinalSlash to
// rTheExtension and installs the result through setPath() (NotCanonical,
// UTF-8), whose result is returned.
//
// The new path is assembled from: the old path up to pExtension, a '.',
// rTheExtension encoded with encodeText(PART_PCHAR, WasEncoded, eCharset,
// true), and the old path from p to its end.  p stops at pSegEnd or the first
// ';' of the segment; pExtension is looked for among the '.' characters that
// are not the first scanned character.
// NOTE(review): the handling of an absent segment, the skipping of a leading
// '/' and the fallback when no '.' was found (presumably pExtension = p, i.e.
// the extension is appended) are not visible in this listing - confirm.
4142 bool INetURLObject::setExtension(OUString
const & rTheExtension
,
4143 sal_Int32 nIndex
, bool bIgnoreFinalSlash
,
4144 rtl_TextEncoding eCharset
)
4146 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
4147 if (!aSegment
.isPresent())
4150 sal_Unicode
const * pPathBegin
4151 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4152 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4153 sal_Unicode
const * pSegBegin
4154 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4155 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4157 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4159 sal_Unicode
const * pExtension
= nullptr;
4160 sal_Unicode
const * p
= pSegBegin
;
4161 for (; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4162 if (*p
== '.' && p
!= pSegBegin
)
4167 OUStringBuffer
aNewPath(128);
4168 aNewPath
.append(pPathBegin
, pExtension
- pPathBegin
);
4169 aNewPath
.append('.');
4170 aNewPath
.append(encodeText(rTheExtension
, PART_PCHAR
,
4171 EncodeMechanism::WasEncoded
, eCharset
, true));
4172 aNewPath
.append(p
, pPathEnd
- p
);
4174 return setPath(aNewPath
.makeStringAndClear(), EncodeMechanism::NotCanonical
,
4175 RTL_TEXTENCODING_UTF8
);
// Removes the extension of the segment selected by nIndex / bIgnoreFinalSlash
// and installs the result through setPath() (NotCanonical, UTF-8), whose
// result is returned.
//
// The new path is the concatenation of the old path up to pExtension and the
// old path from p to its end, i.e. the range [pExtension, p) is dropped.  p
// stops at pSegEnd or the first ';' of the segment; pExtension is looked for
// among the '.' characters that are not the first scanned character.
// NOTE(review): the handling of an absent segment or a missing '.', the
// skipping of a leading '/' and the declaration of aNewPath are not visible
// in this listing.
4178 bool INetURLObject::removeExtension(sal_Int32 nIndex
, bool bIgnoreFinalSlash
)
4180 SubString
aSegment(getSegment(nIndex
, bIgnoreFinalSlash
));
4181 if (!aSegment
.isPresent())
4184 sal_Unicode
const * pPathBegin
4185 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4186 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4187 sal_Unicode
const * pSegBegin
4188 = m_aAbsURIRef
.getStr() + aSegment
.getBegin();
4189 sal_Unicode
const * pSegEnd
= pSegBegin
+ aSegment
.getLength();
4191 if (pSegBegin
< pSegEnd
&& *pSegBegin
== '/')
4193 sal_Unicode
const * pExtension
= nullptr;
4194 sal_Unicode
const * p
= pSegBegin
;
4195 for (; p
!= pSegEnd
&& *p
!= ';'; ++p
)
4196 if (*p
== '.' && p
!= pSegBegin
)
4202 OUString::Concat(std::u16string_view(pPathBegin
, pExtension
- pPathBegin
)) +
4203 std::u16string_view(p
, pPathEnd
- p
);
4205 return setPath(aNewPath
, EncodeMechanism::NotCanonical
, RTL_TEXTENCODING_UTF8
);
4208 bool INetURLObject::hasFinalSlash() const
4210 if (!checkHierarchical())
4213 sal_Unicode
const * pPathBegin
4214 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4215 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4216 return pPathEnd
> pPathBegin
&& pPathEnd
[-1] == '/';
// Makes sure the path of this URL ends with a '/'.
//
// The work is guarded by checkHierarchical().  A path that already ends with
// '/' is detected up front; otherwise the complete old path with "/" appended
// is installed through setPath() (NotCanonical, UTF-8), whose result is
// returned.
// NOTE(review): the values returned by the guard and by the "already has a
// slash" shortcut, and the declaration of aNewPath, are not visible in this
// listing.
4219 bool INetURLObject::setFinalSlash()
4221 if (!checkHierarchical())
4224 sal_Unicode
const * pPathBegin
4225 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4226 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4227 if (pPathEnd
> pPathBegin
&& pPathEnd
[-1] == '/')
4231 = OUString::Concat(std::u16string_view(pPathBegin
, pPathEnd
- pPathBegin
)) + "/";
4233 return setPath(aNewPath
, EncodeMechanism::NotCanonical
, RTL_TEXTENCODING_UTF8
);
// Removes a '/' at the end of the path of this URL.
//
// The work is guarded by checkHierarchical().  A path that is empty or does
// not end with '/' is detected up front.  The second test
// (pPathEnd == pPathBegin && *pPathBegin == '/') singles out a path that
// consists of nothing but "/" (presumably after pPathEnd was moved back by
// one - TODO confirm).  Otherwise the range [pPathBegin, pPathEnd) is
// installed through setPath() (NotCanonical, UTF-8), whose result is
// returned.
// NOTE(review): the values returned by the guard and the two shortcuts, and
// the adjustment of pPathEnd, are not visible in this listing.
4236 bool INetURLObject::removeFinalSlash()
4238 if (!checkHierarchical())
4241 sal_Unicode
const * pPathBegin
4242 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4243 sal_Unicode
const * pPathEnd
= pPathBegin
+ m_aPath
.getLength();
4244 if (pPathEnd
<= pPathBegin
|| pPathEnd
[-1] != '/')
4248 if (pPathEnd
== pPathBegin
&& *pPathBegin
== '/')
4250 OUString
aNewPath(pPathBegin
, pPathEnd
- pPathBegin
);
4252 return setPath(aNewPath
, EncodeMechanism::NotCanonical
, RTL_TEXTENCODING_UTF8
);
// Converts this file URL into a file system path in one of the notations
// named by eStyle (Vos, Unix, Dos).
//
// Only URLs whose scheme is INetProtocol::File are handled.  The number of
// style bits set in eStyle is summed up first; the following tests then
// settle on one style:
//   - Vos, when requested and a non-empty host is present;
//   - Dos, when hasDosVolume(eStyle) holds, or Dos is requested and a
//     non-empty host is present;
//   - Unix, when requested and no (or an empty) host is present;
//   - FSysStyle(0) as the remaining assignment.
// Per style the visible code builds:
//   - Vos:  "//", then the decoded host (if non-empty) and a '.', then the
//           decoded path (NOTE(review): the '.' is presumably the
//           alternative to the host - confirm);
//   - Unix: the decoded path; a non-empty host is tested for beforehand;
//   - Dos:  for a non-empty host "\\" + decoded host + '\', then the path
//           read character by character with getUTF32() (WasEncoded, UTF-8),
//           where an unescaped '/' becomes '\' and other characters are
//           appended via appendUtf32().
// Decoding of host and path uses DecodeMechanism::WithCharset and UTF-8.
// NOTE(review): the use of pDelimiter, the comparison applied to the style
// count, the structure joining the style tests, the loop around the Dos
// conversion and all fallback return values are not visible in this listing.
4255 OUString
INetURLObject::getFSysPath(FSysStyle eStyle
,
4256 sal_Unicode
* pDelimiter
) const
4258 if (m_eScheme
!= INetProtocol::File
)
4261 if (((eStyle
& FSysStyle::Vos
) ? 1 : 0)
4262 + ((eStyle
& FSysStyle::Unix
) ? 1 : 0)
4263 + ((eStyle
& FSysStyle::Dos
) ? 1 : 0)
4266 if(eStyle
& FSysStyle::Vos
&& m_aHost
.isPresent() && m_aHost
.getLength() > 0)
4268 eStyle
= FSysStyle::Vos
;
4272 if(hasDosVolume(eStyle
) || ((eStyle
& FSysStyle::Dos
) && m_aHost
.isPresent() && m_aHost
.getLength() > 0))
4274 eStyle
= FSysStyle::Dos
;
4278 if(eStyle
& FSysStyle::Unix
&& (!m_aHost
.isPresent() || m_aHost
.getLength() == 0))
4280 eStyle
= FSysStyle::Unix
;
4284 eStyle
= FSysStyle(0);
4292 case FSysStyle::Vos
:
4297 OUStringBuffer aSynFSysPath
;
4298 aSynFSysPath
.append("//");
4299 if (m_aHost
.isPresent() && m_aHost
.getLength() > 0)
4300 aSynFSysPath
.append(decode(m_aHost
, DecodeMechanism::WithCharset
,
4301 RTL_TEXTENCODING_UTF8
));
4303 aSynFSysPath
.append('.');
4304 aSynFSysPath
.append(decode(m_aPath
, DecodeMechanism::WithCharset
,
4305 RTL_TEXTENCODING_UTF8
));
4306 return aSynFSysPath
.makeStringAndClear();
4309 case FSysStyle::Unix
:
4311 if (m_aHost
.isPresent() && m_aHost
.getLength() > 0)
4317 return decode(m_aPath
, DecodeMechanism::WithCharset
, RTL_TEXTENCODING_UTF8
);
4320 case FSysStyle::Dos
:
4325 OUStringBuffer
aSynFSysPath(64);
4326 if (m_aHost
.isPresent() && m_aHost
.getLength() > 0)
4328 aSynFSysPath
.append("\\\\");
4329 aSynFSysPath
.append(decode(m_aHost
, DecodeMechanism::WithCharset
,
4330 RTL_TEXTENCODING_UTF8
));
4331 aSynFSysPath
.append('\\');
4333 sal_Unicode
const * p
4334 = m_aAbsURIRef
.getStr() + m_aPath
.getBegin();
4335 sal_Unicode
const * pEnd
= p
+ m_aPath
.getLength();
4336 DBG_ASSERT(p
< pEnd
&& *p
== '/',
4337 "INetURLObject::getFSysPath(): Bad path");
4341 EscapeType eEscapeType
;
4342 sal_uInt32 nUTF32
= getUTF32(p
, pEnd
, EncodeMechanism::WasEncoded
,
4343 RTL_TEXTENCODING_UTF8
,
4345 if (eEscapeType
== EscapeType::NONE
&& nUTF32
== '/')
4346 aSynFSysPath
.append('\\');
4348 aSynFSysPath
.appendUtf32(nUTF32
);
4350 return aSynFSysPath
.makeStringAndClear();
// Appends nUCS4 to rTheText as a sequence of escaped octets, each written by
// appendEscape().
//
// The octets follow the UTF-8 bit layout; the number of octets depends on the
// magnitude of nUCS4:
//   - first branch:     one octet, the value itself (NOTE(review): the
//                       condition of this branch is not visible - presumably
//                       nUCS4 < 0x80; confirm);
//   - below 0x800:      two octets, lead 0xC0;
//   - below 0x10000:    three octets, lead 0xE0;
//   - below 0x200000:   four octets, lead 0xF0;
//   - below 0x4000000:  five octets, lead 0xF8;
//   - otherwise:        six octets, lead 0xFC.
// Every continuation octet carries six payload bits or-ed with 0x80.  Values
// of 0x80000000 and above are rejected by the debug assertion only.
4359 void INetURLObject::appendUCS4Escape(OUStringBuffer
& rTheText
,
4362 DBG_ASSERT(nUCS4
< 0x80000000,
4363 "INetURLObject::appendUCS4Escape(): Bad char");
4365 appendEscape(rTheText
, nUCS4
);
4366 else if (nUCS4
< 0x800)
4368 appendEscape(rTheText
, nUCS4
>> 6 | 0xC0);
4369 appendEscape(rTheText
, (nUCS4
& 0x3F) | 0x80);
4371 else if (nUCS4
< 0x10000)
4373 appendEscape(rTheText
, nUCS4
>> 12 | 0xE0);
4374 appendEscape(rTheText
, (nUCS4
>> 6 & 0x3F) | 0x80);
4375 appendEscape(rTheText
, (nUCS4
& 0x3F) | 0x80);
4377 else if (nUCS4
< 0x200000)
4379 appendEscape(rTheText
, nUCS4
>> 18 | 0xF0);
4380 appendEscape(rTheText
, (nUCS4
>> 12 & 0x3F) | 0x80);
4381 appendEscape(rTheText
, (nUCS4
>> 6 & 0x3F) | 0x80);
4382 appendEscape(rTheText
, (nUCS4
& 0x3F) | 0x80);
4384 else if (nUCS4
< 0x4000000)
4386 appendEscape(rTheText
, nUCS4
>> 24 | 0xF8);
4387 appendEscape(rTheText
, (nUCS4
>> 18 & 0x3F) | 0x80);
4388 appendEscape(rTheText
, (nUCS4
>> 12 & 0x3F) | 0x80);
4389 appendEscape(rTheText
, (nUCS4
>> 6 & 0x3F) | 0x80);
4390 appendEscape(rTheText
, (nUCS4
& 0x3F) | 0x80);
4394 appendEscape(rTheText
, nUCS4
>> 30 | 0xFC);
4395 appendEscape(rTheText
, (nUCS4
>> 24 & 0x3F) | 0x80);
4396 appendEscape(rTheText
, (nUCS4
>> 18 & 0x3F) | 0x80);
4397 appendEscape(rTheText
, (nUCS4
>> 12 & 0x3F) | 0x80);
4398 appendEscape(rTheText
, (nUCS4
>> 6 & 0x3F) | 0x80);
4399 appendEscape(rTheText
, (nUCS4
& 0x3F) | 0x80);
// Appends the character nUCS4 to rTheText, escaped or literally, depending on
// how it was read (eEscapeType) and on the URL part it belongs to (ePart).
//
// The first switch picks the charset used for escaping:
//   - EscapeType::NONE:  UTF-8, when mustEncode(nUCS4, ePart) holds;
//   - EscapeType::Octet: ISO 8859-1;
//   - EscapeType::Utf32: eCharset, when mustEncode(nUCS4, ePart) holds;
//                        US-ASCII, when bKeepVisibleEscapes is set and the
//                        character is visible (INetMIME::isVisible()).
// The second switch writes the escape: appendEscape() for US-ASCII and
// ISO 8859-1, appendUCS4Escape() for UTF-8; any other target charset hits
// OSL_FAIL.  The final append of sal_Unicode(nUCS4) is presumably the
// unescaped path - TODO confirm.
// NOTE(review): the flag that selects between escaping and the literal
// append, and the remaining branches of the first switch, are not visible in
// this listing.
4404 void INetURLObject::appendUCS4(OUStringBuffer
& rTheText
, sal_uInt32 nUCS4
,
4405 EscapeType eEscapeType
,
4406 Part ePart
, rtl_TextEncoding eCharset
,
4407 bool bKeepVisibleEscapes
)
4410 rtl_TextEncoding eTargetCharset
= RTL_TEXTENCODING_DONTKNOW
;
4411 switch (eEscapeType
)
4413 case EscapeType::NONE
:
4414 if (mustEncode(nUCS4
, ePart
))
4417 eTargetCharset
= RTL_TEXTENCODING_UTF8
;
4423 case EscapeType::Octet
:
4425 eTargetCharset
= RTL_TEXTENCODING_ISO_8859_1
;
4428 case EscapeType::Utf32
:
4429 if (mustEncode(nUCS4
, ePart
))
4432 eTargetCharset
= eCharset
;
4434 else if (bKeepVisibleEscapes
&& INetMIME::isVisible(nUCS4
))
4437 eTargetCharset
= RTL_TEXTENCODING_ASCII_US
;
4448 switch (eTargetCharset
)
4451 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4453 case RTL_TEXTENCODING_ASCII_US
:
4454 case RTL_TEXTENCODING_ISO_8859_1
:
4455 appendEscape(rTheText
, nUCS4
);
4457 case RTL_TEXTENCODING_UTF8
:
4458 appendUCS4Escape(rTheText
, nUCS4
);
4463 rTheText
.append(sal_Unicode(nUCS4
));
// Reads the next character from [rBegin, pEnd), advancing rBegin, and reports
// in rEscapeType how the character was written.
//
// The raw character comes from INetMIME::getUTF32Character().  What happens
// next depends on eMechanism:
//   - All:          the character is taken literally (EscapeType::NONE).
//   - WasEncoded:   a '%' followed by two hex digits is combined into one
//                   octet value.  By eCharset that octet is classified as:
//                     US-ASCII:   Utf32 if it is ASCII, Octet otherwise;
//                     ISO 8859-1: Utf32;
//                     UTF-8:      Utf32 if it is ASCII; an octet in
//                                 0xC0..0xF4 starts an attempt to read the
//                                 following escaped continuation octets into
//                                 nEncoded, which is accepted (Utf32) only if
//                                 the sequence was well-formed (bUTF8), is a
//                                 Unicode scalar value and is not below nMin;
//                                 otherwise the octet is reported as Octet.
//                   Without a valid escape the result is EscapeType::NONE.
//   - NotCanonical: a '%' followed by two hex digits yields that octet as
//                   EscapeType::Octet, anything else EscapeType::NONE.
// rBegin must be in front of pEnd (debug assertion).
// NOTE(review): large parts of the UTF-8 continuation handling, the updates
// of rBegin after an escape and the return statement are not visible in this
// listing - confirm details against the complete file.
4467 sal_uInt32
INetURLObject::getUTF32(sal_Unicode
const *& rBegin
,
4468 sal_Unicode
const * pEnd
,
4469 EncodeMechanism eMechanism
,
4470 rtl_TextEncoding eCharset
,
4471 EscapeType
& rEscapeType
)
4473 DBG_ASSERT(rBegin
< pEnd
, "INetURLObject::getUTF32(): Bad sequence");
4474 sal_uInt32 nUTF32
= INetMIME::getUTF32Character(rBegin
, pEnd
);
4477 case EncodeMechanism::All
:
4478 rEscapeType
= EscapeType::NONE
;
4481 case EncodeMechanism::WasEncoded
:
4485 if (nUTF32
== static_cast<unsigned char>('%') && rBegin
+ 1 < pEnd
4486 && (nWeight1
= INetMIME::getHexWeight(rBegin
[0])) >= 0
4487 && (nWeight2
= INetMIME::getHexWeight(rBegin
[1])) >= 0)
4490 nUTF32
= nWeight1
<< 4 | nWeight2
;
4495 "INetURLObject::getUTF32(): Unsupported charset");
4497 case RTL_TEXTENCODING_ASCII_US
:
4498 rEscapeType
= rtl::isAscii(nUTF32
) ?
4499 EscapeType::Utf32
: EscapeType::Octet
;
4502 case RTL_TEXTENCODING_ISO_8859_1
:
4503 rEscapeType
= EscapeType::Utf32
;
4506 case RTL_TEXTENCODING_UTF8
:
4507 if (rtl::isAscii(nUTF32
))
4508 rEscapeType
= EscapeType::Utf32
;
4511 if (nUTF32
>= 0xC0 && nUTF32
<= 0xF4)
4513 sal_uInt32 nEncoded
;
4518 nEncoded
= (nUTF32
& 0x1F) << 6;
4522 else if (nUTF32
<= 0xEF)
4524 nEncoded
= (nUTF32
& 0x0F) << 12;
4530 nEncoded
= (nUTF32
& 0x07) << 18;
4534 sal_Unicode
const * p
= rBegin
;
4541 = INetMIME::getHexWeight(p
[1]))
4545 = INetMIME::getHexWeight(p
[2]))
4553 |= ((nWeight1
& 3) << 4 | nWeight2
)
4559 if (bUTF8
&& rtl::isUnicodeScalarValue(nEncoded
)
4560 && nEncoded
>= nMin
)
4564 rEscapeType
= EscapeType::Utf32
;
4568 rEscapeType
= EscapeType::Octet
;
4574 rEscapeType
= EscapeType::NONE
;
4578 case EncodeMechanism::NotCanonical
:
4582 if (nUTF32
== static_cast<unsigned char>('%') && rBegin
+ 1 < pEnd
4583 && ((nWeight1
= INetMIME::getHexWeight(rBegin
[0])) >= 0)
4584 && ((nWeight2
= INetMIME::getHexWeight(rBegin
[1])) >= 0))
4587 nUTF32
= nWeight1
<< 4 | nWeight2
;
4588 rEscapeType
= EscapeType::Octet
;
4591 rEscapeType
= EscapeType::NONE
;
// Scans a domain name starting at rBegin and not reaching beyond pEnd.
//
// The scanner is a small state machine over the states STATE_DOT,
// STATE_LABEL and STATE_HYPHEN, starting in STATE_DOT with nLabels == 0.
// ASCII alphanumerics and '_' are the characters tested for in the visible
// transitions; pLastAlphanumeric is set before STATE_HYPHEN is entered, and
// rBegin is moved to pLastAlphanumeric in one of the exits.  One test
// depends on "bEager || nLabels == 0".
// NOTE(review): the third parameter (presumably bEager), the switch over
// eState, the handling of '.' and '-', the label counting and the return
// values are not visible in this listing - confirm against the complete
// file before relying on any detail.
4599 sal_uInt32
INetURLObject::scanDomain(sal_Unicode
const *& rBegin
,
4600 sal_Unicode
const * pEnd
,
4603 enum State
{ STATE_DOT
, STATE_LABEL
, STATE_HYPHEN
};
4604 State eState
= STATE_DOT
;
4605 sal_Int32 nLabels
= 0;
4606 sal_Unicode
const * pLastAlphanumeric
= nullptr;
4607 for (sal_Unicode
const * p
= rBegin
;; ++p
)
4611 if (p
!= pEnd
&& (rtl::isAsciiAlphanumeric(*p
) || *p
== '_'))
4614 eState
= STATE_LABEL
;
4617 if (bEager
|| nLabels
== 0)
4625 if (rtl::isAsciiAlphanumeric(*p
) || *p
== '_')
4634 pLastAlphanumeric
= p
;
4635 eState
= STATE_HYPHEN
;
4645 if (rtl::isAsciiAlphanumeric(*p
) || *p
== '_')
4647 eState
= STATE_LABEL
;
4655 rBegin
= pLastAlphanumeric
;
// Scans a bracketed IPv6 reference ("[...]") at rBegin.
//
// The text must start with '['.  Behind it, p is advanced under the condition
// that the character is an ASCII hex digit, ':' or '.'; the scan then expects
// a ']'.  As the TODO below says, the address itself is not validated.
// NOTE(review): the loop body, the update of rBegin on success and the return
// values are not visible in this listing.
4661 bool INetURLObject::scanIPv6reference(sal_Unicode
const *& rBegin
,
4662 sal_Unicode
const * pEnd
)
4664 if (rBegin
!= pEnd
&& *rBegin
== '[') {
4665 sal_Unicode
const * p
= rBegin
+ 1;
4666 //TODO: check for valid IPv6address (RFC 2373):
4667 while (p
!= pEnd
&& (rtl::isAsciiHexDigit(*p
) || *p
== ':' || *p
== '.'))
4671 if (p
!= pEnd
&& *p
== ']') {
// Returns the URL of the "directory" part of this URL, i.e. everything in
// front of the last path segment.
//
// Works on a copy: the fragment is cleared, the last segment is removed
// (final slash not ignored), a final slash is enforced, and the copy's main
// URL is returned decoded with DecodeMechanism::ToIUri.  The results of
// removeSegment() and setFinalSlash() are not checked.
// NOTE(review): the result when checkHierarchical() fails and the original
// line between clearFragment() and removeSegment() are not visible in this
// listing.
4679 OUString
INetURLObject::GetPartBeforeLastName()
4682 if (!checkHierarchical())
4684 INetURLObject
aTemp(*this);
4685 aTemp
.clearFragment();
4687 aTemp
.removeSegment(LAST_SEGMENT
, false);
4688 aTemp
.setFinalSlash();
4689 return aTemp
.GetMainURL(DecodeMechanism::ToIUri
);
4692 OUString
INetURLObject::GetLastName(DecodeMechanism eMechanism
,
4693 rtl_TextEncoding eCharset
) const
4695 return getName(LAST_SEGMENT
, true, eMechanism
, eCharset
);
4698 OUString
INetURLObject::GetFileExtension() const
4700 return getExtension(LAST_SEGMENT
, false);
// Removes the last path segment from this URL.
//
// Works on a copy: the fragment is cleared and the last segment is removed
// (final slash not ignored); a failing removeSegment() is tested for.
// NOTE(review): what happens after a failed or successful removal
// (presumably the copy is assigned back to *this only on success) and the
// original line between clearFragment() and the test are not visible in this
// listing - confirm.
4703 void INetURLObject::CutLastName()
4705 INetURLObject
aTemp(*this);
4706 aTemp
.clearFragment();
4708 if (!aTemp
.removeSegment(LAST_SEGMENT
, false))
4713 OUString
INetURLObject::PathToFileName() const
4715 if (m_eScheme
!= INetProtocol::File
)
4717 OUString aSystemPath
;
4718 if (osl::FileBase::getSystemPathFromFileURL(
4719 decode(m_aAbsURIRef
.getStr(),
4720 m_aAbsURIRef
.getStr() + m_aPath
.getEnd(),
4721 DecodeMechanism::NONE
, RTL_TEXTENCODING_UTF8
),
4723 != osl::FileBase::E_None
)
4728 OUString
INetURLObject::GetFull() const
4730 INetURLObject
aTemp(*this);
4731 aTemp
.removeFinalSlash();
4732 return aTemp
.PathToFileName();
4735 OUString
INetURLObject::GetPath() const
4737 INetURLObject
aTemp(*this);
4738 aTemp
.removeSegment();
4739 aTemp
.removeFinalSlash();
4740 return aTemp
.PathToFileName();
4743 void INetURLObject::SetBase(OUString
const & rTheBase
)
4745 setBase(rTheBase
, LAST_SEGMENT
, EncodeMechanism::All
);
4748 OUString
INetURLObject::GetBase() const
4750 return getBase(LAST_SEGMENT
, true, DecodeMechanism::WithCharset
);
4753 void INetURLObject::SetExtension(OUString
const & rTheExtension
)
4755 setExtension(rTheExtension
, LAST_SEGMENT
, false);
4758 OUString
INetURLObject::CutExtension()
4760 OUString
aTheExtension(getExtension(LAST_SEGMENT
, false));
4761 return removeExtension(LAST_SEGMENT
, false)
4762 ? aTheExtension
: OUString();
4765 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */