Bump version to 21.06.18.1
[LibreOffice.git] / tools / source / fsys / urlobj.cxx
blobd52edb1d2f2a1f847e8af71c9842755862fb6abe
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <tools/urlobj.hxx>
23 #include <tools/debug.hxx>
24 #include <tools/inetmime.hxx>
25 #include <tools/stream.hxx>
26 #include <com/sun/star/uno/Reference.hxx>
27 #include <com/sun/star/util/XStringWidth.hpp>
28 #include <o3tl/enumarray.hxx>
29 #include <osl/diagnose.h>
30 #include <osl/file.hxx>
31 #include <rtl/character.hxx>
32 #include <rtl/string.h>
33 #include <rtl/textenc.h>
34 #include <rtl/ustring.hxx>
35 #include <sal/log.hxx>
36 #include <sal/types.h>
38 #include <algorithm>
39 #include <cassert>
40 #include <limits>
41 #include <memory>
42 #include <string_view>
44 #include <string.h>
46 #include <com/sun/star/uno/Sequence.hxx>
47 #include <comphelper/base64.hxx>
49 using namespace css;
51 // INetURLObject
53 /* The URI grammar (using RFC 2234 conventions).
55 Constructs of the form
56 {reference <rule1> using rule2}
57 stand for a rule matching the given rule1 specified in the given reference,
58 encoded to URI syntax using rule2 (as specified in this URI grammar).
61 ; RFC 1738, RFC 2396, RFC 2732, private
62 login = [user [":" password] "@"] hostport
63 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
64 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
65 hostport = host [":" port]
66 host = incomplete-hostname / hostname / IPv4address / IPv6reference
67 incomplete-hostname = *(domainlabel ".") domainlabel
68 hostname = *(domainlabel ".") toplabel ["."]
69 domainlabel = alphanum [*(alphanum / "-") alphanum]
70 toplabel = ALPHA [*(alphanum / "-") alphanum]
71 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
72 IPv6reference = "[" hexpart [":" IPv4address] "]"
73 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
74 hexseq = hex4 *(":" hex4)
75 hex4 = 1*4HEXDIG
76 port = *DIGIT
77 escaped = "%" HEXDIG HEXDIG
78 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
79 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
80 alphanum = ALPHA / DIGIT
81 unreserved = alphanum / mark
82 uric = escaped / reserved / unreserved
83 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
86 ; RFC 1738, RFC 2396
87 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
88 segment = *pchar
91 ; RFC 1738, RFC 2396
92 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
93 segment = *(pchar / ";")
96 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
97 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
98 segment = *pchar
99 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
102 ; RFC 2368, RFC 2396
103 mailto-url = "MAILTO:" [to] [headers]
104 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
105 headers = "?" header *("&" header)
106 header = hname "=" hvalue
107 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
108 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
111 ; private (see RFC 1738, RFC 2396)
112 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
113 segment = *(pchar / ";")
116 ; private
117 private-url = "PRIVATE:" path ["?" *uric]
118 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
121 ; private
122 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
123 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
124 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
127 ; private
128 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
129 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
132 ; private
133 slot-url = "SLOT:" path ["?" *uric]
134 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
137 ; private
138 macro-url = "MACRO:" path ["?" *uric]
139 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
142 ; private
143 javascript-url = "JAVASCRIPT:" *uric
146 ; RFC 2397
147 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
148 mediatype = [type "/" subtype] *(";" attribute "=" value)
149 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
150 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
151 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
152 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
155 ; RFC 2392, RFC 2396
156 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
159 ; private
160 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
161 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
164 ; private
165 uno-url = ".UNO:" path ["?" *uric]
166 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
169 ; private
170 component-url = ".COMPONENT:" path ["?" *uric]
171 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
174 ; private
175 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
176 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
179 ; RFC 2255
180 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
181 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
182 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
183 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
184 extension = ["!"] ["X-"] extoken ["=" exvalue]
185 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
186 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
189 ; private
190 db-url = "DB:" *uric
193 ; private
194 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
195 opaque_part = uric_no_slash *uric
196 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
199 ; RFC 1738
200 telnet-url = "TELNET://" login ["/"]
203 ; private
204 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
205 opaque_part = uric_no_slash *uric
206 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
209 ; private
210 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
211 segment = *pchar
214 ; private
215 unknown-url = scheme ":" 1*uric
216 scheme = ALPHA *(alphanum / "+" / "-" / ".")
219 ; private (http://ubiqx.org/cifs/Appendix-D.html):
220 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
221 segment = *(pchar / ";")
224 inline sal_Int32 INetURLObject::SubString::clear()
226 sal_Int32 nDelta = -m_nLength;
227 m_nBegin = -1;
228 m_nLength = 0;
229 return nDelta;
232 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
233 OUString const & rSubString)
235 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
237 rString.remove(m_nBegin, m_nLength);
238 rString.insert(m_nBegin, rSubString);
240 m_nLength = rSubString.getLength();
241 return nDelta;
244 inline sal_Int32 INetURLObject::SubString::set(OUString & rString,
245 OUString const & rSubString)
247 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
249 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
251 m_nLength = rSubString.getLength();
252 return nDelta;
255 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
256 OUString const & rSubString,
257 sal_Int32 nTheBegin)
259 m_nBegin = nTheBegin;
260 return set(rString, rSubString);
263 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
265 if (isPresent())
266 m_nBegin = m_nBegin + nDelta;
269 int INetURLObject::SubString::compare(SubString const & rOther,
270 OUStringBuffer const & rThisString,
271 OUStringBuffer const & rOtherString) const
273 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
274 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
275 sal_Unicode const * end = p1 + len;
276 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
277 while (p1 != end) {
278 if (*p1 < *p2) {
279 return -1;
280 } else if (*p1 > *p2) {
281 return 1;
283 ++p1;
284 ++p2;
286 return m_nLength < rOther.m_nLength ? -1
287 : m_nLength > rOther.m_nLength ? 1
288 : 0;
291 struct INetURLObject::SchemeInfo
293 char const * m_pScheme;
294 char const * m_pPrefix;
295 bool m_bAuthority;
296 bool m_bUser;
297 bool m_bAuth;
298 bool m_bPassword;
299 bool m_bHost;
300 bool m_bPort;
301 bool m_bHierarchical;
302 bool m_bQuery;
305 struct INetURLObject::PrefixInfo
307 enum class Kind { Official, Internal, External }; // order is important!
309 char const * m_pPrefix;
310 char const * m_pTranslatedPrefix;
311 INetProtocol m_eScheme;
312 Kind m_eKind;
315 // static
316 inline INetURLObject::SchemeInfo const &
317 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
319 static o3tl::enumarray<INetProtocol, SchemeInfo> const map = {
320 SchemeInfo{
321 "", "", false, false, false, false, false, false, false, false},
322 SchemeInfo{
323 "ftp", "ftp://", true, true, false, true, true, true, true,
324 false},
325 SchemeInfo{
326 "http", "http://", true, false, false, false, true, true, true,
327 true},
328 SchemeInfo{
329 "file", "file://", true, false, false, false, true, false, true,
330 false},
331 SchemeInfo{
332 "mailto", "mailto:", false, false, false, false, false, false,
333 false, true},
334 SchemeInfo{
335 "vnd.sun.star.webdav", "vnd.sun.star.webdav://", true, false,
336 false, false, true, true, true, true},
337 SchemeInfo{
338 "private", "private:", false, false, false, false, false, false,
339 false, true},
340 SchemeInfo{
341 "vnd.sun.star.help", "vnd.sun.star.help://", true, false, false,
342 false, false, false, true, true},
343 SchemeInfo{
344 "https", "https://", true, false, false, false, true, true,
345 true, true},
346 SchemeInfo{
347 "slot", "slot:", false, false, false, false, false, false, false,
348 true},
349 SchemeInfo{
350 "macro", "macro:", false, false, false, false, false, false,
351 false, true},
352 SchemeInfo{
353 "javascript", "javascript:", false, false, false, false, false,
354 false, false, false},
355 SchemeInfo{
356 "data", "data:", false, false, false, false, false, false, false,
357 false},
358 SchemeInfo{
359 "cid", "cid:", false, false, false, false, false, false, false,
360 false},
361 SchemeInfo{
362 "vnd.sun.star.hier", "vnd.sun.star.hier:", true, false, false,
363 false, false, false, true, false},
364 SchemeInfo{
365 ".uno", ".uno:", false, false, false, false, false, false, false,
366 true},
367 SchemeInfo{
368 ".component", ".component:", false, false, false, false, false,
369 false, false, true},
370 SchemeInfo{
371 "vnd.sun.star.pkg", "vnd.sun.star.pkg://", true, false, false,
372 false, false, false, true, true},
373 SchemeInfo{
374 "ldap", "ldap://", true, false, false, false, true, true,
375 false, true},
376 SchemeInfo{
377 "db", "db:", false, false, false, false, false, false, false,
378 false},
379 SchemeInfo{
380 "vnd.sun.star.cmd", "vnd.sun.star.cmd:", false, false, false,
381 false, false, false, false, false},
382 SchemeInfo{
383 "telnet", "telnet://", true, true, false, true, true, true,
384 true, false},
385 SchemeInfo{
386 "vnd.sun.star.expand", "vnd.sun.star.expand:", false, false,
387 false, false, false, false, false, false},
388 SchemeInfo{
389 "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", false, false, false,
390 false, false, false, true, false},
391 SchemeInfo{
392 "", "", false, false, false, false, true, true, true, false },
393 SchemeInfo{
394 "smb", "smb://", true, true, false, true, true, true, true,
395 true},
396 SchemeInfo{
397 "hid", "hid:", false, false, false, false, false, false, false,
398 true},
399 SchemeInfo{
400 "sftp", "sftp://", true, true, false, true, true, true, true,
401 true},
402 SchemeInfo{
403 "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", true, true,
404 false, false, true, false, true, true} };
405 return map[eTheScheme];
408 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
410 return getSchemeInfo(m_eScheme);
413 namespace {
415 sal_Unicode getHexDigit(sal_uInt32 nWeight)
417 assert(nWeight < 16);
418 static const sal_Unicode aDigits[16]
419 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
420 'D', 'E', 'F' };
421 return aDigits[nWeight];
426 // static
427 inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
428 sal_uInt32 nOctet)
430 rTheText.append( '%' );
431 rTheText.append( getHexDigit(nOctet >> 4) );
432 rTheText.append( getHexDigit(nOctet & 15) );
435 namespace {
437 enum
439 PA = INetURLObject::PART_USER_PASSWORD,
440 PD = INetURLObject::PART_FPATH,
441 PE = INetURLObject::PART_AUTHORITY,
442 PF = INetURLObject::PART_REL_SEGMENT_EXTRA,
443 PG = INetURLObject::PART_URIC,
444 PH = INetURLObject::PART_HTTP_PATH,
445 PI = INetURLObject::PART_MESSAGE_ID_PATH,
446 PJ = INetURLObject::PART_MAILTO,
447 PK = INetURLObject::PART_PATH_BEFORE_QUERY,
448 PL = INetURLObject::PART_PCHAR,
449 PM = INetURLObject::PART_VISIBLE,
450 PN = INetURLObject::PART_VISIBLE_NONSPECIAL,
451 PO = INetURLObject::PART_UNO_PARAM_VALUE,
452 PP = INetURLObject::PART_UNAMBIGUOUS,
453 PQ = INetURLObject::PART_URIC_NO_SLASH,
454 PR = INetURLObject::PART_HTTP_QUERY,
457 sal_uInt32 const aMustEncodeMap[128]
458 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 /* */ PP,
461 /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
462 /* " */ PM+PN +PP,
463 /* # */ PM,
464 /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
465 /* % */ PM,
466 /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR,
467 /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
468 /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
469 /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
470 /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
471 /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
472 /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR,
473 /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
474 /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
475 /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO,
476 /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
477 /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
478 /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
479 /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
480 /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
481 /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
482 /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
483 /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
484 /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
485 /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
486 /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
487 /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR,
488 /* < */ +PI +PM+PN +PP,
489 /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR,
490 /* > */ +PI +PM+PN +PP,
491 /* ? */ +PG +PM +PO +PQ,
492 /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
493 /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
494 /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
495 /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
496 /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
497 /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
498 /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
499 /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
500 /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
501 /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
502 /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
503 /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
504 /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
505 /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
506 /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
507 /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
508 /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
509 /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
510 /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
511 /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
512 /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
513 /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
514 /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
515 /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
516 /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
517 /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
518 /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
519 /* [ */ PG +PM+PN+PO,
520 /* \ */ +PM+PN +PP,
521 /* ] */ PG +PM+PN+PO,
522 /* ^ */ PM+PN +PP,
523 /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
524 /* ` */ PM+PN +PP,
525 /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
526 /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
527 /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
528 /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
529 /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
530 /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
531 /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
532 /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
533 /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
534 /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
535 /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
536 /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
537 /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
538 /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
539 /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
540 /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
541 /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
542 /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
543 /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
544 /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
545 /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
546 /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
547 /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
548 /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
549 /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
550 /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
551 /* { */ PM+PN +PP,
552 /* | */ +PM+PN +PP,
553 /* } */ PM+PN +PP,
554 /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
555 0 };
557 bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
559 return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
564 void INetURLObject::setInvalid()
566 m_aAbsURIRef.setLength(0);
567 m_eScheme = INetProtocol::NotValid;
568 m_aScheme.clear();
569 m_aUser.clear();
570 m_aAuth.clear();
571 m_aHost.clear();
572 m_aPort.clear();
573 m_aPath.clear();
574 m_aQuery.clear();
575 m_aFragment.clear();
578 namespace {
580 std::unique_ptr<SvMemoryStream> memoryStream(
581 void const * data, sal_Int32 length)
583 std::unique_ptr<char[]> b(
584 new char[length]);
585 memcpy(b.get(), data, length);
586 std::unique_ptr<SvMemoryStream> s(
587 new SvMemoryStream(b.get(), length, StreamMode::READ));
588 s->ObjectOwnsMemory(true);
589 b.release();
590 return s;
595 std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
597 if( GetProtocol() != INetProtocol::Data )
599 return nullptr;
602 OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
603 sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
604 sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
605 ? 0 : pSkippedMediatype-sURLPath.getStr();
606 if (sURLPath.match(",", nCharactersSkipped))
608 nCharactersSkipped += strlen(",");
609 OString sURLEncodedData(
610 sURLPath.getStr() + nCharactersSkipped,
611 sURLPath.getLength() - nCharactersSkipped,
612 RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
613 return memoryStream(
614 sURLEncodedData.getStr(), sURLEncodedData.getLength());
616 else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
618 nCharactersSkipped += strlen(";base64,");
619 OUString sBase64Data = sURLPath.copy( nCharactersSkipped );
620 css::uno::Sequence< sal_Int8 > aDecodedData;
621 if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
622 == sBase64Data.getLength())
624 return memoryStream(
625 aDecodedData.getArray(), aDecodedData.getLength());
628 return nullptr;
631 namespace {
633 FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
634 sal_Unicode const * pEnd,
635 FSysStyle eStyle)
637 DBG_ASSERT(eStyle
638 & (FSysStyle::Unix
639 | FSysStyle::Dos),
640 "guessFSysStyleByCounting(): Bad style");
641 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
642 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
643 "guessFSysStyleByCounting(): Too big");
644 sal_Int32 nSlashCount
645 = (eStyle & FSysStyle::Unix) ?
646 0 : std::numeric_limits< sal_Int32 >::min();
647 sal_Int32 nBackslashCount
648 = (eStyle & FSysStyle::Dos) ?
649 0 : std::numeric_limits< sal_Int32 >::min();
650 while (pBegin != pEnd)
651 switch (*pBegin++)
653 case '/':
654 ++nSlashCount;
655 break;
657 case '\\':
658 ++nBackslashCount;
659 break;
661 return nSlashCount >= nBackslashCount ?
662 FSysStyle::Unix : FSysStyle::Dos;
665 OUString parseScheme(
666 sal_Unicode const ** begin, sal_Unicode const * end,
667 sal_uInt32 fragmentDelimiter)
669 sal_Unicode const * p = *begin;
670 if (p != end && rtl::isAsciiAlpha(*p)) {
671 do {
672 ++p;
673 } while (p != end
674 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
675 || *p == '.'));
676 // #i34835# To avoid problems with Windows file paths like "C:\foo",
677 // do not accept generic schemes that are only one character long:
678 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
679 && p - *begin >= 2)
681 OUString scheme(
682 OUString(*begin, p - *begin).toAsciiLowerCase());
683 *begin = p + 1;
684 return scheme;
687 return OUString();
692 bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
693 EncodeMechanism eMechanism,
694 rtl_TextEncoding eCharset,
695 bool bSmart,
696 FSysStyle eStyle)
698 sal_Unicode const * pPos = rTheAbsURIRef.getStr();
699 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
701 setInvalid();
703 sal_uInt32 nFragmentDelimiter = '#';
705 OUStringBuffer aSynAbsURIRef(rTheAbsURIRef.getLength()*2);
707 // Parse <scheme>:
708 sal_Unicode const * p = pPos;
709 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
710 if (pPrefix)
712 pPos = p;
713 m_eScheme = pPrefix->m_eScheme;
715 OUString sTemp(OUString::createFromAscii(pPrefix->m_eKind
716 >= PrefixInfo::Kind::External ?
717 pPrefix->m_pTranslatedPrefix :
718 pPrefix->m_pPrefix));
719 aSynAbsURIRef.append(sTemp);
720 m_aScheme = SubString( 0, sTemp.indexOf(':') );
722 else
724 if (bSmart)
726 // For scheme detection, the first (if any) of the following
727 // productions that matches the input string (and for which the
728 // appropriate style bit is set in eStyle, if applicable)
729 // determines the scheme. The productions use the auxiliary rules
731 // domain = label *("." label)
732 // label = alphanum [*(alphanum / "-") alphanum]
733 // alphanum = ALPHA / DIGIT
734 // IPv6reference = "[" IPv6address "]"
735 // IPv6address = hexpart [":" IPv4address]
736 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
737 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
738 // hexseq = hex4 *(":" hex4)
739 // hex4 = 1*4HEXDIG
740 // UCS4 = <any UCS4 character>
742 // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
743 // <one of the known schemes, ignoring case> ":" *UCS4
744 // 2nd Production (mailto):
745 // domain "@" domain
746 // 3rd Production (ftp):
747 // "FTP" 2*("." label) ["/" *UCS4]
748 // 4th Production (http):
749 // label 2*("." label) ["/" *UCS4]
750 // 5th Production (file):
751 // "//" (domain / IPv6reference) ["/" *UCS4]
752 // 6th Production (Unix file):
753 // "/" *UCS4
754 // 7th Production (UNC file; FSysStyle::Dos only):
755 // "\\" domain ["\" *UCS4]
756 // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
757 // ALPHA ":" ["/" *UCS4]
758 // 9th Production (DOS file; FSysStyle::Dos only):
759 // ALPHA ":" ["\" *UCS4]
760 // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
761 // after this else branch):
762 // <any scheme> ":" *UCS4
764 // For the 'non URL' file productions 6--9, the interpretation of
765 // the input as a (degenerate) URI is turned off, i.e., escape
766 // sequences and fragments are never detected as such, but are
767 // taken as literal characters.
769 sal_Unicode const * p1 = pPos;
770 if (eStyle & FSysStyle::Dos
771 && pEnd - p1 >= 2
772 && rtl::isAsciiAlpha(p1[0])
773 && p1[1] == ':'
774 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
776 m_eScheme = INetProtocol::File; // 8th, 9th
777 eMechanism = EncodeMechanism::All;
778 nFragmentDelimiter = 0x80000000;
780 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
782 p1 += 2;
783 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
784 && (p1 == pEnd || *p1 == '/'))
785 m_eScheme = INetProtocol::File; // 5th
787 else if (p1 != pEnd && *p1 == '/')
789 m_eScheme = INetProtocol::File; // 6th
790 eMechanism = EncodeMechanism::All;
791 nFragmentDelimiter = 0x80000000;
793 else if (eStyle & FSysStyle::Dos
794 && pEnd - p1 >= 2
795 && p1[0] == '\\'
796 && p1[1] == '\\')
798 p1 += 2;
799 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
800 p1, pEnd - p1, '\\');
801 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
802 if (
803 parseHostOrNetBiosName(
804 p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
805 true, nullptr) ||
806 (scanDomain(p1, pe) > 0 && p1 == pe)
809 m_eScheme = INetProtocol::File; // 7th
810 eMechanism = EncodeMechanism::All;
811 nFragmentDelimiter = 0x80000000;
814 else
816 sal_Unicode const * pDomainEnd = p1;
817 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
818 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
820 ++pDomainEnd;
821 if (scanDomain(pDomainEnd, pEnd) > 0
822 && pDomainEnd == pEnd)
823 m_eScheme = INetProtocol::Mailto; // 2nd
825 else if (nLabels >= 3
826 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
827 m_eScheme
828 = pDomainEnd - p1 >= 4
829 && (p1[0] == 'f' || p1[0] == 'F')
830 && (p1[1] == 't' || p1[1] == 'T')
831 && (p1[2] == 'p' || p1[2] == 'P')
832 && p1[3] == '.' ?
833 INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th
837 OUString aSynScheme;
838 if (m_eScheme == INetProtocol::NotValid) {
839 sal_Unicode const * p1 = pPos;
840 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
841 if (!aSynScheme.isEmpty())
843 m_eScheme = INetProtocol::Generic;
844 pPos = p1;
848 if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
849 && *pPos != nFragmentDelimiter)
851 m_eScheme = m_eSmartScheme;
854 if (m_eScheme == INetProtocol::NotValid)
856 setInvalid();
857 return false;
860 if (m_eScheme != INetProtocol::Generic) {
861 aSynScheme = OUString::createFromAscii(getSchemeInfo().m_pScheme);
863 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
864 aSynAbsURIRef.append(':');
867 sal_uInt32 nSegmentDelimiter = '/';
868 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
869 bool bSkippedInitialSlash = false;
871 // Parse //<user>;AUTH=<auth>@<host>:<port> or
872 // //<user>:<password>@<host>:<port> or
873 // //<reg_name>
874 if (getSchemeInfo().m_bAuthority)
876 sal_Unicode const * pUserInfoBegin = nullptr;
877 sal_Unicode const * pUserInfoEnd = nullptr;
878 sal_Unicode const * pHostPortBegin = nullptr;
879 sal_Unicode const * pHostPortEnd = nullptr;
881 switch (m_eScheme)
883 case INetProtocol::VndSunStarHelp:
885 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
887 setInvalid();
888 return false;
890 aSynAbsURIRef.append("//");
891 OUStringBuffer aSynAuthority;
892 while (pPos < pEnd
893 && *pPos != '/' && *pPos != '?'
894 && *pPos != nFragmentDelimiter)
896 EscapeType eEscapeType;
897 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
898 eMechanism,
899 eCharset, eEscapeType);
900 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
901 PART_AUTHORITY, eCharset, false);
903 m_aHost.set(aSynAbsURIRef,
904 aSynAuthority.makeStringAndClear(),
905 aSynAbsURIRef.getLength());
906 // misusing m_aHost to store the authority
907 break;
910 case INetProtocol::VndSunStarHier:
912 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
914 pPos += 2;
915 aSynAbsURIRef.append("//");
916 OUStringBuffer aSynAuthority;
917 while (pPos < pEnd
918 && *pPos != '/' && *pPos != '?'
919 && *pPos != nFragmentDelimiter)
921 EscapeType eEscapeType;
922 sal_uInt32 nUTF32 = getUTF32(pPos,
923 pEnd,
924 eMechanism,
925 eCharset,
926 eEscapeType);
927 appendUCS4(aSynAuthority,
928 nUTF32,
929 eEscapeType,
930 PART_AUTHORITY,
931 eCharset,
932 false);
934 if (aSynAuthority.isEmpty())
936 setInvalid();
937 return false;
939 m_aHost.set(aSynAbsURIRef,
940 aSynAuthority.makeStringAndClear(),
941 aSynAbsURIRef.getLength());
942 // misusing m_aHost to store the authority
944 break;
947 case INetProtocol::VndSunStarPkg:
948 case INetProtocol::Cmis:
950 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
952 setInvalid();
953 return false;
955 aSynAbsURIRef.append("//");
956 OUStringBuffer aSynUser(128);
958 bool bHasUser = false;
959 while (pPos < pEnd && *pPos != '@'
960 && *pPos != '/' && *pPos != '?'
961 && *pPos != nFragmentDelimiter)
963 EscapeType eEscapeType;
964 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
965 eMechanism,
966 eCharset, eEscapeType);
967 appendUCS4(aSynUser, nUTF32, eEscapeType,
968 PART_USER_PASSWORD, eCharset, false);
970 bHasUser = *pPos == '@';
973 OUStringBuffer aSynAuthority(64);
974 if ( !bHasUser )
976 aSynAuthority = aSynUser;
978 else
980 m_aUser.set(aSynAbsURIRef,
981 aSynUser.makeStringAndClear(),
982 aSynAbsURIRef.getLength());
983 aSynAbsURIRef.append("@");
984 ++pPos;
986 while (pPos < pEnd
987 && *pPos != '/' && *pPos != '?'
988 && *pPos != nFragmentDelimiter)
990 EscapeType eEscapeType;
991 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
992 eMechanism,
993 eCharset, eEscapeType);
994 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
995 PART_AUTHORITY, eCharset, false);
998 if (aSynAuthority.isEmpty())
1000 setInvalid();
1001 return false;
1003 m_aHost.set(aSynAbsURIRef,
1004 aSynAuthority.makeStringAndClear(),
1005 aSynAbsURIRef.getLength());
1006 // misusing m_aHost to store the authority
1007 break;
1010 case INetProtocol::File:
1011 if (bSmart)
1013 // The first of the following seven productions that
1014 // matches the rest of the input string (and for which the
1015 // appropriate style bit is set in eStyle, if applicable)
1016 // determines the used notation. The productions use the
1017 // auxiliary rules
1019 // domain = label *("." label)
1020 // label = alphanum [*(alphanum / "-") alphanum]
1021 // alphanum = ALPHA / DIGIT
1022 // IPv6reference = "[" IPv6address "]"
1023 // IPv6address = hexpart [":" IPv4address]
1024 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1025 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1026 // hexseq = hex4 *(":" hex4)
1027 // hex4 = 1*4HEXDIG
1028 // path = <any UCS4 character except "#">
1029 // UCS4 = <any UCS4 character>
1031 // 1st Production (URL):
1032 // "//" [domain / IPv6reference] ["/" *path]
1033 // ["#" *UCS4]
1034 // becomes
1035 // "file://" domain "/" *path ["#" *UCS4]
1036 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1038 sal_Unicode const * p1 = pPos + 2;
1039 while (p1 != pEnd && *p1 != '/' &&
1040 *p1 != nFragmentDelimiter)
1042 ++p1;
1044 if (parseHostOrNetBiosName(
1045 pPos + 2, p1, EncodeMechanism::All,
1046 RTL_TEXTENCODING_DONTKNOW, true, nullptr))
1048 aSynAbsURIRef.append("//");
1049 pHostPortBegin = pPos + 2;
1050 pHostPortEnd = p1;
1051 pPos = p1;
1052 break;
1056 // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
1057 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1058 // becomes
1059 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1060 // replacing "\" by "/" within <*path>
1061 // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
1062 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1063 // becomes
1064 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1065 // replacing "\" by "/" within <*path>
1066 // 4th Production (miscounted slashes):
1067 // "//" *path ["#" *UCS4]
1068 // becomes
1069 // "file:///" *path ["#" *UCS4]
1070 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1072 aSynAbsURIRef.append("//");
1073 pPos += 2;
1074 bSkippedInitialSlash = true;
1075 if ((eStyle & FSysStyle::Dos)
1076 && pEnd - pPos >= 2
1077 && rtl::isAsciiAlpha(pPos[0])
1078 && pPos[1] == ':'
1079 && (pEnd - pPos == 2
1080 || pPos[2] == '/' || pPos[2] == '\\'))
1081 nAltSegmentDelimiter = '\\';
1082 break;
1085 // 5th Production (Unix):
1086 // "/" *path ["#" *UCS4]
1087 // becomes
1088 // "file:///" *path ["#" *UCS4]
1089 if (pPos < pEnd && *pPos == '/')
1091 aSynAbsURIRef.append("//");
1092 break;
1095 // 6th Production (UNC; FSysStyle::Dos only):
1096 // "\\" domain ["\" *path] ["#" *UCS4]
1097 // becomes
1098 // "file://" domain "/" *path ["#" *UCS4]
1099 // replacing "\" by "/" within <*path>
1100 if (eStyle & FSysStyle::Dos
1101 && pEnd - pPos >= 2
1102 && pPos[0] == '\\'
1103 && pPos[1] == '\\')
1105 sal_Unicode const * p1 = pPos + 2;
1106 sal_Unicode const * pe = p1;
1107 while (pe < pEnd && *pe != '\\' &&
1108 *pe != nFragmentDelimiter)
1110 ++pe;
1112 if (
1113 parseHostOrNetBiosName(
1114 p1, pe, EncodeMechanism::All,
1115 RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
1116 (scanDomain(p1, pe) > 0 && p1 == pe)
1119 aSynAbsURIRef.append("//");
1120 pHostPortBegin = pPos + 2;
1121 pHostPortEnd = pe;
1122 pPos = pe;
1123 nSegmentDelimiter = '\\';
1124 break;
1128 // 7th Production (Unix-like DOS; FSysStyle::Dos only):
1129 // ALPHA ":" ["/" *path] ["#" *UCS4]
1130 // becomes
1131 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1132 // replacing "\" by "/" within <*path>
1133 // 8th Production (DOS; FSysStyle::Dos only):
1134 // ALPHA ":" ["\" *path] ["#" *UCS4]
1135 // becomes
1136 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1137 // replacing "\" by "/" within <*path>
1138 if (eStyle & FSysStyle::Dos
1139 && pEnd - pPos >= 2
1140 && rtl::isAsciiAlpha(pPos[0])
1141 && pPos[1] == ':'
1142 && (pEnd - pPos == 2
1143 || pPos[2] == '/'
1144 || pPos[2] == '\\'))
1146 aSynAbsURIRef.append("//");
1147 nAltSegmentDelimiter = '\\';
1148 bSkippedInitialSlash = true;
1149 break;
1152 // 9th Production (any):
1153 // *path ["#" *UCS4]
1154 // becomes
1155 // "file:///" *path ["#" *UCS4]
1156 // replacing the delimiter by "/" within <*path>. The
1157 // delimiter is that character from the set { "/", "\"}
1158 // which appears most often in <*path> (if FSysStyle::Unix
1159 // is not among the style bits, "/" is removed from the
1160 // set; if FSysStyle::Dos is not among the style bits, "\" is
1161 // removed from the set). If two or
1162 // more characters appear the same number of times, the
1163 // character mentioned first in that set is chosen. If
1164 // the first character of <*path> is the delimiter, that
1165 // character is not copied
1166 if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
1168 aSynAbsURIRef.append("//");
1169 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1171 case FSysStyle::Unix:
1172 nSegmentDelimiter = '/';
1173 break;
1175 case FSysStyle::Dos:
1176 nSegmentDelimiter = '\\';
1177 break;
1179 default:
1180 OSL_FAIL(
1181 "INetURLObject::setAbsURIRef():"
1182 " Bad guessFSysStyleByCounting");
1183 break;
1185 bSkippedInitialSlash
1186 = pPos != pEnd && *pPos != nSegmentDelimiter;
1187 break;
1190 [[fallthrough]];
1191 default:
1193 // For INetProtocol::File, allow an empty authority ("//") to be
1194 // missing if the following path starts with an explicit "/"
1195 // (Java is notorious in generating such file URLs, so be
1196 // liberal here):
1197 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1198 pPos += 2;
1199 else if (!bSmart
1200 && !(m_eScheme == INetProtocol::File
1201 && pPos != pEnd && *pPos == '/'))
1203 setInvalid();
1204 return false;
1206 aSynAbsURIRef.append("//");
1208 sal_Unicode const * pAuthority = pPos;
1209 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1210 while (pPos < pEnd && *pPos != '/' && *pPos != c
1211 && *pPos != nFragmentDelimiter)
1212 ++pPos;
1213 if (getSchemeInfo().m_bUser)
1214 if (getSchemeInfo().m_bHost)
1216 sal_Unicode const * p1 = pAuthority;
1217 while (p1 < pPos && *p1 != '@')
1218 ++p1;
1219 if (p1 == pPos)
1221 pHostPortBegin = pAuthority;
1222 pHostPortEnd = pPos;
1224 else
1226 pUserInfoBegin = pAuthority;
1227 pUserInfoEnd = p1;
1228 pHostPortBegin = p1 + 1;
1229 pHostPortEnd = pPos;
1232 else
1234 pUserInfoBegin = pAuthority;
1235 pUserInfoEnd = pPos;
1237 else if (getSchemeInfo().m_bHost)
1239 pHostPortBegin = pAuthority;
1240 pHostPortEnd = pPos;
1242 else if (pPos != pAuthority)
1244 setInvalid();
1245 return false;
1247 break;
1251 if (pUserInfoBegin)
1253 Part ePart = PART_USER_PASSWORD;
1254 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1255 bool bSupportsAuth
1256 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1257 bool bHasAuth = false;
1258 OUStringBuffer aSynUser;
1259 sal_Unicode const * p1 = pUserInfoBegin;
1260 while (p1 < pUserInfoEnd)
1262 EscapeType eEscapeType;
1263 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1264 eMechanism, eCharset, eEscapeType);
1265 if (eEscapeType == EscapeType::NONE)
1267 if (nUTF32 == ':' && bSupportsPassword)
1269 bHasAuth = true;
1270 break;
1272 else if (nUTF32 == ';' && bSupportsAuth
1273 && pUserInfoEnd - p1
1274 > RTL_CONSTASCII_LENGTH("auth=")
1275 && INetMIME::equalIgnoreCase(
1277 p1 + RTL_CONSTASCII_LENGTH("auth="),
1278 "auth="))
1280 p1 += RTL_CONSTASCII_LENGTH("auth=");
1281 bHasAuth = true;
1282 break;
1285 appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
1286 eCharset, false);
1288 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1289 aSynAbsURIRef.getLength());
1290 if (bHasAuth)
1292 if (bSupportsPassword)
1294 aSynAbsURIRef.append(':');
1295 OUStringBuffer aSynAuth;
1296 while (p1 < pUserInfoEnd)
1298 EscapeType eEscapeType;
1299 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1300 eMechanism, eCharset,
1301 eEscapeType);
1302 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1303 ePart, eCharset, false);
1305 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1306 aSynAbsURIRef.getLength());
1308 else
1310 aSynAbsURIRef.append(";AUTH=");
1311 OUStringBuffer aSynAuth;
1312 while (p1 < pUserInfoEnd)
1314 EscapeType eEscapeType;
1315 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1316 eMechanism, eCharset,
1317 eEscapeType);
1318 if (!INetMIME::isIMAPAtomChar(nUTF32))
1320 setInvalid();
1321 return false;
1323 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1324 ePart, eCharset, false);
1326 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1327 aSynAbsURIRef.getLength());
1330 if (pHostPortBegin)
1331 aSynAbsURIRef.append('@');
1334 if (pHostPortBegin)
1336 sal_Unicode const * pPort = pHostPortEnd;
1337 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1339 sal_Unicode const * p1 = pHostPortEnd - 1;
1340 while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1341 --p1;
1342 if (*p1 == ':')
1343 pPort = p1;
1345 bool bNetBiosName = false;
1346 switch (m_eScheme)
1348 case INetProtocol::File:
1349 // If the host equals "LOCALHOST" (unencoded and ignoring
1350 // case), turn it into an empty host:
1351 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1352 "localhost"))
1353 pHostPortBegin = pPort;
1354 bNetBiosName = true;
1355 break;
1357 case INetProtocol::Ldap:
1358 case INetProtocol::Smb:
1359 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1361 setInvalid();
1362 return false;
1364 break;
1365 default:
1366 if (pHostPortBegin == pPort)
1368 setInvalid();
1369 return false;
1371 break;
1373 OUStringBuffer aSynHost(64);
1374 if (!parseHostOrNetBiosName(
1375 pHostPortBegin, pPort, eMechanism, eCharset,
1376 bNetBiosName, &aSynHost))
1378 setInvalid();
1379 return false;
1381 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1382 aSynAbsURIRef.getLength());
1383 if (pPort != pHostPortEnd)
1385 aSynAbsURIRef.append(':');
1386 m_aPort.set(aSynAbsURIRef,
1387 OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1388 aSynAbsURIRef.getLength());
1393 // Parse <path>
1394 OUStringBuffer aSynPath;
1395 if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
1396 bSkippedInitialSlash, nSegmentDelimiter,
1397 nAltSegmentDelimiter,
1398 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1399 nFragmentDelimiter, aSynPath))
1401 setInvalid();
1402 return false;
1404 m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1405 aSynAbsURIRef.getLength());
1407 // Parse ?<query>
1408 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1410 aSynAbsURIRef.append('?');
1411 OUStringBuffer aSynQuery;
1412 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1414 EscapeType eEscapeType;
1415 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1416 eMechanism, eCharset, eEscapeType);
1417 appendUCS4(aSynQuery, nUTF32, eEscapeType,
1418 PART_URIC, eCharset, true);
1420 m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1421 aSynAbsURIRef.getLength());
1424 // Parse #<fragment>
1425 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1427 aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1428 OUStringBuffer aSynFragment;
1429 for (++pPos; pPos < pEnd;)
1431 EscapeType eEscapeType;
1432 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1433 eMechanism, eCharset, eEscapeType);
1434 appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
1435 eCharset, true);
1437 m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1438 aSynAbsURIRef.getLength());
1441 if (pPos != pEnd)
1443 setInvalid();
1444 return false;
1447 m_aAbsURIRef = aSynAbsURIRef;
1449 return true;
1452 void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
1453 OUString aTmpStr=m_aAbsURIRef.toString();
1454 m_aAbsURIRef.setLength(0);
1455 int oldSchemeLen = 0;
1456 if (m_eScheme == INetProtocol::Generic)
1457 oldSchemeLen = m_aScheme.getLength();
1458 else
1459 oldSchemeLen = strlen(getSchemeInfo().m_pScheme);
1460 m_eScheme=eTargetScheme;
1461 int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
1462 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1463 m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
1464 int delta=newSchemeLen-oldSchemeLen;
1465 m_aUser+=delta;
1466 m_aAuth+=delta;
1467 m_aHost+=delta;
1468 m_aPort+=delta;
1469 m_aPath+=delta;
1470 m_aQuery+=delta;
1471 m_aFragment+=delta;
// Resolves rTheRelURIRef against this object (the base URL) and stores the
// result in rTheAbsURIRef.
//
// rTheRelURIRef     the (possibly relative) URI reference to resolve
// rTheAbsURIRef     out: receives the resulting URL on success
// rWasAbsolute      out: true if the input was treated as already absolute
//                   (it carried a scheme that was not ignored, or -- in
//                   smart mode -- it was recognised as an absolute DOS/UNC
//                   file system path); false otherwise and on every failure
// eMechanism        encoding mechanism handed to getUTF32(); forced to
//                   EncodeMechanism::All when non-URI relative paths apply
// eCharset          charset handed to getUTF32()/appendUCS4()
// bIgnoreFragment   if true, the input's fragment is not carried over
// bSmart            enables the file system path heuristics for input
//                   without a scheme
// bRelativeNonURIs  only consulted for scheme-less input in smart mode:
//                   treat the input as a plain path without query or
//                   fragment; also switched on when a DOS style is guessed
// eStyle            file system styles considered by the heuristics
//
// Returns true on success. Returns false if input with a scheme does not
// parse, if a relative path is given while the base scheme is not
// hierarchical, or if the assembled result does not parse.
1474 bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1475 INetURLObject & rTheAbsURIRef,
1476 bool & rWasAbsolute,
1477 EncodeMechanism eMechanism,
1478 rtl_TextEncoding eCharset,
1479 bool bIgnoreFragment, bool bSmart,
1480 bool bRelativeNonURIs, FSysStyle eStyle)
1481 const
1483 sal_Unicode const * p = rTheRelURIRef.getStr();
1484 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
// Scheme detection: first ask getPrefix(), and only if that yields nothing
// fall back to parseScheme(). p itself is left at the start of the input.
1486 sal_Unicode const * pPrefixBegin = p;
1487 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1488 bool hasScheme = pPrefix != nullptr;
1489 if (!hasScheme) {
1490 pPrefixBegin = p;
1491 hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
// NOTE(review): 0x80000000 is used below as the value for "this delimiter is
// disabled"; presumably it can never compare equal to a real character --
// confirm.
1494 sal_uInt32 nSegmentDelimiter = '/';
1495 sal_uInt32 nQueryDelimiter
1496 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1497 sal_uInt32 nFragmentDelimiter = '#';
1498 Part ePart = PART_VISIBLE;
1500 if (!hasScheme && bSmart)
1502 // If the input matches any of the following productions (for which
1503 // the appropriate style bit is set in eStyle), it is assumed to be an
1504 // absolute file system path, rather than a relative URI reference.
1505 // (This is only a subset of the productions used for scheme detection
1506 // in INetURLObject::setAbsURIRef(), because most of those productions
1507 // interfere with the syntax of relative URI references.) The
1508 // productions use the auxiliary rules
1510 // domain = label *("." label)
1511 // label = alphanum [*(alphanum / "-") alphanum]
1512 // alphanum = ALPHA / DIGIT
1513 // UCS4 = <any UCS4 character>
1515 // 1st Production (UNC file; FSysStyle::Dos only):
1516 // "\\" domain ["\" *UCS4]
1517 // 2nd Production (Unix-like DOS file; FSysStyle::Dos only):
1518 // ALPHA ":" ["/" *UCS4]
1519 // 3rd Production (DOS file; FSysStyle::Dos only):
1520 // ALPHA ":" ["\" *UCS4]
1521 if (eStyle & FSysStyle::Dos)
1523 bool bFSys = false;
1524 sal_Unicode const * q = p;
1525 if (pEnd - q >= 2
1526 && rtl::isAsciiAlpha(q[0])
1527 && q[1] == ':'
1528 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1529 bFSys = true; // 2nd, 3rd
1530 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1532 q += 2;
1533 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1534 q, pEnd - q, '\\');
1535 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1536 if (parseHostOrNetBiosName(
1537 q, qe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
1538 true, nullptr))
1540 bFSys = true; // 1st
// The input looks like an absolute file system path: parse it on its own in
// smart mode. If that parse fails, fall through to relative handling.
1543 if (bFSys)
1545 INetURLObject aNewURI;
1546 aNewURI.setAbsURIRef(rTheRelURIRef, eMechanism,
1547 eCharset, true, eStyle);
1548 if (!aNewURI.HasError())
1550 rTheAbsURIRef = aNewURI;
1551 rWasAbsolute = true;
1552 return true;
1557 // When the base URL is a file URL, accept relative file system paths
1558 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1559 // and "#"), as well as relative URIs using "/" as delimiter:
1560 if (m_eScheme == INetProtocol::File)
1561 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1563 case FSysStyle::Unix:
1564 nSegmentDelimiter = '/';
1565 break;
1567 case FSysStyle::Dos:
1568 nSegmentDelimiter = '\\';
1569 bRelativeNonURIs = true;
1570 break;
1572 default:
1573 OSL_FAIL("INetURLObject::convertRelToAbs():"
1574 " Bad guessFSysStyleByCounting");
1575 break;
// Plain-path mode: disable the query and fragment delimiters so that the
// rest of the input is copied as path text only.
1578 if (bRelativeNonURIs)
1580 eMechanism = EncodeMechanism::All;
1581 nQueryDelimiter = 0x80000000;
1582 nFragmentDelimiter = 0x80000000;
1583 ePart = PART_VISIBLE_NONSPECIAL;
1587 // If the relative URI has the same scheme as the base URI, and that
1588 // scheme is hierarchical, then ignore its presence in the relative
1589 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1590 // step 3):
1591 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1592 && getSchemeInfo().m_bHierarchical)
1594 hasScheme = false;
// Advance p to just past the first ':' (or to pEnd if there is none).
1595 while (p != pEnd && *p++ != ':') ;
1597 rWasAbsolute = hasScheme;
1599 // Fast solution for non-relative URIs:
1600 if (hasScheme)
1602 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1603 if (aNewURI.HasError())
1605 rWasAbsolute = false;
1606 return false;
1609 if (bIgnoreFragment)
1610 aNewURI.clearFragment();
1611 rTheAbsURIRef = aNewURI;
1612 return true;
// States of the resolution below:
//   STATE_AUTH      nothing decided yet (initial state)
//   STATE_ABS_PATH  the rest of the input is an absolute path
//   STATE_REL_PATH  the rest of the input is a path relative to the base path
//   STATE_FRAGMENT  the rest of the input is the fragment text
//   STATE_DONE      the input is used up
1615 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1616 STATE_DONE };
1618 OUStringBuffer aSynAbsURIRef(128);
1619 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1620 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1621 if (m_eScheme != INetProtocol::Generic)
1623 aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1625 else
1627 sal_Unicode const * pSchemeBegin
1628 = m_aAbsURIRef.getStr();
1629 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// NOTE(review): this scan has no end check; it relies on the base URL text
// containing a ':' -- confirm that always holds for generic schemes.
1630 while (pSchemeEnd[0] != ':')
1632 ++pSchemeEnd;
1634 aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1636 aSynAbsURIRef.append(':');
// bSameDoc stays true only while the input has contributed neither an
// authority nor a path, i.e. it is empty or consists of a fragment only.
1638 State eState = STATE_AUTH;
1639 bool bSameDoc = true;
1641 if (getSchemeInfo().m_bAuthority)
// Input begins with "//": copy its own authority, up to the next unescaped
// segment or fragment delimiter. The loops in this function rely on
// getUTF32() advancing p, so when the loop breaks the delimiter has already
// been consumed.
1643 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1645 aSynAbsURIRef.append("//");
1646 p += 2;
1647 eState = STATE_ABS_PATH;
1648 bSameDoc = false;
1649 while (p != pEnd)
1651 EscapeType eEscapeType;
1652 sal_uInt32 nUTF32
1653 = getUTF32(p, pEnd, eMechanism,
1654 eCharset, eEscapeType);
1655 if (eEscapeType == EscapeType::NONE)
1657 if (nUTF32 == nSegmentDelimiter)
1658 break;
1659 else if (nUTF32 == nFragmentDelimiter)
1661 eState = STATE_FRAGMENT;
1662 break;
1665 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1666 PART_VISIBLE, eCharset, true);
// Otherwise reuse the authority text of the base URL unchanged.
1669 else
1671 SubString aAuthority(getAuthority());
1672 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1673 + aAuthority.getBegin(),
1674 aAuthority.getLength());
// No authority was taken from the input: classify what follows by its first
// character.
1678 if (eState == STATE_AUTH)
1680 if (p == pEnd)
1681 eState = STATE_DONE;
1682 else if (*p == nFragmentDelimiter)
1684 ++p;
1685 eState = STATE_FRAGMENT;
1687 else if (*p == nSegmentDelimiter)
1689 ++p;
1690 eState = STATE_ABS_PATH;
1691 bSameDoc = false;
1693 else
1695 eState = STATE_REL_PATH;
1696 bSameDoc = false;
// Absolute path: copy everything up to an unescaped fragment delimiter,
// writing each unescaped segment delimiter as '/'.
1700 if (eState == STATE_ABS_PATH)
1702 aSynAbsURIRef.append('/');
1703 eState = STATE_DONE;
1704 while (p != pEnd)
1706 EscapeType eEscapeType;
1707 sal_uInt32 nUTF32
1708 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1709 if (eEscapeType == EscapeType::NONE)
1711 if (nUTF32 == nFragmentDelimiter)
1713 eState = STATE_FRAGMENT;
1714 break;
1716 else if (nUTF32 == nSegmentDelimiter)
1717 nUTF32 = '/';
1719 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1720 eCharset, true);
1723 else if (eState == STATE_REL_PATH)
1725 if (!getSchemeInfo().m_bHierarchical)
1727 // Detect cases where a relative input could not be made absolute
1728 // because the given base URL is broken (most probably because it is
1729 // empty):
1730 SAL_WARN_IF(
1731 HasError(), "tools.urlobj",
1732 "cannot make <" << rTheRelURIRef
1733 << "> absolute against broken base <"
1734 << GetMainURL(DecodeMechanism::NONE) << ">");
1735 rWasAbsolute = false;
1736 return false;
// Start from the base path cut back to, and including, its last '/'. If the
// base path contains no '/', nothing of it is kept.
1739 sal_Unicode const * pBasePathBegin
1740 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1741 sal_Unicode const * pBasePathEnd
1742 = pBasePathBegin + m_aPath.getLength();
1743 while (pBasePathEnd != pBasePathBegin)
1744 if (*(--pBasePathEnd) == '/')
1746 ++pBasePathEnd;
1747 break;
1750 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1751 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1752 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1753 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1754 "INetURLObject::convertRelToAbs(): Bad base path");
// Process the relative path segment by segment until the query or fragment
// delimiter or the end of the input.
1756 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1758 if (*p == '.')
// "." segment: skip it and the segment delimiter after it.
1760 if (pEnd - p == 1
1761 || p[1] == nSegmentDelimiter
1762 || p[1] == nQueryDelimiter
1763 || p[1] == nFragmentDelimiter)
1765 ++p;
1766 if (p != pEnd && *p == nSegmentDelimiter)
1767 ++p;
1768 continue;
// ".." segment: remove the last segment written so far, but only while the
// output path is longer than one character. Otherwise the ".." falls through
// and is copied like an ordinary segment.
1770 else if (pEnd - p >= 2
1771 && p[1] == '.'
1772 && (pEnd - p == 2
1773 || p[2] == nSegmentDelimiter
1774 || p[2] == nQueryDelimiter
1775 || p[2] == nFragmentDelimiter)
1776 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1778 p += 2;
1779 if (p != pEnd && *p == nSegmentDelimiter)
1780 ++p;
// Search backwards from the character before the trailing '/' for the
// previous '/' and truncate directly after it.
1782 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1783 while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1784 --i;
1785 aSynAbsURIRef.setLength(i + 1);
1786 DBG_ASSERT(
1787 aSynAbsURIRef.getLength() > nPathBegin
1788 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1789 "INetURLObject::convertRelToAbs(): Bad base path");
1790 continue;
// Ordinary segment: copy it, then write its trailing delimiter as '/'.
1794 while (p != pEnd
1795 && *p != nSegmentDelimiter
1796 && *p != nQueryDelimiter
1797 && *p != nFragmentDelimiter)
1799 EscapeType eEscapeType;
1800 sal_uInt32 nUTF32
1801 = getUTF32(p, pEnd, eMechanism,
1802 eCharset, eEscapeType);
1803 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1804 eCharset, true);
1806 if (p != pEnd && *p == nSegmentDelimiter)
1808 aSynAbsURIRef.append('/');
1809 ++p;
// Copy whatever remains before the fragment delimiter (this is where a
// query part of the input ends up).
1813 while (p != pEnd && *p != nFragmentDelimiter)
1815 EscapeType eEscapeType;
1816 sal_uInt32 nUTF32
1817 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1818 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1819 eCharset, true);
1822 if (p == pEnd)
1823 eState = STATE_DONE;
1824 else
1826 ++p;
1827 eState = STATE_FRAGMENT;
// The input contributed neither authority nor path: reuse the path and
// query of the base URL.
1830 else if (bSameDoc)
1832 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1833 m_aPath.getLength());
// NOTE(review): begin - 1 / length + 1 presumably pulls in the '?' that
// precedes the query text -- confirm.
1834 if (m_aQuery.isPresent())
1835 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1836 + m_aQuery.getBegin() - 1,
1837 m_aQuery.getLength() + 1);
// Append the input's fragment unless the caller asked to ignore it. The
// output always uses '#', and p already points past the input's delimiter.
1840 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1842 aSynAbsURIRef.append('#');
1843 while (p != pEnd)
1845 EscapeType eEscapeType;
1846 sal_uInt32 nUTF32
1847 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1848 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1849 PART_VISIBLE, eCharset, true);
// Build the result object from the assembled text; HasError() reports
// whether that text parsed.
1853 INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1854 if (aNewURI.HasError())
1856 // Detect cases where a relative input could not be made absolute
1857 // because the given base URL is broken (most probably because it is
1858 // empty):
1859 SAL_WARN_IF(
1860 HasError(), "tools.urlobj",
1861 "cannot make <" << rTheRelURIRef
1862 << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE)
1863 << ">");
1864 rWasAbsolute = false;
1865 return false;
1868 rTheAbsURIRef = aNewURI;
1869 return true;
1872 bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1873 OUString & rTheRelURIRef,
1874 EncodeMechanism eEncodeMechanism,
1875 DecodeMechanism eDecodeMechanism,
1876 rtl_TextEncoding eCharset,
1877 FSysStyle eStyle) const
1879 // Check for hierarchical base URL:
1880 if (!getSchemeInfo().m_bHierarchical)
1882 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1883 return false;
1886 // Convert the input (absolute or relative URI ref) to an absolute URI
1887 // ref:
1888 INetURLObject aSubject;
1889 bool bWasAbsolute;
1890 if (!convertRelToAbs(rTheAbsURIRef, aSubject, bWasAbsolute,
1891 eEncodeMechanism, eCharset, false, false, false,
1892 eStyle))
1894 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1895 return false;
1898 // Check for differing scheme or authority parts:
1899 if ((m_aScheme.compare(
1900 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1901 != 0)
1902 || (m_aUser.compare(
1903 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1904 != 0)
1905 || (m_aAuth.compare(
1906 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1907 != 0)
1908 || (m_aHost.compare(
1909 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1910 != 0)
1911 || (m_aPort.compare(
1912 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1913 != 0))
1915 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1916 return false;
1919 sal_Unicode const * pBasePathBegin
1920 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1921 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1922 sal_Unicode const * pSubjectPathBegin
1923 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1924 sal_Unicode const * pSubjectPathEnd
1925 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1927 // Make nMatch point past the last matching slash, or past the end of the
1928 // paths, in case they are equal:
1929 sal_Unicode const * pSlash = nullptr;
1930 sal_Unicode const * p1 = pBasePathBegin;
1931 sal_Unicode const * p2 = pSubjectPathBegin;
1932 for (;;)
1934 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1936 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1937 pSlash = p1;
1938 break;
1941 sal_Unicode c = *p1++;
1942 if (c != *p2++)
1943 break;
1944 if (c == '/')
1945 pSlash = p1;
1947 if (!pSlash)
1949 // One of the paths does not start with '/':
1950 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1951 return false;
1953 sal_Int32 nMatch = pSlash - pBasePathBegin;
1955 // If the two URLs are DOS file URLs starting with different volumes
1956 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1957 // relative (it could be, but some people do not like that):
1958 if (m_eScheme == INetProtocol::File
1959 && nMatch <= 1
1960 && hasDosVolume(eStyle)
1961 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1963 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1964 return false;
1967 // For every slash in the base path after nMatch, a prefix of "../" is
1968 // added to the new relative URL (if the common prefix of the two paths is
1969 // only "/"---but see handling of file URLs above---, the complete subject
1970 // path could go into the new relative URL instead, but some people don't
1971 // like that):
1972 OUStringBuffer aSynRelURIRef;
1973 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1974 ++p)
1976 if (*p == '/')
1977 aSynRelURIRef.append("../");
1980 // If the new relative URL would start with "//" (i.e., it would be
1981 // mistaken for a relative URL starting with an authority part), or if the
1982 // new relative URL would neither be empty nor start with <"/"> nor start
1983 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1984 // with a scheme part), then the new relative URL is prefixed with "./":
1985 if (aSynRelURIRef.isEmpty())
1987 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1988 && pSubjectPathBegin[nMatch] == '/'
1989 && pSubjectPathBegin[nMatch + 1] == '/')
1991 aSynRelURIRef.append("./");
1993 else
1995 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
1996 p != pSubjectPathEnd && *p != '/'; ++p)
1998 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2000 aSynRelURIRef.append("./");
2001 break;
2007 // The remainder of the subject path, starting at nMatch, is appended to
2008 // the new relative URL:
2009 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2010 eDecodeMechanism, eCharset));
2012 // If the subject has defined query or fragment parts, they are appended
2013 // to the new relative URL:
2014 if (aSubject.m_aQuery.isPresent())
2016 aSynRelURIRef.append('?');
2017 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery,
2018 eDecodeMechanism, eCharset));
2020 if (aSubject.m_aFragment.isPresent())
2022 aSynRelURIRef.append('#');
2023 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2024 eDecodeMechanism, eCharset));
2027 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2028 return true;
2031 // static
2032 bool INetURLObject::convertIntToExt(OUString const & rTheIntURIRef,
2033 OUString & rTheExtURIRef,
2034 DecodeMechanism eDecodeMechanism,
2035 rtl_TextEncoding eCharset)
2037 OUString aSynExtURIRef(encodeText(rTheIntURIRef, PART_VISIBLE,
2038 EncodeMechanism::NotCanonical, eCharset, true));
2039 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2040 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2041 sal_Unicode const * p = pBegin;
2042 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2043 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::Internal;
2044 if (bConvert)
2046 aSynExtURIRef =
2047 aSynExtURIRef.replaceAt(0, p - pBegin,
2048 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2050 rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset);
2051 return bConvert;
2054 // static
2055 bool INetURLObject::convertExtToInt(OUString const & rTheExtURIRef,
2056 OUString & rTheIntURIRef,
2057 DecodeMechanism eDecodeMechanism,
2058 rtl_TextEncoding eCharset)
2060 OUString aSynIntURIRef(encodeText(rTheExtURIRef, PART_VISIBLE,
2061 EncodeMechanism::NotCanonical, eCharset, true));
2062 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2063 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2064 sal_Unicode const * p = pBegin;
2065 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2066 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::External;
2067 if (bConvert)
2069 aSynIntURIRef =
2070 aSynIntURIRef.replaceAt(0, p - pBegin,
2071 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2073 rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset);
2074 return bConvert;
2077 // static
// Looks for a known scheme prefix at the start of the text [rBegin, pEnd).
// Input characters are ASCII-lower-cased before being compared with the
// table entries.  If an entry matches, rBegin is advanced past the matched
// prefix and a pointer to the entry is returned.  If nothing matches, rBegin
// keeps its original value and nullptr is returned.
//
// The search walks the input one character at a time and narrows the range
// [pFirst, pLast] of candidate table entries, which is why the table must be
// sorted.  Whenever the first candidate has been consumed completely it is
// remembered in pMatch/pMatched as a fallback, so that e.g. "private:" is
// still returned when no longer "private:..." entry fits.
2078 INetURLObject::PrefixInfo const * INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2079 sal_Unicode const * pEnd)
2081 static PrefixInfo const aMap[]
2082 = { // dummy entry at front needed, because pLast may point here:
2083 { nullptr, nullptr, INetProtocol::NotValid, PrefixInfo::Kind::Internal },
2084 { ".component:", "staroffice.component:", INetProtocol::Component,
2085 PrefixInfo::Kind::Internal },
2086 { ".uno:", "staroffice.uno:", INetProtocol::Uno,
2087 PrefixInfo::Kind::Internal },
2088 { "cid:", nullptr, INetProtocol::Cid, PrefixInfo::Kind::Official },
2089 { "data:", nullptr, INetProtocol::Data, PrefixInfo::Kind::Official },
2090 { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::Kind::Internal },
2091 { "file:", nullptr, INetProtocol::File, PrefixInfo::Kind::Official },
2092 { "ftp:", nullptr, INetProtocol::Ftp, PrefixInfo::Kind::Official },
2093 { "hid:", "staroffice.hid:", INetProtocol::Hid,
2094 PrefixInfo::Kind::Internal },
2095 { "http:", nullptr, INetProtocol::Http, PrefixInfo::Kind::Official },
2096 { "https:", nullptr, INetProtocol::Https, PrefixInfo::Kind::Official },
2097 { "javascript:", nullptr, INetProtocol::Javascript, PrefixInfo::Kind::Official },
2098 { "ldap:", nullptr, INetProtocol::Ldap, PrefixInfo::Kind::Official },
2099 { "macro:", "staroffice.macro:", INetProtocol::Macro,
2100 PrefixInfo::Kind::Internal },
2101 { "mailto:", nullptr, INetProtocol::Mailto, PrefixInfo::Kind::Official },
2102 { "private:", "staroffice.private:", INetProtocol::PrivSoffice,
2103 PrefixInfo::Kind::Internal },
2104 { "private:factory/", "staroffice.factory:",
2105 INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2106 { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice,
2107 PrefixInfo::Kind::Internal },
2108 { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice,
2109 PrefixInfo::Kind::Internal },
2110 { "private:searchfolder:", "staroffice.searchfolder:",
2111 INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2112 { "private:trashcan:", "staroffice.trashcan:",
2113 INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2114 { "sftp:", nullptr, INetProtocol::Sftp, PrefixInfo::Kind::Official },
2115 { "slot:", "staroffice.slot:", INetProtocol::Slot,
2116 PrefixInfo::Kind::Internal },
2117 { "smb:", nullptr, INetProtocol::Smb, PrefixInfo::Kind::Official },
2118 { "staroffice.component:", ".component:", INetProtocol::Component,
2119 PrefixInfo::Kind::External },
2120 { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::Kind::External },
2121 { "staroffice.factory:", "private:factory/",
2122 INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2123 { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice,
2124 PrefixInfo::Kind::External },
2125 { "staroffice.hid:", "hid:", INetProtocol::Hid,
2126 PrefixInfo::Kind::External },
2127 { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice,
2128 PrefixInfo::Kind::External },
2129 { "staroffice.macro:", "macro:", INetProtocol::Macro,
2130 PrefixInfo::Kind::External },
2131 { "staroffice.private:", "private:", INetProtocol::PrivSoffice,
2132 PrefixInfo::Kind::External },
2133 { "staroffice.searchfolder:", "private:searchfolder:",
2134 INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2135 { "staroffice.slot:", "slot:", INetProtocol::Slot,
2136 PrefixInfo::Kind::External },
2137 { "staroffice.trashcan:", "private:trashcan:",
2138 INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2139 { "staroffice.uno:", ".uno:", INetProtocol::Uno,
2140 PrefixInfo::Kind::External },
2141 { "staroffice:", "private:", INetProtocol::PrivSoffice,
2142 PrefixInfo::Kind::External },
2143 { "telnet:", nullptr, INetProtocol::Telnet, PrefixInfo::Kind::Official },
2144 { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis, PrefixInfo::Kind::Internal },
2145 { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd,
2146 PrefixInfo::Kind::Official },
2147 { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand,
2148 PrefixInfo::Kind::Official },
2149 { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp,
2150 PrefixInfo::Kind::Official },
2151 { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier,
2152 PrefixInfo::Kind::Official },
2153 { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg,
2154 PrefixInfo::Kind::Official },
2155 { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc,
2156 PrefixInfo::Kind::Official },
2157 { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav,
2158 PrefixInfo::Kind::Official }
2160 /* This list needs to be sorted, or you'll introduce serious bugs */
// Candidate range: starts after the dummy entry and ends at the last entry.
2162 PrefixInfo const * pFirst = aMap + 1;
2163 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
// Fallback result: last table entry that was matched completely, and the
// input position right behind it.
2164 PrefixInfo const * pMatch = nullptr;
2165 sal_Unicode const * pMatched = rBegin;
2166 sal_Unicode const * p = rBegin;
2167 sal_Int32 i = 0;
// i is the index of the prefix character compared in this round.
2168 for (; pFirst < pLast; ++i)
// The first candidate ends here, so it matches completely: remember it and
// keep looking for a longer entry.
2170 if (pFirst->m_pPrefix[i] == '\0')
2172 pMatch = pFirst++;
2173 pMatched = p;
2175 if (p >= pEnd)
2176 break;
2177 sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++);
// Drop candidates whose i-th character is smaller / larger than the input.
2178 while (pFirst <= pLast && static_cast<unsigned char>(pFirst->m_pPrefix[i]) < nChar)
2179 ++pFirst;
2180 while (pFirst <= pLast && static_cast<unsigned char>(pLast->m_pPrefix[i]) > nChar)
2181 --pLast;
// Exactly one candidate is left: compare its remaining characters directly.
2183 if (pFirst == pLast)
2185 char const * q = pFirst->m_pPrefix + i;
2186 while (p < pEnd && *q != '\0'
2187 && rtl::toAsciiLowerCase(*p) == static_cast<unsigned char>(*q))
2189 ++p;
2190 ++q;
2192 if (*q == '\0')
2194 rBegin = p;
2195 return pFirst;
// No (longer) candidate fits: fall back to the remembered complete match,
// which is nullptr with rBegin unchanged if there was none.
2198 rBegin = pMatched;
2199 return pMatch;
2202 sal_Int32 INetURLObject::getAuthorityBegin() const
2204 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2205 "INetURLObject::getAuthority(): Bad scheme");
2206 sal_Int32 nBegin;
2207 if (m_aUser.isPresent())
2208 nBegin = m_aUser.getBegin();
2209 else if (m_aHost.isPresent())
2210 nBegin = m_aHost.getBegin();
2211 else
2212 nBegin = m_aPath.getBegin();
2213 nBegin -= RTL_CONSTASCII_LENGTH("//");
2214 DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/',
2215 "INetURLObject::getAuthority(): Bad authority");
2216 return nBegin;
2219 INetURLObject::SubString INetURLObject::getAuthority() const
2221 sal_Int32 nBegin = getAuthorityBegin();
2222 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2223 m_aHost.isPresent() ? m_aHost.getEnd() :
2224 m_aAuth.isPresent() ? m_aAuth.getEnd() :
2225 m_aUser.isPresent() ? m_aUser.getEnd() :
2226 nBegin + RTL_CONSTASCII_LENGTH("//");
2227 return SubString(nBegin, nEnd - nBegin);
2230 bool INetURLObject::setUser(OUString const & rTheUser,
2231 rtl_TextEncoding eCharset)
2233 if (
2234 !getSchemeInfo().m_bUser
2237 return false;
2240 OUString aNewUser(encodeText(rTheUser, PART_USER_PASSWORD,
2241 EncodeMechanism::WasEncoded, eCharset, false));
2242 sal_Int32 nDelta;
2243 if (m_aUser.isPresent())
2244 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2245 else if (m_aHost.isPresent())
2247 m_aAbsURIRef.insert(m_aHost.getBegin(), u'@');
2248 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2250 else if (getSchemeInfo().m_bHost)
2251 return false;
2252 else
2253 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2254 m_aAuth += nDelta;
2255 m_aHost += nDelta;
2256 m_aPort += nDelta;
2257 m_aPath += nDelta;
2258 m_aQuery += nDelta;
2259 m_aFragment += nDelta;
2260 return true;
2263 namespace
2265 void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2267 OUString sTemp(rBuf.makeStringAndClear());
2268 rBuf.append(sTemp.replaceAt(index, count, OUString()));
2272 bool INetURLObject::clearPassword()
2274 if (!getSchemeInfo().m_bPassword)
2275 return false;
2276 if (m_aAuth.isPresent())
2278 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2279 m_aAuth.getLength() + 1);
2280 sal_Int32 nDelta = m_aAuth.clear() - 1;
2281 m_aHost += nDelta;
2282 m_aPort += nDelta;
2283 m_aPath += nDelta;
2284 m_aQuery += nDelta;
2285 m_aFragment += nDelta;
2287 return true;
2290 bool INetURLObject::setPassword(OUString const & rThePassword,
2291 rtl_TextEncoding eCharset)
2293 if (!getSchemeInfo().m_bPassword)
2294 return false;
2295 OUString aNewAuth(encodeText(rThePassword, PART_USER_PASSWORD,
2296 EncodeMechanism::WasEncoded, eCharset, false));
2297 sal_Int32 nDelta;
2298 if (m_aAuth.isPresent())
2299 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2300 else if (m_aUser.isPresent())
2302 m_aAbsURIRef.insert(m_aUser.getEnd(), u':');
2303 nDelta
2304 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2306 else if (m_aHost.isPresent())
2308 m_aAbsURIRef.insert(m_aHost.getBegin(), ":@" );
2309 m_aUser.set(m_aAbsURIRef, OUString(), m_aHost.getBegin());
2310 nDelta
2311 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2313 else if (getSchemeInfo().m_bHost)
2314 return false;
2315 else
2317 m_aAbsURIRef.insert(m_aPath.getBegin(), u':');
2318 m_aUser.set(m_aAbsURIRef, OUString(), m_aPath.getBegin());
2319 nDelta
2320 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2322 m_aHost += nDelta;
2323 m_aPort += nDelta;
2324 m_aPath += nDelta;
2325 m_aQuery += nDelta;
2326 m_aFragment += nDelta;
2327 return true;
2330 // static
// Parses a host from the text [rBegin, pEnd) with a hand-written state
// machine.  Three forms are recognized: a dotted domain name (labels made of
// ASCII letters, digits, '_' and inner '-'), a dotted-decimal IPv4 address,
// and a bracketed IPv6 address that may end in an embedded IPv4 address.
// Scanning stops at pEnd or at the first character that cannot continue the
// current form.
//
// On success the function returns true, stores the canonic host text in
// rCanonic and advances rBegin past the consumed characters.  On failure it
// returns false and assigns neither rBegin nor rCanonic.
2331 bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2332 OUString & rCanonic)
2334 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2335 // IPv4 address directly follows the abbreviating "::". The ABNF in
2336 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2337 // mentions "::13:1.68.3". This algorithm accepts both variants:
2338 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2339 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2340 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2341 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2342 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2343 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2344 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2345 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
// aTheCanonic collects the canonic text of IP addresses; for domain names it
// is discarded at the end and the raw input is used instead.
2346 OUStringBuffer aTheCanonic(32);
// nNumber: value of the number currently being read; nDigits: its digit
// count; nOctets: number of IPv4 octets started so far.
2347 sal_uInt32 nNumber = 0;
2348 int nDigits = 0;
2349 int nOctets = 0;
2350 State eState = STATE_INITIAL;
2351 sal_Unicode const * p = rBegin;
2352 for (; p != pEnd; ++p)
2353 switch (eState)
2355 case STATE_INITIAL:
2356 if (*p == '[')
2358 aTheCanonic.append('[');
2359 eState = STATE_IP6;
2361 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2362 eState = STATE_TOPLABEL;
2363 else if (rtl::isAsciiDigit(*p))
2365 nNumber = INetMIME::getWeight(*p);
2366 nDigits = 1;
2367 nOctets = 1;
2368 eState = STATE_IP4;
2370 else
2371 goto done;
2372 break;
2374 case STATE_LABEL:
2375 if (*p == '.')
2376 eState = STATE_LABEL_DOT;
2377 else if (*p == '-')
2378 eState = STATE_LABEL_HYPHEN;
2379 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2380 goto done;
2381 break;
2383 case STATE_LABEL_HYPHEN:
2384 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2385 eState = STATE_LABEL;
2386 else if (*p != '-')
2387 goto done;
2388 break;
2390 case STATE_LABEL_DOT:
2391 if (rtl::isAsciiAlpha(*p) || *p == '_')
2392 eState = STATE_TOPLABEL;
2393 else if (rtl::isAsciiDigit(*p))
2394 eState = STATE_LABEL;
2395 else
2396 goto done;
2397 break;
2399 case STATE_TOPLABEL:
2400 if (*p == '.')
2401 eState = STATE_TOPLABEL_DOT;
2402 else if (*p == '-')
2403 eState = STATE_TOPLABEL_HYPHEN;
2404 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2405 goto done;
2406 break;
2408 case STATE_TOPLABEL_HYPHEN:
2409 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2410 eState = STATE_TOPLABEL;
2411 else if (*p != '-')
2412 goto done;
2413 break;
2415 case STATE_TOPLABEL_DOT:
2416 if (rtl::isAsciiAlpha(*p) || *p == '_')
2417 eState = STATE_TOPLABEL;
2418 else if (rtl::isAsciiDigit(*p))
2419 eState = STATE_LABEL;
2420 else
2421 goto done;
2422 break;
// A run of digits is treated as an IPv4 octet until something shows that it
// is really a domain-name label (a letter, '_', '-', a fourth digit, or a
// dot after the fourth octet).
2424 case STATE_IP4:
2425 if (*p == '.')
2426 if (nOctets < 4)
2428 aTheCanonic.append( OUString::number(nNumber) );
2429 aTheCanonic.append( '.' );
2430 ++nOctets;
2431 eState = STATE_IP4_DOT;
2433 else
2434 eState = STATE_LABEL_DOT;
2435 else if (*p == '-')
2436 eState = STATE_LABEL_HYPHEN;
2437 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2438 eState = STATE_LABEL;
2439 else if (rtl::isAsciiDigit(*p))
2440 if (nDigits < 3)
2442 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2443 ++nDigits;
2445 else
2446 eState = STATE_LABEL;
2447 else
2448 goto done;
2449 break;
2451 case STATE_IP4_DOT:
2452 if (rtl::isAsciiAlpha(*p) || *p == '_')
2453 eState = STATE_TOPLABEL;
2454 else if (rtl::isAsciiDigit(*p))
2456 nNumber = INetMIME::getWeight(*p);
2457 nDigits = 1;
2458 eState = STATE_IP4;
2460 else
2461 goto done;
2462 break;
// Directly after '[': either the first hex group or a leading "::".
2464 case STATE_IP6:
2465 if (*p == ':')
2466 eState = STATE_IP6_COLON;
2467 else if (rtl::isAsciiHexDigit(*p))
2469 nNumber = INetMIME::getHexWeight(*p);
2470 nDigits = 1;
2471 eState = STATE_IP6_HEXSEQ1;
2473 else
2474 goto done;
2475 break;
2477 case STATE_IP6_COLON:
2478 if (*p == ':')
2480 aTheCanonic.append("::");
2481 eState = STATE_IP6_2COLON;
2483 else
2484 goto done;
2485 break;
// Directly after "::".
2487 case STATE_IP6_2COLON:
2488 if (*p == ']')
2489 eState = STATE_IP6_DONE;
2490 else if (*p == ':')
2492 aTheCanonic.append(':');
2493 eState = STATE_IP6_3COLON;
2495 else if (rtl::isAsciiDigit(*p))
2497 nNumber = INetMIME::getWeight(*p);
2498 nDigits = 1;
2499 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2501 else if (rtl::isAsciiHexDigit(*p))
2503 nNumber = INetMIME::getHexWeight(*p);
2504 nDigits = 1;
2505 eState = STATE_IP6_HEXSEQ2;
2507 else
2508 goto done;
2509 break;
// After ":::" only an embedded IPv4 address may follow.
2511 case STATE_IP6_3COLON:
2512 if (rtl::isAsciiDigit(*p))
2514 nNumber = INetMIME::getWeight(*p);
2515 nDigits = 1;
2516 nOctets = 1;
2517 eState = STATE_IP6_IP4;
2519 else
2520 goto done;
2521 break;
// Inside a hex group that precedes any "::".
2523 case STATE_IP6_HEXSEQ1:
2524 if (*p == ']')
2526 aTheCanonic.append(
2527 OUString::number(nNumber, 16));
2528 eState = STATE_IP6_DONE;
2530 else if (*p == ':')
2532 aTheCanonic.append(
2533 OUString::number(nNumber, 16));
2534 aTheCanonic.append(':');
2535 eState = STATE_IP6_HEXSEQ1_COLON;
2537 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2539 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2540 ++nDigits;
2542 else
2543 goto done;
2544 break;
2546 case STATE_IP6_HEXSEQ1_COLON:
2547 if (*p == ':')
2549 aTheCanonic.append(':');
2550 eState = STATE_IP6_2COLON;
2552 else if (rtl::isAsciiDigit(*p))
2554 nNumber = INetMIME::getWeight(*p);
2555 nDigits = 1;
2556 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2558 else if (rtl::isAsciiHexDigit(*p))
2560 nNumber = INetMIME::getHexWeight(*p);
2561 nDigits = 1;
2562 eState = STATE_IP6_HEXSEQ1;
2564 else
2565 goto done;
2566 break;
// A group consisting of decimal digits only so far: it may be a hex group or
// the first octet of an embedded IPv4 address.  The digits are accumulated
// as hex until a '.' decides for IPv4.
2568 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2569 if (*p == ']')
2571 aTheCanonic.append(
2572 OUString::number(nNumber, 16));
2573 eState = STATE_IP6_DONE;
2575 else if (*p == ':')
2577 aTheCanonic.append(
2578 OUString::number(nNumber, 16));
2579 aTheCanonic.append(':');
2580 eState = STATE_IP6_HEXSEQ1_COLON;
2582 else if (*p == '.')
// Reinterpret the (at most three) hex nibbles read so far as decimal digits.
2584 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2585 + (nNumber & 15);
2586 aTheCanonic.append(
2587 OUString::number(nNumber));
2588 aTheCanonic.append('.');
2589 nOctets = 2;
2590 eState = STATE_IP6_IP4_DOT;
2592 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2594 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2595 ++nDigits;
2597 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2599 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2600 ++nDigits;
2601 eState = STATE_IP6_HEXSEQ1;
2603 else
2604 goto done;
2605 break;
// Inside a hex group that follows the "::"; no further "::" is accepted from
// here on.
2607 case STATE_IP6_HEXSEQ2:
2608 if (*p == ']')
2610 aTheCanonic.append(
2611 OUString::number(nNumber, 16));
2612 eState = STATE_IP6_DONE;
2614 else if (*p == ':')
2616 aTheCanonic.append(
2617 OUString::number(nNumber, 16));
2618 aTheCanonic.append(':');
2619 eState = STATE_IP6_HEXSEQ2_COLON;
2621 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2623 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2624 ++nDigits;
2626 else
2627 goto done;
2628 break;
2630 case STATE_IP6_HEXSEQ2_COLON:
2631 if (rtl::isAsciiDigit(*p))
2633 nNumber = INetMIME::getWeight(*p);
2634 nDigits = 1;
2635 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2637 else if (rtl::isAsciiHexDigit(*p))
2639 nNumber = INetMIME::getHexWeight(*p);
2640 nDigits = 1;
2641 eState = STATE_IP6_HEXSEQ2;
2643 else
2644 goto done;
2645 break;
// Same as STATE_IP6_HEXSEQ1_MAYBE_IP4, but behind the "::".
2647 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2648 if (*p == ']')
2650 aTheCanonic.append(
2651 OUString::number(nNumber, 16));
2652 eState = STATE_IP6_DONE;
2654 else if (*p == ':')
2656 aTheCanonic.append(
2657 OUString::number(nNumber, 16));
2658 aTheCanonic.append(':');
2659 eState = STATE_IP6_HEXSEQ2_COLON;
2661 else if (*p == '.')
// Reinterpret the (at most three) hex nibbles read so far as decimal digits.
2663 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2664 + (nNumber & 15);
2665 aTheCanonic.append(
2666 OUString::number(nNumber));
2667 aTheCanonic.append('.');
2668 nOctets = 2;
2669 eState = STATE_IP6_IP4_DOT;
2671 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2673 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2674 ++nDigits;
2676 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2678 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2679 ++nDigits;
2680 eState = STATE_IP6_HEXSEQ2;
2682 else
2683 goto done;
2684 break;
// Inside an octet of the IPv4 address embedded in an IPv6 address; ']' is
// only accepted after the fourth octet.
2686 case STATE_IP6_IP4:
2687 if (*p == ']')
2688 if (nOctets == 4)
2690 aTheCanonic.append(
2691 OUString::number(nNumber));
2692 eState = STATE_IP6_DONE;
2694 else
2695 goto done;
2696 else if (*p == '.')
2697 if (nOctets < 4)
2699 aTheCanonic.append(
2700 OUString::number(nNumber));
2701 aTheCanonic.append('.');
2702 ++nOctets;
2703 eState = STATE_IP6_IP4_DOT;
2705 else
2706 goto done;
2707 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2709 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2710 ++nDigits;
2712 else
2713 goto done;
2714 break;
2716 case STATE_IP6_IP4_DOT:
2717 if (rtl::isAsciiDigit(*p))
2719 nNumber = INetMIME::getWeight(*p);
2720 nDigits = 1;
2721 eState = STATE_IP6_IP4;
2723 else
2724 goto done;
2725 break;
// The closing ']' has been consumed; anything after it is not part of the
// host.
2727 case STATE_IP6_DONE:
2728 goto done;
// Scanning has stopped at p.  Only states in which a complete host has been
// read lead to success.
2730 done:
2731 switch (eState)
// Domain name: the canonic form is the consumed input, copied unchanged.
2733 case STATE_LABEL:
2734 case STATE_TOPLABEL:
2735 case STATE_TOPLABEL_DOT:
2736 aTheCanonic.setLength(0);
2737 aTheCanonic.append(rBegin, p - rBegin);
2738 rBegin = p;
2739 rCanonic = aTheCanonic.makeStringAndClear();
2740 return true;
// IPv4: exactly four octets are required; the last one is still pending in
// nNumber and is appended here.
2742 case STATE_IP4:
2743 if (nOctets == 4)
2745 aTheCanonic.append(
2746 OUString::number(nNumber));
2747 rBegin = p;
2748 rCanonic = aTheCanonic.makeStringAndClear();
2749 return true;
2751 return false;
// IPv6: everything but the closing bracket has already been appended.
2753 case STATE_IP6_DONE:
2754 aTheCanonic.append(']');
2755 rBegin = p;
2756 rCanonic = aTheCanonic.makeStringAndClear();
2757 return true;
2759 default:
2760 return false;
2764 // static
2765 bool INetURLObject::parseHostOrNetBiosName(
2766 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
2767 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2768 OUStringBuffer* pCanonic)
2770 OUString aTheCanonic;
2771 if (pBegin < pEnd)
2773 sal_Unicode const * p = pBegin;
2774 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2776 if (bNetBiosName)
2778 OUStringBuffer buf;
2779 while (pBegin < pEnd)
2781 EscapeType eEscapeType;
2782 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
2783 eMechanism, eCharset,
2784 eEscapeType);
2785 if (!INetMIME::isVisible(nUTF32))
2786 return false;
2787 if (!rtl::isAsciiAlphanumeric(nUTF32))
2788 switch (nUTF32)
2790 case '"':
2791 case '*':
2792 case '+':
2793 case ',':
2794 case '/':
2795 case ':':
2796 case ';':
2797 case '<':
2798 case '=':
2799 case '>':
2800 case '?':
2801 case '[':
2802 case '\\':
2803 case ']':
2804 case '`':
2805 case '|':
2806 return false;
2808 if (pCanonic != nullptr) {
2809 appendUCS4(
2810 buf, nUTF32, eEscapeType, PART_URIC,
2811 eCharset, true);
2814 aTheCanonic = buf.makeStringAndClear();
2816 else
2817 return false;
2820 if (pCanonic != nullptr) {
2821 *pCanonic = aTheCanonic;
2823 return true;
2826 bool INetURLObject::setHost(OUString const & rTheHost,
2827 rtl_TextEncoding eCharset)
2829 if (!getSchemeInfo().m_bHost)
2830 return false;
2831 OUStringBuffer aSynHost(rTheHost);
2832 bool bNetBiosName = false;
2833 switch (m_eScheme)
2835 case INetProtocol::File:
2837 OUString sTemp(aSynHost.toString());
2838 if (sTemp.equalsIgnoreAsciiCase("localhost"))
2840 aSynHost.setLength(0);
2842 bNetBiosName = true;
2844 break;
2845 case INetProtocol::Ldap:
2846 if (aSynHost.isEmpty() && m_aPort.isPresent())
2847 return false;
2848 break;
2850 default:
2851 if (aSynHost.isEmpty())
2852 return false;
2853 break;
2855 if (!parseHostOrNetBiosName(
2856 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2857 EncodeMechanism::WasEncoded, eCharset, bNetBiosName, &aSynHost))
2858 return false;
2859 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2860 m_aPort += nDelta;
2861 m_aPath += nDelta;
2862 m_aQuery += nDelta;
2863 m_aFragment += nDelta;
2864 return true;
2867 // static
// Parses and re-encodes the path that starts at *pBegin, following the rules
// of eScheme.  Depending on the scheme, scanning runs up to pEnd, up to
// nFragmentDelimiter, or additionally up to nQueryDelimiter.  Each character
// is read with getUTF32() and appended in encoded form with appendUCS4().
//
// On success the function returns true, stores the re-encoded path in
// rSynPath and sets *pBegin to the first character that was not consumed.
// It returns false, without assigning either output, if the path is not
// acceptable for the scheme.
2868 bool INetURLObject::parsePath(INetProtocol eScheme,
2869 sal_Unicode const ** pBegin,
2870 sal_Unicode const * pEnd,
2871 EncodeMechanism eMechanism,
2872 rtl_TextEncoding eCharset,
2873 bool bSkippedInitialSlash,
2874 sal_uInt32 nSegmentDelimiter,
2875 sal_uInt32 nAltSegmentDelimiter,
2876 sal_uInt32 nQueryDelimiter,
2877 sal_uInt32 nFragmentDelimiter,
2878 OUStringBuffer &rSynPath)
2880 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2882 sal_Unicode const * pPos = *pBegin;
2883 OUStringBuffer aTheSynPath(256);
2885 switch (eScheme)
2887 case INetProtocol::NotValid:
2888 return false;
// ftp: the path has to start with '/' (or be empty) and runs up to the
// fragment; an empty path becomes "/".
2890 case INetProtocol::Ftp:
2891 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2892 return false;
2893 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2895 EscapeType eEscapeType;
2896 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2897 eCharset, eEscapeType);
2898 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2899 PART_HTTP_PATH, eCharset, true);
2901 if (aTheSynPath.isEmpty())
2902 aTheSynPath.append('/');
2903 break;
// Like ftp, but the path also ends at the query delimiter.
2905 case INetProtocol::Http:
2906 case INetProtocol::VndSunStarWebdav:
2907 case INetProtocol::Https:
2908 case INetProtocol::Smb:
2909 case INetProtocol::Cmis:
2910 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2911 return false;
2912 while (pPos < pEnd && *pPos != nQueryDelimiter
2913 && *pPos != nFragmentDelimiter)
2915 EscapeType eEscapeType;
2916 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2917 eCharset, eEscapeType);
2918 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2919 PART_HTTP_PATH, eCharset, true);
2921 if (aTheSynPath.isEmpty())
2922 aTheSynPath.append('/');
2923 break;
// file: unescaped segment delimiters (either variant) are normalized to '/'.
2925 case INetProtocol::File:
2927 if (bSkippedInitialSlash)
2928 aTheSynPath.append('/');
2929 else if (pPos < pEnd
2930 && *pPos != nSegmentDelimiter
2931 && *pPos != nAltSegmentDelimiter)
2932 return false;
2933 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2935 EscapeType eEscapeType;
2936 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2937 eCharset, eEscapeType);
2938 if (eEscapeType == EscapeType::NONE)
2940 if (nUTF32 == nSegmentDelimiter
2941 || nUTF32 == nAltSegmentDelimiter)
2943 aTheSynPath.append('/');
2944 continue;
2946 else if (nUTF32 == '|'
2947 && (pPos == pEnd
2948 || *pPos == nFragmentDelimiter
2949 || *pPos == nSegmentDelimiter
2950 || *pPos == nAltSegmentDelimiter)
2951 && aTheSynPath.getLength() == 2
2952 && rtl::isAsciiAlpha(aTheSynPath[1]))
2954 // A first segment of <ALPHA "|"> is translated to
2955 // <ALPHA ":">:
2956 aTheSynPath.append(':');
2957 continue;
2960 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2961 PART_PCHAR, eCharset, true);
2963 if (aTheSynPath.isEmpty())
2964 aTheSynPath.append('/');
2965 break;
// mailto: no leading '/' required, may be empty, ends at query or fragment.
2968 case INetProtocol::Mailto:
2969 while (pPos < pEnd && *pPos != nQueryDelimiter
2970 && *pPos != nFragmentDelimiter)
2972 EscapeType eEscapeType;
2973 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2974 eCharset, eEscapeType);
2975 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2976 PART_MAILTO, eCharset, true);
2978 break;
// No leading '/' required, may be empty, ends at query or fragment.
2981 case INetProtocol::PrivSoffice:
2982 case INetProtocol::Slot:
2983 case INetProtocol::Hid:
2984 case INetProtocol::Macro:
2985 case INetProtocol::Uno:
2986 case INetProtocol::Component:
2987 case INetProtocol::Ldap:
2988 while (pPos < pEnd && *pPos != nQueryDelimiter
2989 && *pPos != nFragmentDelimiter)
2991 EscapeType eEscapeType;
2992 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2993 eCharset, eEscapeType);
2994 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2995 PART_PATH_BEFORE_QUERY, eCharset, true);
2997 break;
// vnd.sun.star.help: an absent path becomes "/", a present one has to start
// with '/'.
2999 case INetProtocol::VndSunStarHelp:
3000 if (pPos == pEnd
3001 || *pPos == nQueryDelimiter
3002 || *pPos == nFragmentDelimiter)
3003 aTheSynPath.append('/');
3004 else
3006 if (*pPos != '/')
3007 return false;
3008 while (pPos < pEnd && *pPos != nQueryDelimiter
3009 && *pPos != nFragmentDelimiter)
3011 EscapeType eEscapeType;
3012 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
3013 eMechanism,
3014 eCharset, eEscapeType);
3015 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3016 PART_HTTP_PATH, eCharset, true);
3019 break;
// Opaque schemes: everything up to the fragment is taken, '?' included.
3021 case INetProtocol::Javascript:
3022 case INetProtocol::Data:
3023 case INetProtocol::Cid:
3024 case INetProtocol::Db:
3025 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3027 EscapeType eEscapeType;
3028 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3029 eCharset, eEscapeType);
3030 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3031 PART_URIC, eCharset, true);
3033 break;
// Unescaped '/' separates segments and is kept literally; everything else is
// encoded as a path character.  An empty path becomes "/".
3035 case INetProtocol::VndSunStarHier:
3036 case INetProtocol::VndSunStarPkg:
3037 if (pPos < pEnd && *pPos != '/'
3038 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3039 return false;
3040 while (pPos < pEnd && *pPos != nQueryDelimiter
3041 && *pPos != nFragmentDelimiter)
3043 EscapeType eEscapeType;
3044 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3045 eCharset, eEscapeType);
3046 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3047 aTheSynPath.append('/');
3048 else
3049 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3050 PART_PCHAR, eCharset, false);
3052 if (aTheSynPath.isEmpty())
3053 aTheSynPath.append('/');
3054 break;
// The path must not be empty; its first character is encoded with
// PART_URIC_NO_SLASH, all following ones with PART_URIC.
3056 case INetProtocol::VndSunStarCmd:
3057 case INetProtocol::VndSunStarExpand:
3059 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3060 return false;
3061 Part ePart = PART_URIC_NO_SLASH;
3062 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3064 EscapeType eEscapeType;
3065 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3066 eCharset, eEscapeType);
3067 appendUCS4(aTheSynPath, nUTF32, eEscapeType, ePart,
3068 eCharset, true);
3069 ePart = PART_URIC;
3071 break;
// telnet: only an empty path or a single "/" is accepted; the result is
// always "/".
3074 case INetProtocol::Telnet:
3075 if (pPos < pEnd)
3077 if (*pPos != '/' || pEnd - pPos > 1)
3078 return false;
3079 ++pPos;
3081 aTheSynPath.append('/');
3082 break;
// vnd.sun.star.tdoc: a leading '/' is mandatory.
3084 case INetProtocol::VndSunStarTdoc:
3085 if (pPos == pEnd || *pPos != '/')
3086 return false;
3087 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3089 EscapeType eEscapeType;
3090 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3091 eCharset, eEscapeType);
3092 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3093 aTheSynPath.append('/');
3094 else
3095 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3096 PART_PCHAR, eCharset, false);
3098 break;
// Generic schemes and sftp: everything up to the fragment; an empty path is
// an error.
3100 case INetProtocol::Generic:
3101 case INetProtocol::Sftp:
3102 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3104 EscapeType eEscapeType;
3105 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3106 eCharset, eEscapeType);
3107 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3108 PART_URIC, eCharset, true);
3110 if (aTheSynPath.isEmpty())
3111 return false;
3112 break;
// A scheme not listed above only triggers the assertion; control then falls
// through to the success path with an empty aTheSynPath.
3113 default:
3114 OSL_ASSERT(false);
3115 break;
3118 *pBegin = pPos;
3119 rSynPath = aTheSynPath;
3120 return true;
3123 bool INetURLObject::setPath(OUString const & rThePath,
3124 EncodeMechanism eMechanism,
3125 rtl_TextEncoding eCharset)
3127 OUStringBuffer aSynPath;
3128 sal_Unicode const * p = rThePath.getStr();
3129 sal_Unicode const * pEnd = p + rThePath.getLength();
3130 if (!parsePath(m_eScheme, &p, pEnd, eMechanism, eCharset, false,
3131 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3132 || p != pEnd)
3133 return false;
3134 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3135 m_aQuery += nDelta;
3136 m_aFragment += nDelta;
3137 return true;
3140 bool INetURLObject::checkHierarchical() const {
3141 if (m_eScheme == INetProtocol::VndSunStarExpand) {
3142 OSL_FAIL(
3143 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3144 return true;
3145 } else {
3146 return getSchemeInfo().m_bHierarchical;
3150 bool INetURLObject::Append(OUString const & rTheSegment,
3151 EncodeMechanism eMechanism,
3152 rtl_TextEncoding eCharset)
3154 return insertName(rTheSegment, false, LAST_SEGMENT, eMechanism, eCharset);
3157 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3158 bool bIgnoreFinalSlash)
3159 const
3161 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3162 "INetURLObject::getSegment(): Bad index");
3164 if (!checkHierarchical())
3165 return SubString();
3167 sal_Unicode const * pPathBegin
3168 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3169 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3170 sal_Unicode const * pSegBegin;
3171 sal_Unicode const * pSegEnd;
3172 if (nIndex == LAST_SEGMENT)
3174 pSegEnd = pPathEnd;
3175 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3176 --pSegEnd;
3177 if (pSegEnd <= pPathBegin)
3178 return SubString();
3179 pSegBegin = pSegEnd - 1;
3180 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3181 --pSegBegin;
3183 else
3185 pSegBegin = pPathBegin;
3186 while (nIndex-- > 0)
3189 ++pSegBegin;
3190 if (pSegBegin >= pPathEnd)
3191 return SubString();
3193 while (*pSegBegin != '/');
3194 pSegEnd = pSegBegin + 1;
3195 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3196 ++pSegEnd;
3199 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3200 pSegEnd - pSegBegin);
3203 bool INetURLObject::insertName(OUString const & rTheName,
3204 bool bAppendFinalSlash, sal_Int32 nIndex,
3205 EncodeMechanism eMechanism,
3206 rtl_TextEncoding eCharset)
3208 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3209 "INetURLObject::insertName(): Bad index");
3211 if (!checkHierarchical())
3212 return false;
3214 sal_Unicode const * pPathBegin
3215 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3216 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3217 sal_Unicode const * pPrefixEnd;
3218 bool bInsertSlash;
3219 sal_Unicode const * pSuffixBegin;
3220 if (nIndex == LAST_SEGMENT)
3222 pPrefixEnd = pPathEnd;
3223 if (pPrefixEnd > pPathBegin &&
3224 pPrefixEnd[-1] == '/')
3226 --pPrefixEnd;
3228 bInsertSlash = bAppendFinalSlash;
3229 pSuffixBegin = pPathEnd;
3231 else if (nIndex == 0)
3233 pPrefixEnd = pPathBegin;
3234 bInsertSlash =
3235 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3236 (pPathBegin == pPathEnd && bAppendFinalSlash);
3237 pSuffixBegin =
3238 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3239 !bAppendFinalSlash)
3240 ? pPathEnd : pPathBegin;
3242 else
3244 pPrefixEnd = pPathBegin;
3245 sal_Unicode const * pEnd = pPathEnd;
3246 if (pEnd > pPathBegin && pEnd[-1] == '/')
3247 --pEnd;
3248 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3249 bInsertSlash = false;
3250 pSuffixBegin = pPathEnd;
3251 while (nIndex-- > 0)
3252 for (;;)
3254 if (bSkip)
3255 ++pPrefixEnd;
3256 bSkip = true;
3257 if (pPrefixEnd >= pEnd)
3259 if (nIndex == 0)
3261 bInsertSlash = bAppendFinalSlash;
3262 break;
3264 else
3265 return false;
3267 if (*pPrefixEnd == '/')
3269 pSuffixBegin = pPrefixEnd;
3270 break;
3275 OUStringBuffer aNewPath(256);
3276 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3277 aNewPath.append('/');
3278 aNewPath.append(encodeText(rTheName, PART_PCHAR,
3279 eMechanism, eCharset, true));
3280 if (bInsertSlash) {
3281 aNewPath.append('/');
3283 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3285 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3286 RTL_TEXTENCODING_UTF8);
3289 void INetURLObject::clearQuery()
3291 if (HasError())
3292 return;
3293 if (m_aQuery.isPresent())
3295 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3296 m_aQuery.getLength() + 1);
3297 m_aFragment += m_aQuery.clear() - 1;
3301 bool INetURLObject::setQuery(OUString const & rTheQuery,
3302 EncodeMechanism eMechanism,
3303 rtl_TextEncoding eCharset)
3305 if (!getSchemeInfo().m_bQuery)
3306 return false;
3307 OUString aNewQuery(encodeText(rTheQuery, PART_URIC,
3308 eMechanism, eCharset, true));
3309 sal_Int32 nDelta;
3310 if (m_aQuery.isPresent())
3311 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3312 else
3314 m_aAbsURIRef.insert(m_aPath.getEnd(), u'?');
3315 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3316 + 1;
3318 m_aFragment += nDelta;
3319 return true;
3322 bool INetURLObject::clearFragment()
3324 if (HasError())
3325 return false;
3326 if (m_aFragment.isPresent())
3328 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3329 m_aFragment.clear();
3331 return true;
3334 bool INetURLObject::setFragment(OUString const & rTheFragment,
3335 EncodeMechanism eMechanism,
3336 rtl_TextEncoding eCharset)
3338 if (HasError())
3339 return false;
3340 OUString aNewFragment(encodeText(rTheFragment, PART_URIC,
3341 eMechanism, eCharset, true));
3342 if (m_aFragment.isPresent())
3343 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3344 else
3346 m_aAbsURIRef.append('#');
3347 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3349 return true;
3352 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3354 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3355 return (eStyle & FSysStyle::Dos)
3356 && m_aPath.getLength() >= 3
3357 && p[0] == '/'
3358 && rtl::isAsciiAlpha(p[1])
3359 && p[2] == ':'
3360 && (m_aPath.getLength() == 3 || p[3] == '/');
3363 // static
3364 OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3365 sal_Unicode const * pEnd,
3366 Part ePart, EncodeMechanism eMechanism,
3367 rtl_TextEncoding eCharset,
3368 bool bKeepVisibleEscapes)
3370 OUStringBuffer aResult(256);
3371 while (pBegin < pEnd)
3373 EscapeType eEscapeType;
3374 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3375 eMechanism, eCharset, eEscapeType);
3376 appendUCS4(aResult, nUTF32, eEscapeType, ePart,
3377 eCharset, bKeepVisibleEscapes);
3379 return aResult.makeStringAndClear();
3382 // static
3383 OUString INetURLObject::decode(sal_Unicode const * pBegin,
3384 sal_Unicode const * pEnd,
3385 DecodeMechanism eMechanism,
3386 rtl_TextEncoding eCharset)
3388 switch (eMechanism)
3390 case DecodeMechanism::NONE:
3391 return OUString(pBegin, pEnd - pBegin);
3393 case DecodeMechanism::ToIUri:
3394 eCharset = RTL_TEXTENCODING_UTF8;
3395 break;
3397 default:
3398 break;
3400 OUStringBuffer aResult(static_cast<int>(pEnd-pBegin));
3401 while (pBegin < pEnd)
3403 EscapeType eEscapeType;
3404 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3405 EncodeMechanism::WasEncoded, eCharset, eEscapeType);
3406 switch (eEscapeType)
3408 case EscapeType::NONE:
3409 aResult.appendUtf32(nUTF32);
3410 break;
3412 case EscapeType::Octet:
3413 appendEscape(aResult, nUTF32);
3414 break;
3416 case EscapeType::Utf32:
3417 if (
3418 rtl::isAscii(nUTF32) &&
3420 eMechanism == DecodeMechanism::ToIUri ||
3422 eMechanism == DecodeMechanism::Unambiguous &&
3423 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3428 appendEscape(aResult, nUTF32);
3430 else
3431 aResult.appendUtf32(nUTF32);
3432 break;
3435 return aResult.makeStringAndClear();
3438 OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3439 rtl_TextEncoding eCharset) const
3441 INetURLObject aTemp(*this);
3442 aTemp.clearPassword();
3443 return aTemp.GetMainURL(eMechanism, eCharset);
3446 OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3447 rtl_TextEncoding eCharset) const
3449 INetURLObject aTemp(*this);
3450 aTemp.clearFragment();
3451 return aTemp.GetMainURL(eMechanism, eCharset);
3454 OUString
3455 INetURLObject::getAbbreviated(
3456 uno::Reference< util::XStringWidth > const & rStringWidth,
3457 sal_Int32 nWidth,
3458 DecodeMechanism eMechanism,
3459 rtl_TextEncoding eCharset)
3460 const
3462 OSL_ENSURE(rStringWidth.is(), "specification violation");
3463 OUStringBuffer aBuffer;
3464 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3465 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3466 if (m_eScheme != INetProtocol::Generic)
3468 aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3470 else
3472 if (!m_aAbsURIRef.isEmpty())
3474 sal_Unicode const * pSchemeBegin
3475 = m_aAbsURIRef.getStr();
3476 sal_Unicode const * pSchemeEnd = pSchemeBegin;
3478 while (pSchemeEnd[0] != ':')
3480 ++pSchemeEnd;
3482 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3485 aBuffer.append(':');
3486 bool bAuthority = getSchemeInfo().m_bAuthority;
3487 sal_Unicode const * pCoreBegin
3488 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3489 m_aPath.getBegin());
3490 sal_Unicode const * pCoreEnd
3491 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
3492 bool bSegment = false;
3493 if (getSchemeInfo().m_bHierarchical)
3495 OUString aRest;
3496 if (m_aQuery.isPresent())
3497 aRest = "?...";
3498 else if (m_aFragment.isPresent())
3499 aRest = "#...";
3500 OUStringBuffer aTrailer;
3501 sal_Unicode const * pBegin = pCoreBegin;
3502 sal_Unicode const * pEnd = pCoreEnd;
3503 sal_Unicode const * pPrefixBegin = pBegin;
3504 sal_Unicode const * pSuffixEnd = pEnd;
3505 bool bPrefix = true;
3506 bool bSuffix = true;
3509 if (bSuffix)
3511 sal_Unicode const * p = pSuffixEnd - 1;
3512 if (pSuffixEnd == pCoreEnd && *p == '/')
3513 --p;
3514 while (*p != '/')
3515 --p;
3516 if (bAuthority && p == pCoreBegin + 1)
3517 --p;
3518 OUString
3519 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3520 1 : 0),
3521 pSuffixEnd,
3522 eMechanism,
3523 eCharset));
3524 pSuffixEnd = p;
3525 OUStringBuffer aResult(aBuffer);
3526 if (pSuffixEnd != pBegin)
3527 aResult.append("...");
3528 aResult.append(aSegment);
3529 aResult.append(aTrailer.toString());
3530 aResult.append(aRest);
3531 if (rStringWidth->
3532 queryStringWidth(aResult.makeStringAndClear())
3533 <= nWidth)
3535 aTrailer.insert(0, aSegment);
3536 bSegment = true;
3537 pEnd = pSuffixEnd;
3539 else
3540 bSuffix = false;
3541 if (pPrefixBegin > pSuffixEnd)
3542 pPrefixBegin = pSuffixEnd;
3543 if (pBegin == pEnd)
3544 break;
3546 if (bPrefix)
3548 sal_Unicode const * p
3549 = pPrefixBegin
3550 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3552 OSL_ASSERT(p <= pEnd);
3553 while (p < pEnd && *p != '/')
3554 ++p;
3555 if (p == pCoreEnd - 1 && *p == '/')
3556 ++p;
3557 OUString
3558 aSegment(decode(pPrefixBegin
3559 + (pPrefixBegin == pCoreBegin ? 0 :
3561 p == pEnd ? p : p + 1,
3562 eMechanism,
3563 eCharset));
3564 pPrefixBegin = p;
3565 OUStringBuffer aResult(aBuffer);
3566 aResult.append(aSegment);
3567 if (pPrefixBegin != pEnd)
3568 aResult.append("...");
3569 aResult.append(aTrailer.toString());
3570 aResult.append(aRest);
3571 if (rStringWidth->
3572 queryStringWidth(aResult.makeStringAndClear())
3573 <= nWidth)
3575 aBuffer.append(aSegment);
3576 bSegment = true;
3577 pBegin = pPrefixBegin;
3579 else
3580 bPrefix = false;
3581 if (pPrefixBegin > pSuffixEnd)
3582 pSuffixEnd = pPrefixBegin;
3583 if (pBegin == pEnd)
3584 break;
3587 while (bPrefix || bSuffix);
3588 if (bSegment)
3590 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3591 aBuffer.append("...");
3592 aBuffer.append(aTrailer.toString());
3595 if (!bSegment)
3596 aBuffer.append(decode(pCoreBegin,
3597 pCoreEnd,
3598 eMechanism,
3599 eCharset));
3600 if (m_aQuery.isPresent())
3602 aBuffer.append('?');
3603 aBuffer.append(decode(m_aQuery, eMechanism, eCharset));
3605 if (m_aFragment.isPresent())
3607 aBuffer.append('#');
3608 aBuffer.append(decode(m_aFragment, eMechanism, eCharset));
3610 if (!aBuffer.isEmpty())
3612 OUStringBuffer aResult(aBuffer);
3613 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3614 > nWidth)
3615 for (sal_Int32 i = aBuffer.getLength();;)
3617 if (i == 0)
3619 aBuffer.setLength(aBuffer.getLength() - 1);
3620 if (aBuffer.isEmpty())
3621 break;
3623 else
3625 aBuffer.setLength(--i);
3626 aBuffer.append("...");
3628 aResult = aBuffer;
3629 if (rStringWidth->
3630 queryStringWidth(aResult.makeStringAndClear())
3631 <= nWidth)
3632 break;
3635 return aBuffer.makeStringAndClear();
3638 bool INetURLObject::operator ==(INetURLObject const & rObject) const
3640 if (m_eScheme != rObject.m_eScheme)
3641 return false;
3642 if (m_eScheme == INetProtocol::NotValid)
3643 return m_aAbsURIRef.toString() == rObject.m_aAbsURIRef.toString();
3644 if ((m_aScheme.compare(
3645 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3646 != 0)
3647 || GetUser(DecodeMechanism::NONE) != rObject.GetUser(DecodeMechanism::NONE)
3648 || GetPass(DecodeMechanism::NONE) != rObject.GetPass(DecodeMechanism::NONE)
3649 || !GetHost(DecodeMechanism::NONE).equalsIgnoreAsciiCase(
3650 rObject.GetHost(DecodeMechanism::NONE))
3651 || GetPort() != rObject.GetPort()
3652 || HasParam() != rObject.HasParam()
3653 || GetParam() != rObject.GetParam())
3654 return false;
3655 OUString aPath1(GetURLPath(DecodeMechanism::NONE));
3656 OUString aPath2(rObject.GetURLPath(DecodeMechanism::NONE));
3657 switch (m_eScheme)
3659 case INetProtocol::File:
3661 // If the URL paths of two file URLs only differ in that one has a
3662 // final '/' and the other has not, take the two paths as
3663 // equivalent (this could be useful for other schemes, too):
3664 sal_Int32 nLength = aPath1.getLength();
3665 switch (nLength - aPath2.getLength())
3667 case -1:
3668 if (aPath2[nLength] != '/')
3669 return false;
3670 break;
3672 case 0:
3673 break;
3675 case 1:
3676 if (aPath1[--nLength] != '/')
3677 return false;
3678 break;
3680 default:
3681 return false;
3683 return aPath1.compareTo(aPath2, nLength) == 0;
3686 default:
3687 return aPath1 == aPath2;
3691 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
3692 OUString const & rTheUser,
3693 OUString const & rThePassword,
3694 OUString const & rTheHost,
3695 sal_uInt32 nThePort,
3696 OUString const & rThePath)
3698 setInvalid();
3699 m_eScheme = eTheScheme;
3700 if (HasError() || m_eScheme == INetProtocol::Generic)
3701 return false;
3702 m_aAbsURIRef.setLength(0);
3703 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
3704 m_aAbsURIRef.append(':');
3705 if (getSchemeInfo().m_bAuthority)
3707 m_aAbsURIRef.append("//");
3708 bool bUserInfo = false;
3709 if (getSchemeInfo().m_bUser)
3711 if (!rTheUser.isEmpty())
3713 m_aUser.set(m_aAbsURIRef,
3714 encodeText(rTheUser, PART_USER_PASSWORD,
3715 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3716 m_aAbsURIRef.getLength());
3717 bUserInfo = true;
3720 else if (!rTheUser.isEmpty())
3722 setInvalid();
3723 return false;
3725 if (!rThePassword.isEmpty())
3727 if (getSchemeInfo().m_bPassword)
3729 m_aAbsURIRef.append(':');
3730 m_aAuth.set(m_aAbsURIRef,
3731 encodeText(rThePassword, PART_USER_PASSWORD,
3732 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3733 m_aAbsURIRef.getLength());
3734 bUserInfo = true;
3736 else
3738 setInvalid();
3739 return false;
3742 if (bUserInfo && getSchemeInfo().m_bHost)
3743 m_aAbsURIRef.append('@');
3744 if (getSchemeInfo().m_bHost)
3746 OUStringBuffer aSynHost(rTheHost);
3747 bool bNetBiosName = false;
3748 switch (m_eScheme)
3750 case INetProtocol::File:
3752 OUString sTemp(aSynHost.toString());
3753 if (sTemp.equalsIgnoreAsciiCase( "localhost" ))
3755 aSynHost.setLength(0);
3757 bNetBiosName = true;
3759 break;
3761 case INetProtocol::Ldap:
3762 if (aSynHost.isEmpty() && nThePort != 0)
3764 setInvalid();
3765 return false;
3767 break;
3769 default:
3770 if (aSynHost.isEmpty())
3772 setInvalid();
3773 return false;
3775 break;
3777 if (!parseHostOrNetBiosName(
3778 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
3779 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, bNetBiosName, &aSynHost))
3781 setInvalid();
3782 return false;
3784 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
3785 m_aAbsURIRef.getLength());
3786 if (nThePort != 0)
3788 if (getSchemeInfo().m_bPort)
3790 m_aAbsURIRef.append(':');
3791 m_aPort.set(m_aAbsURIRef,
3792 OUString::number(nThePort),
3793 m_aAbsURIRef.getLength());
3795 else
3797 setInvalid();
3798 return false;
3802 else if (!rTheHost.isEmpty() || nThePort != 0)
3804 setInvalid();
3805 return false;
3808 OUStringBuffer aSynPath;
3809 sal_Unicode const * p = rThePath.getStr();
3810 sal_Unicode const * pEnd = p + rThePath.getLength();
3811 if (!parsePath(m_eScheme, &p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false, '/',
3812 0x80000000, 0x80000000, 0x80000000, aSynPath)
3813 || p != pEnd)
3815 setInvalid();
3816 return false;
3818 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
3819 m_aAbsURIRef.getLength());
3820 return true;
3823 // static
3824 OUString INetURLObject::GetAbsURL(OUString const & rTheBaseURIRef,
3825 OUString const & rTheRelURIRef,
3826 EncodeMechanism eEncodeMechanism,
3827 DecodeMechanism eDecodeMechanism,
3828 rtl_TextEncoding eCharset)
3830 // Backwards compatibility:
3831 if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
3832 return rTheRelURIRef;
3834 INetURLObject aTheAbsURIRef;
3835 bool bWasAbsolute;
3836 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
3837 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef,
3838 bWasAbsolute, eEncodeMechanism,
3839 eCharset, false, false,
3840 false, FSysStyle::Detect)
3841 || eEncodeMechanism != EncodeMechanism::WasEncoded
3842 || eDecodeMechanism != DecodeMechanism::ToIUri
3843 || eCharset != RTL_TEXTENCODING_UTF8 ?
3844 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
3845 rTheRelURIRef;
3848 OUString INetURLObject::getExternalURL() const
3850 OUString aTheExtURIRef;
3851 translateToExternal(
3852 m_aAbsURIRef.toString(), aTheExtURIRef);
3853 return aTheExtURIRef;
3856 bool INetURLObject::isSchemeEqualTo(std::u16string_view scheme) const {
3857 return m_aScheme.isPresent()
3858 && (rtl_ustr_compareIgnoreAsciiCase_WithLength(
3859 scheme.data(), scheme.size(),
3860 m_aAbsURIRef.getStr() + m_aScheme.getBegin(),
3861 m_aScheme.getLength())
3862 == 0);
3865 bool INetURLObject::isAnyKnownWebDAVScheme() const {
3866 return ( isSchemeEqualTo( INetProtocol::Http ) ||
3867 isSchemeEqualTo( INetProtocol::Https ) ||
3868 isSchemeEqualTo( INetProtocol::VndSunStarWebdav ) ||
3869 isSchemeEqualTo( u"vnd.sun.star.webdavs" ) ||
3870 isSchemeEqualTo( u"webdav" ) ||
3871 isSchemeEqualTo( u"webdavs" ));
3874 // static
3875 OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
3877 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
3880 // static
3881 OUString INetURLObject::GetSchemeName(INetProtocol eTheScheme)
3883 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pScheme);
3886 // static
3887 INetProtocol INetURLObject::CompareProtocolScheme(OUString const &
3888 rTheAbsURIRef)
3890 sal_Unicode const * p = rTheAbsURIRef.getStr();
3891 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
3892 return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid;
3895 OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
3896 rtl_TextEncoding eCharset) const
3898 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3899 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3900 if (!getSchemeInfo().m_bHost)
3901 return OUString();
3902 OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset));
3903 if (m_aPort.isPresent())
3905 aHostPort.append(':');
3906 aHostPort.append(decode(m_aPort, eMechanism, eCharset));
3908 return aHostPort.makeStringAndClear();
3911 sal_uInt32 INetURLObject::GetPort() const
3913 if (m_aPort.isPresent())
3915 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
3916 sal_Unicode const * pEnd = p + m_aPort.getLength();
3917 sal_uInt32 nThePort;
3918 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
3919 return nThePort;
3921 return 0;
3924 bool INetURLObject::SetPort(sal_uInt32 nThePort)
3926 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
3928 OUString aNewPort(OUString::number(nThePort));
3929 sal_Int32 nDelta;
3930 if (m_aPort.isPresent())
3931 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
3932 else
3934 m_aAbsURIRef.insert(m_aHost.getEnd(), u':');
3935 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
3936 + 1;
3938 m_aPath += nDelta;
3939 m_aQuery += nDelta;
3940 m_aFragment += nDelta;
3941 return true;
3943 return false;
3946 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
3948 if (!checkHierarchical())
3949 return 0;
3951 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3952 sal_Unicode const * pEnd = p + m_aPath.getLength();
3953 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
3954 --pEnd;
3955 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
3956 while (p != pEnd)
3957 if (*p++ == '/')
3958 ++n;
3959 return n;
3962 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
3964 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
3965 if (!aSegment.isPresent())
3966 return false;
3968 OUStringBuffer aNewPath(m_aPath.getLength());
3969 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
3970 aSegment.getBegin() - m_aPath.getBegin());
3971 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
3972 aNewPath.append('/');
3973 else
3974 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
3975 m_aPath.getEnd() - aSegment.getEnd());
3976 if (aNewPath.isEmpty() && !aSegment.isEmpty() &&
3977 m_aAbsURIRef[aSegment.getBegin()] == '/')
3979 aNewPath.append('/');
3982 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3983 RTL_TEXTENCODING_UTF8);
3986 OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
3987 DecodeMechanism eMechanism,
3988 rtl_TextEncoding eCharset) const
3990 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
3991 if (!aSegment.isPresent())
3992 return OUString();
3994 sal_Unicode const * pSegBegin
3995 = m_aAbsURIRef.getStr() + aSegment.getBegin();
3996 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
3998 if (pSegBegin < pSegEnd && *pSegBegin == '/')
3999 ++pSegBegin;
4000 sal_Unicode const * p = pSegBegin;
4001 while (p != pSegEnd && *p != ';')
4002 ++p;
4004 return decode(pSegBegin, p, eMechanism, eCharset);
4007 bool INetURLObject::setName(OUString const& rTheName, EncodeMechanism eMechanism,
4008 rtl_TextEncoding eCharset)
4010 SubString aSegment(getSegment(LAST_SEGMENT, true));
4011 if (!aSegment.isPresent())
4012 return false;
4014 sal_Unicode const * pPathBegin
4015 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4016 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4017 sal_Unicode const * pSegBegin
4018 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4019 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4021 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4022 ++pSegBegin;
4023 sal_Unicode const * p = pSegBegin;
4024 while (p != pSegEnd && *p != ';')
4025 ++p;
4027 OUStringBuffer aNewPath(256);
4028 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4029 aNewPath.append(encodeText(rTheName, PART_PCHAR, eMechanism, eCharset, true));
4030 aNewPath.append(p, pPathEnd - p);
4032 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4033 RTL_TEXTENCODING_UTF8);
4036 bool INetURLObject::hasExtension()
4037 const
4039 SubString aSegment(getSegment(LAST_SEGMENT, true/*bIgnoreFinalSlash*/));
4040 if (!aSegment.isPresent())
4041 return false;
4043 sal_Unicode const * pSegBegin
4044 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4045 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4047 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4048 ++pSegBegin;
4049 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4050 if (*p == '.' && p != pSegBegin)
4051 return true;
4052 return false;
4055 OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4056 DecodeMechanism eMechanism,
4057 rtl_TextEncoding eCharset) const
4059 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4060 if (!aSegment.isPresent())
4061 return OUString();
4063 sal_Unicode const * pSegBegin
4064 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4065 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4067 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4068 ++pSegBegin;
4069 sal_Unicode const * pExtension = nullptr;
4070 sal_Unicode const * p = pSegBegin;
4071 for (; p != pSegEnd && *p != ';'; ++p)
4072 if (*p == '.' && p != pSegBegin)
4073 pExtension = p;
4074 if (!pExtension)
4075 pExtension = p;
4077 return decode(pSegBegin, pExtension, eMechanism, eCharset);
4080 bool INetURLObject::setBase(OUString const & rTheBase, sal_Int32 nIndex,
4081 EncodeMechanism eMechanism,
4082 rtl_TextEncoding eCharset)
4084 SubString aSegment(getSegment(nIndex, true/*bIgnoreFinalSlash*/));
4085 if (!aSegment.isPresent())
4086 return false;
4088 sal_Unicode const * pPathBegin
4089 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4090 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4091 sal_Unicode const * pSegBegin
4092 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4093 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4095 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4096 ++pSegBegin;
4097 sal_Unicode const * pExtension = nullptr;
4098 sal_Unicode const * p = pSegBegin;
4099 for (; p != pSegEnd && *p != ';'; ++p)
4100 if (*p == '.' && p != pSegBegin)
4101 pExtension = p;
4102 if (!pExtension)
4103 pExtension = p;
4105 OUStringBuffer aNewPath;
4106 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4107 aNewPath.append(encodeText(rTheBase, PART_PCHAR,
4108 eMechanism, eCharset, true));
4109 aNewPath.append(pExtension, pPathEnd - pExtension);
4111 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4112 RTL_TEXTENCODING_UTF8);
4115 OUString INetURLObject::getExtension(sal_Int32 nIndex,
4116 bool bIgnoreFinalSlash,
4117 DecodeMechanism eMechanism,
4118 rtl_TextEncoding eCharset) const
4120 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4121 if (!aSegment.isPresent())
4122 return OUString();
4124 sal_Unicode const * pSegBegin
4125 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4126 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4128 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4129 ++pSegBegin;
4130 sal_Unicode const * pExtension = nullptr;
4131 sal_Unicode const * p = pSegBegin;
4132 for (; p != pSegEnd && *p != ';'; ++p)
4133 if (*p == '.' && p != pSegBegin)
4134 pExtension = p;
4136 if (!pExtension)
4137 return OUString();
4139 return decode(pExtension + 1, p, eMechanism, eCharset);
4142 bool INetURLObject::setExtension(OUString const & rTheExtension,
4143 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4144 rtl_TextEncoding eCharset)
4146 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4147 if (!aSegment.isPresent())
4148 return false;
4150 sal_Unicode const * pPathBegin
4151 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4152 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4153 sal_Unicode const * pSegBegin
4154 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4155 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4157 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4158 ++pSegBegin;
4159 sal_Unicode const * pExtension = nullptr;
4160 sal_Unicode const * p = pSegBegin;
4161 for (; p != pSegEnd && *p != ';'; ++p)
4162 if (*p == '.' && p != pSegBegin)
4163 pExtension = p;
4164 if (!pExtension)
4165 pExtension = p;
4167 OUStringBuffer aNewPath(128);
4168 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4169 aNewPath.append('.');
4170 aNewPath.append(encodeText(rTheExtension, PART_PCHAR,
4171 EncodeMechanism::WasEncoded, eCharset, true));
4172 aNewPath.append(p, pPathEnd - p);
4174 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4175 RTL_TEXTENCODING_UTF8);
4178 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4180 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4181 if (!aSegment.isPresent())
4182 return false;
4184 sal_Unicode const * pPathBegin
4185 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4186 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4187 sal_Unicode const * pSegBegin
4188 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4189 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4191 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4192 ++pSegBegin;
4193 sal_Unicode const * pExtension = nullptr;
4194 sal_Unicode const * p = pSegBegin;
4195 for (; p != pSegEnd && *p != ';'; ++p)
4196 if (*p == '.' && p != pSegBegin)
4197 pExtension = p;
4198 if (!pExtension)
4199 return true;
4201 OUString aNewPath =
4202 OUString::Concat(std::u16string_view(pPathBegin, pExtension - pPathBegin)) +
4203 std::u16string_view(p, pPathEnd - p);
4205 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4208 bool INetURLObject::hasFinalSlash() const
4210 if (!checkHierarchical())
4211 return false;
4213 sal_Unicode const * pPathBegin
4214 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4215 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4216 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4219 bool INetURLObject::setFinalSlash()
4221 if (!checkHierarchical())
4222 return false;
4224 sal_Unicode const * pPathBegin
4225 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4226 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4227 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4228 return true;
4230 OUString aNewPath
4231 = OUString::Concat(std::u16string_view(pPathBegin, pPathEnd - pPathBegin)) + "/";
4233 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4236 bool INetURLObject::removeFinalSlash()
4238 if (!checkHierarchical())
4239 return false;
4241 sal_Unicode const * pPathBegin
4242 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4243 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4244 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4245 return true;
4247 --pPathEnd;
4248 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4249 return false;
4250 OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4252 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4255 OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4256 sal_Unicode * pDelimiter) const
4258 if (m_eScheme != INetProtocol::File)
4259 return OUString();
4261 if (((eStyle & FSysStyle::Vos) ? 1 : 0)
4262 + ((eStyle & FSysStyle::Unix) ? 1 : 0)
4263 + ((eStyle & FSysStyle::Dos) ? 1 : 0)
4264 > 1)
4266 if(eStyle & FSysStyle::Vos && m_aHost.isPresent() && m_aHost.getLength() > 0)
4268 eStyle= FSysStyle::Vos;
4270 else
4272 if(hasDosVolume(eStyle) || ((eStyle & FSysStyle::Dos) && m_aHost.isPresent() && m_aHost.getLength() > 0))
4274 eStyle = FSysStyle::Dos;
4276 else
4278 if(eStyle & FSysStyle::Unix && (!m_aHost.isPresent() || m_aHost.getLength() == 0))
4280 eStyle = FSysStyle::Unix;
4282 else
4284 eStyle= FSysStyle(0);
4290 switch (eStyle)
4292 case FSysStyle::Vos:
4294 if (pDelimiter)
4295 *pDelimiter = '/';
4297 OUStringBuffer aSynFSysPath;
4298 aSynFSysPath.append("//");
4299 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4300 aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4301 RTL_TEXTENCODING_UTF8));
4302 else
4303 aSynFSysPath.append('.');
4304 aSynFSysPath.append(decode(m_aPath, DecodeMechanism::WithCharset,
4305 RTL_TEXTENCODING_UTF8));
4306 return aSynFSysPath.makeStringAndClear();
4309 case FSysStyle::Unix:
4311 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4312 return OUString();
4314 if (pDelimiter)
4315 *pDelimiter = '/';
4317 return decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8);
4320 case FSysStyle::Dos:
4322 if (pDelimiter)
4323 *pDelimiter = '\\';
4325 OUStringBuffer aSynFSysPath(64);
4326 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4328 aSynFSysPath.append("\\\\");
4329 aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4330 RTL_TEXTENCODING_UTF8));
4331 aSynFSysPath.append('\\');
4333 sal_Unicode const * p
4334 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4335 sal_Unicode const * pEnd = p + m_aPath.getLength();
4336 DBG_ASSERT(p < pEnd && *p == '/',
4337 "INetURLObject::getFSysPath(): Bad path");
4338 ++p;
4339 while (p < pEnd)
4341 EscapeType eEscapeType;
4342 sal_uInt32 nUTF32 = getUTF32(p, pEnd, EncodeMechanism::WasEncoded,
4343 RTL_TEXTENCODING_UTF8,
4344 eEscapeType);
4345 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
4346 aSynFSysPath.append('\\');
4347 else
4348 aSynFSysPath.appendUtf32(nUTF32);
4350 return aSynFSysPath.makeStringAndClear();
4353 default:
4354 return OUString();
4358 // static
4359 void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4360 sal_uInt32 nUCS4)
4362 DBG_ASSERT(nUCS4 < 0x80000000,
4363 "INetURLObject::appendUCS4Escape(): Bad char");
4364 if (nUCS4 < 0x80)
4365 appendEscape(rTheText, nUCS4);
4366 else if (nUCS4 < 0x800)
4368 appendEscape(rTheText, nUCS4 >> 6 | 0xC0);
4369 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4371 else if (nUCS4 < 0x10000)
4373 appendEscape(rTheText, nUCS4 >> 12 | 0xE0);
4374 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4375 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4377 else if (nUCS4 < 0x200000)
4379 appendEscape(rTheText, nUCS4 >> 18 | 0xF0);
4380 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4381 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4382 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4384 else if (nUCS4 < 0x4000000)
4386 appendEscape(rTheText, nUCS4 >> 24 | 0xF8);
4387 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4388 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4389 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4390 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4392 else
4394 appendEscape(rTheText, nUCS4 >> 30 | 0xFC);
4395 appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80);
4396 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4397 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4398 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4399 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4403 // static
4404 void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4405 EscapeType eEscapeType,
4406 Part ePart, rtl_TextEncoding eCharset,
4407 bool bKeepVisibleEscapes)
4409 bool bEscape;
4410 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4411 switch (eEscapeType)
4413 case EscapeType::NONE:
4414 if (mustEncode(nUCS4, ePart))
4416 bEscape = true;
4417 eTargetCharset = RTL_TEXTENCODING_UTF8;
4419 else
4420 bEscape = false;
4421 break;
4423 case EscapeType::Octet:
4424 bEscape = true;
4425 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4426 break;
4428 case EscapeType::Utf32:
4429 if (mustEncode(nUCS4, ePart))
4431 bEscape = true;
4432 eTargetCharset = eCharset;
4434 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4436 bEscape = true;
4437 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4439 else
4440 bEscape = false;
4441 break;
4442 default:
4443 bEscape = false;
4446 if (bEscape)
4448 switch (eTargetCharset)
4450 default:
4451 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4452 [[fallthrough]];
4453 case RTL_TEXTENCODING_ASCII_US:
4454 case RTL_TEXTENCODING_ISO_8859_1:
4455 appendEscape(rTheText, nUCS4);
4456 break;
4457 case RTL_TEXTENCODING_UTF8:
4458 appendUCS4Escape(rTheText, nUCS4);
4459 break;
4462 else
4463 rTheText.append(sal_Unicode(nUCS4));
4466 // static
4467 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
4468 sal_Unicode const * pEnd,
4469 EncodeMechanism eMechanism,
4470 rtl_TextEncoding eCharset,
4471 EscapeType & rEscapeType)
4473 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
4474 sal_uInt32 nUTF32 = INetMIME::getUTF32Character(rBegin, pEnd);
4475 switch (eMechanism)
4477 case EncodeMechanism::All:
4478 rEscapeType = EscapeType::NONE;
4479 break;
4481 case EncodeMechanism::WasEncoded:
4483 int nWeight1;
4484 int nWeight2;
4485 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4486 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
4487 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
4489 rBegin += 2;
4490 nUTF32 = nWeight1 << 4 | nWeight2;
4491 switch (eCharset)
4493 default:
4494 OSL_FAIL(
4495 "INetURLObject::getUTF32(): Unsupported charset");
4496 [[fallthrough]];
4497 case RTL_TEXTENCODING_ASCII_US:
4498 rEscapeType = rtl::isAscii(nUTF32) ?
4499 EscapeType::Utf32 : EscapeType::Octet;
4500 break;
4502 case RTL_TEXTENCODING_ISO_8859_1:
4503 rEscapeType = EscapeType::Utf32;
4504 break;
4506 case RTL_TEXTENCODING_UTF8:
4507 if (rtl::isAscii(nUTF32))
4508 rEscapeType = EscapeType::Utf32;
4509 else
4511 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
4513 sal_uInt32 nEncoded;
4514 int nShift;
4515 sal_uInt32 nMin;
4516 if (nUTF32 <= 0xDF)
4518 nEncoded = (nUTF32 & 0x1F) << 6;
4519 nShift = 0;
4520 nMin = 0x80;
4522 else if (nUTF32 <= 0xEF)
4524 nEncoded = (nUTF32 & 0x0F) << 12;
4525 nShift = 6;
4526 nMin = 0x800;
4528 else
4530 nEncoded = (nUTF32 & 0x07) << 18;
4531 nShift = 12;
4532 nMin = 0x10000;
4534 sal_Unicode const * p = rBegin;
4535 bool bUTF8 = true;
4536 for (;;)
4538 if (pEnd - p < 3
4539 || p[0] != '%'
4540 || (nWeight1
4541 = INetMIME::getHexWeight(p[1]))
4543 || nWeight1 > 11
4544 || (nWeight2
4545 = INetMIME::getHexWeight(p[2]))
4546 < 0)
4548 bUTF8 = false;
4549 break;
4551 p += 3;
4552 nEncoded
4553 |= ((nWeight1 & 3) << 4 | nWeight2)
4554 << nShift;
4555 if (nShift == 0)
4556 break;
4557 nShift -= 6;
4559 if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
4560 && nEncoded >= nMin)
4562 rBegin = p;
4563 nUTF32 = nEncoded;
4564 rEscapeType = EscapeType::Utf32;
4565 break;
4568 rEscapeType = EscapeType::Octet;
4570 break;
4573 else
4574 rEscapeType = EscapeType::NONE;
4575 break;
4578 case EncodeMechanism::NotCanonical:
4580 int nWeight1;
4581 int nWeight2;
4582 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4583 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
4584 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
4586 rBegin += 2;
4587 nUTF32 = nWeight1 << 4 | nWeight2;
4588 rEscapeType = EscapeType::Octet;
4590 else
4591 rEscapeType = EscapeType::NONE;
4592 break;
4595 return nUTF32;
4598 // static
4599 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
4600 sal_Unicode const * pEnd,
4601 bool bEager)
4603 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
4604 State eState = STATE_DOT;
4605 sal_Int32 nLabels = 0;
4606 sal_Unicode const * pLastAlphanumeric = nullptr;
4607 for (sal_Unicode const * p = rBegin;; ++p)
4608 switch (eState)
4610 case STATE_DOT:
4611 if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
4613 ++nLabels;
4614 eState = STATE_LABEL;
4615 break;
4617 if (bEager || nLabels == 0)
4618 return 0;
4619 rBegin = p - 1;
4620 return nLabels;
4622 case STATE_LABEL:
4623 if (p != pEnd)
4625 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4626 break;
4627 else if (*p == '.')
4629 eState = STATE_DOT;
4630 break;
4632 else if (*p == '-')
4634 pLastAlphanumeric = p;
4635 eState = STATE_HYPHEN;
4636 break;
4639 rBegin = p;
4640 return nLabels;
4642 case STATE_HYPHEN:
4643 if (p != pEnd)
4645 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4647 eState = STATE_LABEL;
4648 break;
4650 else if (*p == '-')
4651 break;
4653 if (bEager)
4654 return 0;
4655 rBegin = pLastAlphanumeric;
4656 return nLabels;
4660 // static
4661 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
4662 sal_Unicode const * pEnd)
4664 if (rBegin != pEnd && *rBegin == '[') {
4665 sal_Unicode const * p = rBegin + 1;
4666 //TODO: check for valid IPv6address (RFC 2373):
4667 while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
4669 ++p;
4671 if (p != pEnd && *p == ']') {
4672 rBegin = p + 1;
4673 return true;
4676 return false;
4679 OUString INetURLObject::GetPartBeforeLastName()
4680 const
4682 if (!checkHierarchical())
4683 return OUString();
4684 INetURLObject aTemp(*this);
4685 aTemp.clearFragment();
4686 aTemp.clearQuery();
4687 aTemp.removeSegment(LAST_SEGMENT, false);
4688 aTemp.setFinalSlash();
4689 return aTemp.GetMainURL(DecodeMechanism::ToIUri);
4692 OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
4693 rtl_TextEncoding eCharset) const
4695 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
4698 OUString INetURLObject::GetFileExtension() const
4700 return getExtension(LAST_SEGMENT, false);
4703 void INetURLObject::CutLastName()
4705 INetURLObject aTemp(*this);
4706 aTemp.clearFragment();
4707 aTemp.clearQuery();
4708 if (!aTemp.removeSegment(LAST_SEGMENT, false))
4709 return;
4710 *this = aTemp;
4713 OUString INetURLObject::PathToFileName() const
4715 if (m_eScheme != INetProtocol::File)
4716 return OUString();
4717 OUString aSystemPath;
4718 if (osl::FileBase::getSystemPathFromFileURL(
4719 decode(m_aAbsURIRef.getStr(),
4720 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
4721 DecodeMechanism::NONE, RTL_TEXTENCODING_UTF8),
4722 aSystemPath)
4723 != osl::FileBase::E_None)
4724 return OUString();
4725 return aSystemPath;
4728 OUString INetURLObject::GetFull() const
4730 INetURLObject aTemp(*this);
4731 aTemp.removeFinalSlash();
4732 return aTemp.PathToFileName();
4735 OUString INetURLObject::GetPath() const
4737 INetURLObject aTemp(*this);
4738 aTemp.removeSegment();
4739 aTemp.removeFinalSlash();
4740 return aTemp.PathToFileName();
4743 void INetURLObject::SetBase(OUString const & rTheBase)
4745 setBase(rTheBase, LAST_SEGMENT, EncodeMechanism::All);
4748 OUString INetURLObject::GetBase() const
4750 return getBase(LAST_SEGMENT, true, DecodeMechanism::WithCharset);
4753 void INetURLObject::SetExtension(OUString const & rTheExtension)
4755 setExtension(rTheExtension, LAST_SEGMENT, false);
4758 OUString INetURLObject::CutExtension()
4760 OUString aTheExtension(getExtension(LAST_SEGMENT, false));
4761 return removeExtension(LAST_SEGMENT, false)
4762 ? aTheExtension : OUString();
4765 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */