Branch libreoffice-5-0-4
[LibreOffice.git] / tools / source / fsys / urlobj.cxx
blob2fdb0d018bb783edb7537c3c4309dbafb2ca21b7
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <boost/checked_delete.hpp>
23 #include <tools/urlobj.hxx>
24 #include <tools/debug.hxx>
25 #include <tools/inetmime.hxx>
26 #include <tools/stream.hxx>
27 #include <com/sun/star/uno/Reference.hxx>
28 #include <com/sun/star/util/XStringWidth.hpp>
29 #include <o3tl/enumarray.hxx>
30 #include <osl/diagnose.h>
31 #include <osl/file.hxx>
32 #include <rtl/character.hxx>
33 #include <rtl/string.h>
34 #include <rtl/textenc.h>
35 #include <rtl/ustring.hxx>
36 #include <sal/log.hxx>
37 #include <sal/types.h>
39 #include <algorithm>
40 #include <limits>
41 #include <memory>
43 #include <string.h>
45 #include <com/sun/star/uno/Sequence.hxx>
46 #include <sax/tools/converter.hxx>
47 #include <rtl/uri.hxx>
49 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
50 // unnamed namespaces don't work well yet...
52 using namespace css;
54 // INetURLObject
56 /* The URI grammar (using RFC 2234 conventions).
58 Constructs of the form
59 {reference <rule1> using rule2}
60 stand for a rule matching the given rule1 specified in the given reference,
61 encoded to URI syntax using rule2 (as specified in this URI grammar).
64 ; RFC 1738, RFC 2396, RFC 2732, private
65 login = [user [":" password] "@"] hostport
66 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
67 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
68 hostport = host [":" port]
69 host = incomplete-hostname / hostname / IPv4address / IPv6reference
70 incomplete-hostname = *(domainlabel ".") domainlabel
71 hostname = *(domainlabel ".") toplabel ["."]
72 domainlabel = alphanum [*(alphanum / "-") alphanum]
73 toplabel = ALPHA [*(alphanum / "-") alphanum]
74 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
75 IPv6reference = "[" hexpart [":" IPv4address] "]"
76 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
77 hexseq = hex4 *(":" hex4)
78 hex4 = 1*4HEXDIG
79 port = *DIGIT
80 escaped = "%" HEXDIG HEXDIG
81 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
82 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
83 alphanum = ALPHA / DIGIT
84 unreserved = alphanum / mark
85 uric = escaped / reserved / unreserved
86 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
89 ; RFC 1738, RFC 2396
90 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
91 segment = *pchar
94 ; RFC 1738, RFC 2396
95 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
96 segment = *(pchar / ";")
99 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
100 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
101 segment = *pchar
102 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
105 ; RFC 2368, RFC 2396
106 mailto-url = "MAILTO:" [to] [headers]
107 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
108 headers = "?" header *("&" header)
109 header = hname "=" hvalue
110 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
111 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
114 ; private (see RFC 1738, RFC 2396)
115 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
116 segment = *(pchar / ";")
119 ; private
120 private-url = "PRIVATE:" path ["?" *uric]
121 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
124 ; private
125 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
126 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
127 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
130 ; private
131 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
132 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
135 ; private
136 slot-url = "SLOT:" path ["?" *uric]
137 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
140 ; private
141 macro-url = "MACRO:" path ["?" *uric]
142 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
145 ; private
146 javascript-url = "JAVASCRIPT:" *uric
149 ; RFC 2397
150 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
151 mediatype = [type "/" subtype] *(";" attribute "=" value)
152 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
153 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
154 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
155 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
158 ; RFC 2392, RFC 2396
159 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
162 ; private
163 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
164 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
167 ; private
168 uno-url = ".UNO:" path ["?" *uric]
169 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
172 ; private
173 component-url = ".COMPONENT:" path ["?" *uric]
174 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
177 ; private
178 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
179 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
182 ; RFC 2255
183 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
184 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
185 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
186 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
187 extension = ["!"] ["X-"] extoken ["=" exvalue]
188 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
189 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
192 ; private
193 db-url = "DB:" *uric
196 ; private
197 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
198 opaque_part = uric_no_slash *uric
199 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
202 ; RFC 1738
203 telnet-url = "TELNET://" login ["/"]
206 ; private
207 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
208 opaque_part = uric_no_slash *uric
209 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
212 ; private
213 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
214 segment = *pchar
217 ; private
218 unknown-url = scheme ":" 1*uric
219 scheme = ALPHA *(alphanum / "+" / "-" / ".")
222 ; private (http://ubiqx.org/cifs/Appendix-D.html):
223 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
224 segment = *(pchar / ";")
227 inline sal_Int32 INetURLObject::SubString::clear()
229 sal_Int32 nDelta = -m_nLength;
230 m_nBegin = -1;
231 m_nLength = 0;
232 return nDelta;
235 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
236 OUString const & rSubString)
238 OUString sTemp(rString.makeStringAndClear());
239 sal_Int32 nDelta = set(sTemp, rSubString);
240 rString.append(sTemp);
241 return nDelta;
244 inline sal_Int32 INetURLObject::SubString::set(OUString & rString,
245 OUString const & rSubString)
247 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
249 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
251 m_nLength = rSubString.getLength();
252 return nDelta;
255 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
256 OUString const & rSubString,
257 sal_Int32 nTheBegin)
259 m_nBegin = nTheBegin;
260 return set(rString, rSubString);
263 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
265 if (isPresent())
266 m_nBegin = m_nBegin + nDelta;
269 int INetURLObject::SubString::compare(SubString const & rOther,
270 OUStringBuffer const & rThisString,
271 OUStringBuffer const & rOtherString) const
273 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
274 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
275 sal_Unicode const * end = p1 + len;
276 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
277 while (p1 != end) {
278 if (*p1 < *p2) {
279 return -1;
280 } else if (*p1 > *p2) {
281 return 1;
283 ++p1;
284 ++p2;
286 return m_nLength < rOther.m_nLength ? -1
287 : m_nLength > rOther.m_nLength ? 1
288 : 0;
291 struct INetURLObject::SchemeInfo
293 sal_Char const * m_pScheme;
294 sal_Char const * m_pPrefix;
295 sal_uInt16 m_nDefaultPort;
296 bool m_bAuthority;
297 bool m_bUser;
298 bool m_bAuth;
299 bool m_bPassword;
300 bool m_bHost;
301 bool m_bPort;
302 bool m_bHierarchical;
303 bool m_bQuery;
306 struct INetURLObject::PrefixInfo
308 enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!
310 sal_Char const * m_pPrefix;
311 sal_Char const * m_pTranslatedPrefix;
312 INetProtocol m_eScheme;
313 Kind m_eKind;
316 // static
317 inline INetURLObject::SchemeInfo const &
318 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
320 static o3tl::enumarray<INetProtocol, SchemeInfo> const map = {
321 SchemeInfo{
322 "", "", 0, false, false, false, false, false, false, false, false},
323 SchemeInfo{
324 "ftp", "ftp://", 21, true, true, false, true, true, true, true,
325 false},
326 SchemeInfo{
327 "http", "http://", 80, true, false, false, false, true, true, true,
328 true},
329 SchemeInfo{
330 "file", "file://", 0, true, false, false, false, true, false, true,
331 false},
332 SchemeInfo{
333 "mailto", "mailto:", 0, false, false, false, false, false, false,
334 false, true},
335 SchemeInfo{
336 "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, false,
337 false, false, true, true, true, true},
338 SchemeInfo{
339 "private", "private:", 0, false, false, false, false, false, false,
340 false, true},
341 SchemeInfo{
342 "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false,
343 false, false, false, true, true},
344 SchemeInfo{
345 "https", "https://", 443, true, false, false, false, true, true,
346 true, true},
347 SchemeInfo{
348 "slot", "slot:", 0, false, false, false, false, false, false, false,
349 true},
350 SchemeInfo{
351 "macro", "macro:", 0, false, false, false, false, false, false,
352 false, true},
353 SchemeInfo{
354 "javascript", "javascript:", 0, false, false, false, false, false,
355 false, false, false},
356 SchemeInfo{
357 "data", "data:", 0, false, false, false, false, false, false, false,
358 false},
359 SchemeInfo{
360 "cid", "cid:", 0, false, false, false, false, false, false, false,
361 false},
362 SchemeInfo{
363 "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, false,
364 false, false, false, true, false},
365 SchemeInfo{
366 ".uno", ".uno:", 0, false, false, false, false, false, false, false,
367 true},
368 SchemeInfo{
369 ".component", ".component:", 0, false, false, false, false, false,
370 false, false, true},
371 SchemeInfo{
372 "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false,
373 false, false, false, true, true},
374 SchemeInfo{
375 "ldap", "ldap://", 389, true, false, false, false, true, true,
376 false, true},
377 SchemeInfo{
378 "db", "db:", 0, false, false, false, false, false, false, false,
379 false},
380 SchemeInfo{
381 "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false,
382 false, false, false, false, false},
383 SchemeInfo{
384 "telnet", "telnet://", 23, true, true, false, true, true, true,
385 true, false},
386 SchemeInfo{
387 "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false,
388 false, false, false, false, false, false},
389 SchemeInfo{
390 "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false,
391 false, false, false, true, false},
392 SchemeInfo{
393 "", "", 0, false, false, false, false, true, true, true, false },
394 SchemeInfo{
395 "smb", "smb://", 139, true, true, false, true, true, true, true,
396 true},
397 SchemeInfo{
398 "hid", "hid:", 0, false, false, false, false, false, false, false,
399 true},
400 SchemeInfo{
401 "sftp", "sftp://", 22, true, true, false, true, true, true, true,
402 true},
403 SchemeInfo{
404 "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", 0, true, true,
405 false, false, true, false, true, true} };
406 return map[eTheScheme];
409 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
411 return getSchemeInfo(m_eScheme);
414 // static
415 inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
416 sal_uInt32 nOctet)
418 rTheText.append( '%' );
419 rTheText.append( (sal_Unicode)INetMIME::getHexDigit(int(nOctet >> 4)) );
420 rTheText.append( (sal_Unicode)INetMIME::getHexDigit(int(nOctet & 15)) );
423 namespace unnamed_tools_urlobj {
425 enum
427 PA = INetURLObject::PART_USER_PASSWORD,
428 PD = INetURLObject::PART_FPATH,
429 PE = INetURLObject::PART_AUTHORITY,
430 PF = INetURLObject::PART_REL_SEGMENT_EXTRA,
431 PG = INetURLObject::PART_URIC,
432 PH = INetURLObject::PART_HTTP_PATH,
433 PI = INetURLObject::PART_MESSAGE_ID_PATH,
434 PJ = INetURLObject::PART_MAILTO,
435 PK = INetURLObject::PART_PATH_BEFORE_QUERY,
436 PL = INetURLObject::PART_PCHAR,
437 PM = INetURLObject::PART_VISIBLE,
438 PN = INetURLObject::PART_VISIBLE_NONSPECIAL,
439 PO = INetURLObject::PART_UNO_PARAM_VALUE,
440 PP = INetURLObject::PART_UNAMBIGUOUS,
441 PQ = INetURLObject::PART_URIC_NO_SLASH,
442 PR = INetURLObject::PART_HTTP_QUERY,
445 static sal_uInt32 const aMustEncodeMap[128]
446 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 /* */ PP,
449 /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
450 /* " */ PM+PN +PP,
451 /* # */ PM,
452 /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
453 /* % */ PM,
454 /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR,
455 /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
456 /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
457 /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
458 /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
459 /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
460 /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR,
461 /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
462 /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
463 /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO,
464 /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
465 /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
466 /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
467 /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
468 /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
469 /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
470 /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
471 /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
472 /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
473 /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
474 /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
475 /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR,
476 /* < */ +PI +PM+PN +PP,
477 /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR,
478 /* > */ +PI +PM+PN +PP,
479 /* ? */ +PG +PM +PO +PQ,
480 /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
481 /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
482 /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
483 /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
484 /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
485 /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
486 /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
487 /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
488 /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
489 /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
490 /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
491 /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
492 /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
493 /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
494 /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
495 /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
496 /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
497 /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
498 /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
499 /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
500 /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
501 /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
502 /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
503 /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
504 /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
505 /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
506 /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
507 /* [ */ PG +PM+PN+PO,
508 /* \ */ +PM+PN +PP,
509 /* ] */ PG +PM+PN+PO,
510 /* ^ */ PM+PN +PP,
511 /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
512 /* ` */ PM+PN +PP,
513 /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
514 /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
515 /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
516 /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
517 /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
518 /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
519 /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
520 /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
521 /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
522 /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
523 /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
524 /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
525 /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
526 /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
527 /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
528 /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
529 /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
530 /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
531 /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
532 /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
533 /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
534 /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
535 /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
536 /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
537 /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
538 /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
539 /* { */ PM+PN +PP,
540 /* | */ +PM+PN +PP,
541 /* } */ PM+PN +PP,
542 /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
543 0 };
545 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
547 return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
552 void INetURLObject::setInvalid()
554 m_aAbsURIRef.setLength(0);
555 m_eScheme = INetProtocol::NotValid;
556 m_aScheme.clear();
557 m_aUser.clear();
558 m_aAuth.clear();
559 m_aHost.clear();
560 m_aPort.clear();
561 m_aPath.clear();
562 m_aQuery.clear();
563 m_aFragment.clear();
566 namespace {
568 std::unique_ptr<SvMemoryStream> memoryStream(
569 void const * data, sal_Int32 length)
571 std::unique_ptr<char, boost::checked_array_deleter<char> > b(
572 new char[length]);
573 memcpy(b.get(), data, length);
574 std::unique_ptr<SvMemoryStream> s(
575 new SvMemoryStream(b.get(), length, StreamMode::READ));
576 s->ObjectOwnsMemory(true);
577 b.release();
578 return s;
583 std::unique_ptr<SvMemoryStream> INetURLObject::getData()
585 if( GetProtocol() != INetProtocol::Data )
587 return nullptr;
590 OUString sURLPath = GetURLPath( DECODE_WITH_CHARSET, RTL_TEXTENCODING_ISO_8859_1 );
591 sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath.getStr(), sURLPath.getStr() + sURLPath.getLength(), NULL, NULL, NULL );
592 sal_Int32 nCharactersSkipped = pSkippedMediatype == NULL
593 ? 0 : pSkippedMediatype-sURLPath.getStr();
594 if (sURLPath.match(",", nCharactersSkipped))
596 nCharactersSkipped += strlen(",");
597 OString sURLEncodedData(
598 sURLPath.getStr() + nCharactersSkipped,
599 sURLPath.getLength() - nCharactersSkipped,
600 RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
601 return memoryStream(
602 sURLEncodedData.getStr(), sURLEncodedData.getLength());
604 else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
606 nCharactersSkipped += strlen(";base64,");
607 OUString sBase64Data = sURLPath.copy( nCharactersSkipped );
608 css::uno::Sequence< sal_Int8 > aDecodedData;
609 if (sax::Converter::decodeBase64SomeChars(aDecodedData, sBase64Data)
610 == sBase64Data.getLength())
612 return memoryStream(
613 aDecodedData.getArray(), aDecodedData.getLength());
616 return nullptr;
619 namespace unnamed_tools_urlobj {
621 INetURLObject::FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
622 sal_Unicode const * pEnd,
623 INetURLObject::FSysStyle eStyle)
625 DBG_ASSERT(eStyle
626 & (INetURLObject::FSYS_UNX
627 | INetURLObject::FSYS_DOS),
628 "guessFSysStyleByCounting(): Bad style");
629 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
630 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
631 "guessFSysStyleByCounting(): Too big");
632 sal_Int32 nSlashCount
633 = eStyle & INetURLObject::FSYS_UNX ?
634 0 : std::numeric_limits< sal_Int32 >::min();
635 sal_Int32 nBackslashCount
636 = eStyle & INetURLObject::FSYS_DOS ?
637 0 : std::numeric_limits< sal_Int32 >::min();
638 while (pBegin != pEnd)
639 switch (*pBegin++)
641 case '/':
642 ++nSlashCount;
643 break;
645 case '\\':
646 ++nBackslashCount;
647 break;
649 return nSlashCount >= nBackslashCount ?
650 INetURLObject::FSYS_UNX : INetURLObject::FSYS_DOS;
653 OUString parseScheme(
654 sal_Unicode const ** begin, sal_Unicode const * end,
655 sal_uInt32 fragmentDelimiter)
657 sal_Unicode const * p = *begin;
658 if (p != end && rtl::isAsciiAlpha(*p)) {
659 do {
660 ++p;
661 } while (p != end
662 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
663 || *p == '.'));
664 // #i34835# To avoid problems with Windows file paths like "C:\foo",
665 // do not accept generic schemes that are only one character long:
666 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
667 && p - *begin >= 2)
669 OUString scheme(
670 OUString(*begin, p - *begin).toAsciiLowerCase());
671 *begin = p + 1;
672 return scheme;
675 return OUString();
680 bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
681 bool bOctets,
682 EncodeMechanism eMechanism,
683 rtl_TextEncoding eCharset,
684 bool bSmart,
685 FSysStyle eStyle)
687 sal_Unicode const * pPos = rTheAbsURIRef.getStr();
688 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
690 setInvalid();
692 sal_uInt32 nFragmentDelimiter = '#';
694 OUStringBuffer aSynAbsURIRef;
696 // Parse <scheme>:
697 sal_Unicode const * p = pPos;
698 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
699 if (pPrefix)
701 pPos = p;
702 m_eScheme = pPrefix->m_eScheme;
704 OUString sTemp(OUString::createFromAscii(pPrefix->m_eKind
705 >= PrefixInfo::EXTERNAL ?
706 pPrefix->m_pTranslatedPrefix :
707 pPrefix->m_pPrefix));
708 aSynAbsURIRef.append(sTemp);
709 m_aScheme = SubString( 0, sTemp.indexOf(':') );
711 else
713 if (bSmart)
715 // For scheme detection, the first (if any) of the following
716 // productions that matches the input string (and for which the
717 // appropriate style bit is set in eStyle, if applicable)
718 // determines the scheme. The productions use the auxiliary rules
720 // domain = label *("." label)
721 // label = alphanum [*(alphanum / "-") alphanum]
722 // alphanum = ALPHA / DIGIT
723 // IPv6reference = "[" IPv6address "]"
724 // IPv6address = hexpart [":" IPv4address]
725 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
726 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
727 // hexseq = hex4 *(":" hex4)
728 // hex4 = 1*4HEXDIG
729 // UCS4 = <any UCS4 character>
731 // 1st Production (known scheme):
732 // <one of the known schemes, ignoring case> ":" *UCS4
733 // 2nd Production (mailto):
734 // domain "@" domain
735 // 3rd Production (ftp):
736 // "FTP" 2*("." label) ["/" *UCS4]
737 // 4th Production (http):
738 // label 2*("." label) ["/" *UCS4]
739 // 5th Production (file):
740 // "//" (domain / IPv6reference) ["/" *UCS4]
741 // 6th Production (Unix file):
742 // "/" *UCS4
743 // 7th Production (UNC file; FSYS_DOS only):
744 // "\\" domain ["\" *UCS4]
745 // 8th Production (Unix-like DOS file; FSYS_DOS only):
746 // ALPHA ":" ["/" *UCS4]
747 // 9th Production (DOS file; FSYS_DOS only):
748 // ALPHA ":" ["\" *UCS4]
750 // For the 'non URL' file productions 6--9, the interpretation of
751 // the input as a (degenerate) URI is turned off, i.e., escape
752 // sequences and fragments are never detected as such, but are
753 // taken as literal characters.
755 sal_Unicode const * p1 = pPos;
756 if (eStyle & FSYS_DOS
757 && pEnd - p1 >= 2
758 && rtl::isAsciiAlpha(p1[0])
759 && p1[1] == ':'
760 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
762 m_eScheme = INetProtocol::File; // 8th, 9th
763 eMechanism = ENCODE_ALL;
764 nFragmentDelimiter = 0x80000000;
766 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
768 p1 += 2;
769 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
770 && (p1 == pEnd || *p1 == '/'))
771 m_eScheme = INetProtocol::File; // 5th
773 else if (p1 != pEnd && *p1 == '/')
775 m_eScheme = INetProtocol::File; // 6th
776 eMechanism = ENCODE_ALL;
777 nFragmentDelimiter = 0x80000000;
779 else if (eStyle & FSYS_DOS
780 && pEnd - p1 >= 2
781 && p1[0] == '\\'
782 && p1[1] == '\\')
784 p1 += 2;
785 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
786 p1, pEnd - p1, '\\');
787 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
788 if (
789 parseHostOrNetBiosName(
790 p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
791 true, NULL) ||
792 (scanDomain(p1, pe) > 0 && p1 == pe)
795 m_eScheme = INetProtocol::File; // 7th
796 eMechanism = ENCODE_ALL;
797 nFragmentDelimiter = 0x80000000;
800 else
802 sal_Unicode const * pDomainEnd = p1;
803 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
804 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
806 ++pDomainEnd;
807 if (scanDomain(pDomainEnd, pEnd) > 0
808 && pDomainEnd == pEnd)
809 m_eScheme = INetProtocol::Mailto; // 2nd
811 else if (nLabels >= 3
812 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
813 m_eScheme
814 = pDomainEnd - p1 >= 4
815 && (p1[0] == 'f' || p1[0] == 'F')
816 && (p1[1] == 't' || p1[1] == 'T')
817 && (p1[2] == 'p' || p1[2] == 'P')
818 && p1[3] == '.' ?
819 INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th
823 OUString aSynScheme;
824 if (m_eScheme == INetProtocol::NotValid) {
825 sal_Unicode const * p1 = pPos;
826 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
827 if (!aSynScheme.isEmpty())
829 m_eScheme = INetProtocol::Generic;
830 pPos = p1;
834 if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
835 && *pPos != nFragmentDelimiter)
837 m_eScheme = m_eSmartScheme;
840 if (m_eScheme == INetProtocol::NotValid)
842 setInvalid();
843 return false;
846 if (m_eScheme != INetProtocol::Generic) {
847 aSynScheme = OUString::createFromAscii(getSchemeInfo().m_pScheme);
849 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
850 aSynAbsURIRef.append(':');
853 sal_uInt32 nSegmentDelimiter = '/';
854 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
855 bool bSkippedInitialSlash = false;
857 // Parse //<user>;AUTH=<auth>@<host>:<port> or
858 // //<user>:<password>@<host>:<port> or
859 // //<reg_name>
860 if (getSchemeInfo().m_bAuthority)
862 sal_Unicode const * pUserInfoBegin = 0;
863 sal_Unicode const * pUserInfoEnd = 0;
864 sal_Unicode const * pHostPortBegin = 0;
865 sal_Unicode const * pHostPortEnd = 0;
867 switch (m_eScheme)
869 case INetProtocol::VndSunStarHelp:
871 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
873 setInvalid();
874 return false;
876 aSynAbsURIRef.append("//");
877 OUStringBuffer aSynAuthority;
878 while (pPos < pEnd
879 && *pPos != '/' && *pPos != '?'
880 && *pPos != nFragmentDelimiter)
882 EscapeType eEscapeType;
883 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
884 eMechanism,
885 eCharset, eEscapeType);
886 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
887 PART_AUTHORITY, eCharset, false);
889 m_aHost.set(aSynAbsURIRef,
890 aSynAuthority.makeStringAndClear(),
891 aSynAbsURIRef.getLength());
892 // misusing m_aHost to store the authority
893 break;
896 case INetProtocol::VndSunStarHier:
898 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
900 pPos += 2;
901 aSynAbsURIRef.append("//");
902 OUStringBuffer aSynAuthority;
903 while (pPos < pEnd
904 && *pPos != '/' && *pPos != '?'
905 && *pPos != nFragmentDelimiter)
907 EscapeType eEscapeType;
908 sal_uInt32 nUTF32 = getUTF32(pPos,
909 pEnd,
910 bOctets,
911 eMechanism,
912 eCharset,
913 eEscapeType);
914 appendUCS4(aSynAuthority,
915 nUTF32,
916 eEscapeType,
917 bOctets,
918 PART_AUTHORITY,
919 eCharset,
920 false);
922 if (aSynAuthority.isEmpty())
924 setInvalid();
925 return false;
927 m_aHost.set(aSynAbsURIRef,
928 aSynAuthority.makeStringAndClear(),
929 aSynAbsURIRef.getLength());
930 // misusing m_aHost to store the authority
932 break;
935 case INetProtocol::VndSunStarPkg:
936 case INetProtocol::Cmis:
938 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
940 setInvalid();
941 return false;
943 aSynAbsURIRef.append("//");
944 OUStringBuffer aSynUser;
946 bool bHasUser = false;
947 while (pPos < pEnd && *pPos != '@'
948 && *pPos != '/' && *pPos != '?'
949 && *pPos != nFragmentDelimiter)
951 EscapeType eEscapeType;
952 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
953 eMechanism,
954 eCharset, eEscapeType);
955 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets,
956 PART_USER_PASSWORD, eCharset, false);
958 bHasUser = *pPos == '@';
961 OUStringBuffer aSynAuthority;
962 if ( !bHasUser )
964 aSynAuthority = aSynUser;
966 else
968 m_aUser.set(aSynAbsURIRef,
969 aSynUser.makeStringAndClear(),
970 aSynAbsURIRef.getLength());
971 aSynAbsURIRef.append("@");
972 ++pPos;
974 while (pPos < pEnd
975 && *pPos != '/' && *pPos != '?'
976 && *pPos != nFragmentDelimiter)
978 EscapeType eEscapeType;
979 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
980 eMechanism,
981 eCharset, eEscapeType);
982 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
983 PART_AUTHORITY, eCharset, false);
986 if (aSynAuthority.isEmpty())
988 setInvalid();
989 return false;
991 m_aHost.set(aSynAbsURIRef,
992 aSynAuthority.makeStringAndClear(),
993 aSynAbsURIRef.getLength());
994 // misusing m_aHost to store the authority
995 break;
998 case INetProtocol::File:
999 if (bSmart)
1001 // The first of the following seven productions that
1002 // matches the rest of the input string (and for which the
1003 // appropriate style bit is set in eStyle, if applicable)
1004 // determines the used notation. The productions use the
1005 // auxiliary rules
1007 // domain = label *("." label)
1008 // label = alphanum [*(alphanum / "-") alphanum]
1009 // alphanum = ALPHA / DIGIT
1010 // IPv6reference = "[" IPv6address "]"
1011 // IPv6address = hexpart [":" IPv4address]
1012 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1013 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1014 // hexseq = hex4 *(":" hex4)
1015 // hex4 = 1*4HEXDIG
1016 // path = <any UCS4 character except "#">
1017 // UCS4 = <any UCS4 character>
1019 // 1st Production (URL):
1020 // "//" [domain / IPv6reference] ["/" *path]
1021 // ["#" *UCS4]
1022 // becomes
1023 // "file://" domain "/" *path ["#" *UCS4]
1024 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1026 sal_Unicode const * p1 = pPos + 2;
1027 while (p1 != pEnd && *p1 != '/' &&
1028 *p1 != nFragmentDelimiter)
1030 ++p1;
1032 if (parseHostOrNetBiosName(
1033 pPos + 2, p1, bOctets, ENCODE_ALL,
1034 RTL_TEXTENCODING_DONTKNOW, true, NULL))
1036 aSynAbsURIRef.append("//");
1037 pHostPortBegin = pPos + 2;
1038 pHostPortEnd = p1;
1039 pPos = p1;
1040 break;
1044 // 2nd Production (MS IE generated 1; FSYS_DOS only):
1045 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1046 // becomes
1047 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1048 // replacing "\" by "/" within <*path>
1049 // 3rd Production (MS IE generated 2; FSYS_DOS only):
1050 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1051 // becomes
1052 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1053 // replacing "\" by "/" within <*path>
1054 // 4th Production (miscounted slashes):
1055 // "//" *path ["#" *UCS4]
1056 // becomes
1057 // "file:///" *path ["#" *UCS4]
1058 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1060 aSynAbsURIRef.append("//");
1061 pPos += 2;
1062 bSkippedInitialSlash = true;
1063 if ((eStyle & FSYS_DOS) != 0
1064 && pEnd - pPos >= 2
1065 && rtl::isAsciiAlpha(pPos[0])
1066 && pPos[1] == ':'
1067 && (pEnd - pPos == 2
1068 || pPos[2] == '/' || pPos[2] == '\\'))
1069 nAltSegmentDelimiter = '\\';
1070 break;
1073 // 5th Production (Unix):
1074 // "/" *path ["#" *UCS4]
1075 // becomes
1076 // "file:///" *path ["#" *UCS4]
1077 if (pPos < pEnd && *pPos == '/')
1079 aSynAbsURIRef.append("//");
1080 break;
1083 // 6th Production (UNC; FSYS_DOS only):
1084 // "\\" domain ["\" *path] ["#" *UCS4]
1085 // becomes
1086 // "file://" domain "/" *path ["#" *UCS4]
1087 // replacing "\" by "/" within <*path>
1088 if (eStyle & FSYS_DOS
1089 && pEnd - pPos >= 2
1090 && pPos[0] == '\\'
1091 && pPos[1] == '\\')
1093 sal_Unicode const * p1 = pPos + 2;
1094 sal_Unicode const * pe = p1;
1095 while (pe < pEnd && *pe != '\\' &&
1096 *pe != nFragmentDelimiter)
1098 ++pe;
1100 if (
1101 parseHostOrNetBiosName(
1102 p1, pe, bOctets, ENCODE_ALL,
1103 RTL_TEXTENCODING_DONTKNOW, true, NULL) ||
1104 (scanDomain(p1, pe) > 0 && p1 == pe)
1107 aSynAbsURIRef.append("//");
1108 pHostPortBegin = pPos + 2;
1109 pHostPortEnd = pe;
1110 pPos = pe;
1111 nSegmentDelimiter = '\\';
1112 break;
1116 // 7th Production (Unix-like DOS; FSYS_DOS only):
1117 // ALPHA ":" ["/" *path] ["#" *UCS4]
1118 // becomes
1119 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1120 // replacing "\" by "/" within <*path>
1121 // 8th Production (DOS; FSYS_DOS only):
1122 // ALPHA ":" ["\" *path] ["#" *UCS4]
1123 // becomes
1124 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1125 // replacing "\" by "/" within <*path>
1126 if (eStyle & FSYS_DOS
1127 && pEnd - pPos >= 2
1128 && rtl::isAsciiAlpha(pPos[0])
1129 && pPos[1] == ':'
1130 && (pEnd - pPos == 2
1131 || pPos[2] == '/'
1132 || pPos[2] == '\\'))
1134 aSynAbsURIRef.append("//");
1135 nAltSegmentDelimiter = '\\';
1136 bSkippedInitialSlash = true;
1137 break;
1140 // 9th Production (any):
1141 // *path ["#" *UCS4]
1142 // becomes
1143 // "file:///" *path ["#" *UCS4]
1144 // replacing the delimiter by "/" within <*path>. The
1145 // delimiter is that character from the set { "/", "\"}
1146 // which appears most often in <*path> (if FSYS_UNX
1147 // is not among the style bits, "/" is removed from the
1148 // set; if FSYS_DOS is not among the style bits, "\" is
1149 // removed from the set). If two or
1150 // more characters appear the same number of times, the
1151 // character mentioned first in that set is chosen. If
1152 // the first character of <*path> is the delimiter, that
1153 // character is not copied
1154 if (eStyle & (FSYS_UNX | FSYS_DOS))
1156 aSynAbsURIRef.appendAscii("//");
1157 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1159 case FSYS_UNX:
1160 nSegmentDelimiter = '/';
1161 break;
1163 case FSYS_DOS:
1164 nSegmentDelimiter = '\\';
1165 break;
1167 default:
1168 OSL_FAIL(
1169 "INetURLObject::setAbsURIRef():"
1170 " Bad guessFSysStyleByCounting");
1171 break;
1173 bSkippedInitialSlash
1174 = pPos != pEnd && *pPos != nSegmentDelimiter;
1175 break;
1178 default:
1180 // For INetProtocol::File, allow an empty authority ("//") to be
1181 // missing if the following path starts with an explicit "/"
1182 // (Java is notorious in generating such file URLs, so be
1183 // liberal here):
1184 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1185 pPos += 2;
1186 else if (!bSmart
1187 && !(m_eScheme == INetProtocol::File
1188 && pPos != pEnd && *pPos == '/'))
1190 setInvalid();
1191 return false;
1193 aSynAbsURIRef.append("//");
1195 sal_Unicode const * pAuthority = pPos;
1196 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1197 while (pPos < pEnd && *pPos != '/' && *pPos != c
1198 && *pPos != nFragmentDelimiter)
1199 ++pPos;
1200 if (getSchemeInfo().m_bUser)
1201 if (getSchemeInfo().m_bHost)
1203 sal_Unicode const * p1 = pAuthority;
1204 while (p1 < pPos && *p1 != '@')
1205 ++p1;
1206 if (p1 == pPos)
1208 pHostPortBegin = pAuthority;
1209 pHostPortEnd = pPos;
1211 else
1213 pUserInfoBegin = pAuthority;
1214 pUserInfoEnd = p1;
1215 pHostPortBegin = p1 + 1;
1216 pHostPortEnd = pPos;
1219 else
1221 pUserInfoBegin = pAuthority;
1222 pUserInfoEnd = pPos;
1224 else if (getSchemeInfo().m_bHost)
1226 pHostPortBegin = pAuthority;
1227 pHostPortEnd = pPos;
1229 else if (pPos != pAuthority)
1231 setInvalid();
1232 return false;
1234 break;
1238 if (pUserInfoBegin)
1240 Part ePart = PART_USER_PASSWORD;
1241 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1242 bool bSupportsAuth
1243 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1244 bool bHasAuth = false;
1245 OUStringBuffer aSynUser;
1246 sal_Unicode const * p1 = pUserInfoBegin;
1247 while (p1 < pUserInfoEnd)
1249 EscapeType eEscapeType;
1250 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1251 eMechanism, eCharset, eEscapeType);
1252 if (eEscapeType == ESCAPE_NO)
1254 if (nUTF32 == ':' && bSupportsPassword)
1256 bHasAuth = true;
1257 break;
1259 else if (nUTF32 == ';' && bSupportsAuth
1260 && pUserInfoEnd - p1
1261 > RTL_CONSTASCII_LENGTH("auth=")
1262 && INetMIME::equalIgnoreCase(
1264 p1 + RTL_CONSTASCII_LENGTH("auth="),
1265 "auth="))
1267 p1 += RTL_CONSTASCII_LENGTH("auth=");
1268 bHasAuth = true;
1269 break;
1272 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart,
1273 eCharset, false);
1275 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1276 aSynAbsURIRef.getLength());
1277 if (bHasAuth)
1279 if (bSupportsPassword)
1281 aSynAbsURIRef.append(':');
1282 OUStringBuffer aSynAuth;
1283 while (p1 < pUserInfoEnd)
1285 EscapeType eEscapeType;
1286 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1287 eMechanism, eCharset,
1288 eEscapeType);
1289 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1290 ePart, eCharset, false);
1292 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1293 aSynAbsURIRef.getLength());
1295 else
1297 aSynAbsURIRef.append(";AUTH=");
1298 OUStringBuffer aSynAuth;
1299 while (p1 < pUserInfoEnd)
1301 EscapeType eEscapeType;
1302 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1303 eMechanism, eCharset,
1304 eEscapeType);
1305 if (!INetMIME::isIMAPAtomChar(nUTF32))
1307 setInvalid();
1308 return false;
1310 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1311 ePart, eCharset, false);
1313 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1314 aSynAbsURIRef.getLength());
1317 if (pHostPortBegin)
1318 aSynAbsURIRef.append('@');
1321 if (pHostPortBegin)
1323 sal_Unicode const * pPort = pHostPortEnd;
1324 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1326 sal_Unicode const * p1 = pHostPortEnd - 1;
1327 while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1328 --p1;
1329 if (*p1 == ':')
1330 pPort = p1;
1332 bool bNetBiosName = false;
1333 switch (m_eScheme)
1335 case INetProtocol::File:
1336 // If the host equals "LOCALHOST" (unencoded and ignoring
1337 // case), turn it into an empty host:
1338 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1339 "localhost"))
1340 pHostPortBegin = pPort;
1341 bNetBiosName = true;
1342 break;
1344 case INetProtocol::Ldap:
1345 case INetProtocol::Smb:
1346 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1348 setInvalid();
1349 return false;
1351 break;
1352 default:
1353 if (pHostPortBegin == pPort)
1355 setInvalid();
1356 return false;
1358 break;
1360 OUStringBuffer aSynHost;
1361 if (!parseHostOrNetBiosName(
1362 pHostPortBegin, pPort, bOctets, eMechanism, eCharset,
1363 bNetBiosName, &aSynHost))
1365 setInvalid();
1366 return false;
1368 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1369 aSynAbsURIRef.getLength());
1370 if (pPort != pHostPortEnd)
1372 aSynAbsURIRef.append(':');
1373 m_aPort.set(aSynAbsURIRef,
1374 OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1375 aSynAbsURIRef.getLength());
1380 // Parse <path>
1381 OUStringBuffer aSynPath;
1382 if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset,
1383 bSkippedInitialSlash, nSegmentDelimiter,
1384 nAltSegmentDelimiter,
1385 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1386 nFragmentDelimiter, aSynPath))
1388 setInvalid();
1389 return false;
1391 m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1392 aSynAbsURIRef.getLength());
1394 // Parse ?<query>
1395 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1397 aSynAbsURIRef.append('?');
1398 OUStringBuffer aSynQuery;
1399 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1401 EscapeType eEscapeType;
1402 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
1403 eMechanism, eCharset, eEscapeType);
1404 appendUCS4(aSynQuery, nUTF32, eEscapeType, bOctets,
1405 PART_URIC, eCharset, true);
1407 m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1408 aSynAbsURIRef.getLength());
1411 // Parse #<fragment>
1412 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1414 aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1415 OUStringBuffer aSynFragment;
1416 for (++pPos; pPos < pEnd;)
1418 EscapeType eEscapeType;
1419 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
1420 eMechanism, eCharset, eEscapeType);
1421 appendUCS4(aSynFragment, nUTF32, eEscapeType, bOctets, PART_URIC,
1422 eCharset, true);
1424 m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1425 aSynAbsURIRef.getLength());
1428 if (pPos != pEnd)
1430 setInvalid();
1431 return false;
1434 m_aAbsURIRef = aSynAbsURIRef;
1436 // At this point references of type "\\server\path" have
1437 // been converted to "file://server/path".
1438 #ifdef LINUX
1439 if (m_eScheme==INetProtocol::File && !m_aHost.isEmpty()) {
1440 // Change "file://server/path" URIs to "smb://server/path" on
1441 // Linux
1442 // Leave "file:///path" URIs (empty host) unchanged.
1443 changeScheme(INetProtocol::Smb);
1445 #endif
1447 #ifdef WIN
1448 if (m_eScheme==INetProtocol::Smb) {
1449 // Change "smb://server/path" URIs to "file://server/path"
1450 // URIs on Windows, since Windows doesn't understand the
1451 // SMB scheme.
1452 changeScheme(INetProtocol::File);
1454 #endif
1456 return true;
1459 void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
1460 OUString aTmpStr=m_aAbsURIRef.makeStringAndClear();
1461 int oldSchemeLen=strlen(getSchemeInfo().m_pScheme);
1462 m_eScheme=eTargetScheme;
1463 int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
1464 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1465 m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
1466 int delta=newSchemeLen-oldSchemeLen;
1467 m_aUser+=delta;
1468 m_aAuth+=delta;
1469 m_aHost+=delta;
1470 m_aPort+=delta;
1471 m_aPath+=delta;
1472 m_aQuery+=delta;
1473 m_aFragment+=delta;
// Resolve the URI reference rTheRelURIRef against this object, which acts as
// the base URL, and store the resulting absolute URL in rTheAbsURIRef.
//
// rTheRelURIRef    the reference to resolve (relative or already absolute).
// bOctets          forwarded unchanged to getUTF32(), appendUCS4(),
//                  parseHostOrNetBiosName() and setAbsURIRef().
// rTheAbsURIRef    receives the result; only assigned on the paths that
//                  return true.
// rWasAbsolute     set to true when the input was recognised as an absolute
//                  DOS/UNC file system path, otherwise to whether the input
//                  carried a scheme; reset to false on every failure path.
// eMechanism,
// eCharset         forwarded to the decoding/encoding helpers.
// bIgnoreFragment  if true, the fragment is cleared from an absolute input
//                  and not appended when synthesising a relative one.
// bSmart           enables the file system path heuristics below for inputs
//                  without a scheme.
// bRelativeNonURIs if true (or forced for DOS style paths under a file base
//                  URL), the input is encoded with ENCODE_ALL and '?'/'#'
//                  are not treated as delimiters.
// eStyle           file system style bits consulted by the heuristics.
//
// Returns true on success.  Returns false if an input with a scheme cannot
// be parsed, if a relative path is given but the base scheme is not
// hierarchical, or if the synthesised URL is invalid.
1476 bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1477 bool bOctets,
1478 INetURLObject & rTheAbsURIRef,
1479 bool & rWasAbsolute,
1480 EncodeMechanism eMechanism,
1481 rtl_TextEncoding eCharset,
1482 bool bIgnoreFragment, bool bSmart,
1483 bool bRelativeNonURIs, FSysStyle eStyle)
1484 const
1486 sal_Unicode const * p = rTheRelURIRef.getStr();
1487 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
// Decide whether the input starts with a scheme: first try the table of
// known prefixes, then fall back to the generic scheme syntax.
1489 sal_Unicode const * pPrefixBegin = p;
1490 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1491 bool hasScheme = pPrefix != 0;
1492 if (!hasScheme) {
1493 pPrefixBegin = p;
1494 hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
// Delimiters used while scanning the input.  0x80000000 is used as a
// "disabled" value; presumably it can never compare equal to a character.
1497 sal_uInt32 nSegmentDelimiter = '/';
1498 sal_uInt32 nQueryDelimiter
1499 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1500 sal_uInt32 nFragmentDelimiter = '#';
1501 Part ePart = PART_VISIBLE;
1503 if (!hasScheme && bSmart)
1505 // If the input matches any of the following productions (for which
1506 // the appropriate style bit is set in eStyle), it is assumed to be an
1507 // absolute file system path, rather than a relative URI reference.
1508 // (This is only a subset of the productions used for scheme detection
1509 // in INetURLObject::setAbsURIRef(), because most of those productions
1510 // interfere with the syntax of relative URI references.) The
1511 // productions use the auxiliary rules
1513 // domain = label *("." label)
1514 // label = alphanum [*(alphanum / "-") alphanum]
1515 // alphanum = ALPHA / DIGIT
1516 // UCS4 = <any UCS4 character>
1518 // 1st Production (UNC file; FSYS_DOS only):
1519 // "\\" domain ["\" *UCS4]
1520 // 2nd Production (Unix-like DOS file; FSYS_DOS only):
1521 // ALPHA ":" ["/" *UCS4]
1522 // 3rd Production (DOS file; FSYS_DOS only):
1523 // ALPHA ":" ["\" *UCS4]
1524 if (eStyle & FSYS_DOS)
1526 bool bFSys = false;
1527 sal_Unicode const * q = p;
1528 if (pEnd - q >= 2
1529 && rtl::isAsciiAlpha(q[0])
1530 && q[1] == ':'
1531 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1532 bFSys = true; // 2nd, 3rd
1533 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1535 q += 2;
// The candidate host name runs up to the next backslash, or to the end of the
// input if there is none.
1536 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1537 q, pEnd - q, '\\');
1538 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1539 if (parseHostOrNetBiosName(
1540 q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
1541 true, NULL))
1543 bFSys = true; // 1st
// An absolute file system path is parsed on its own via setAbsURIRef(); if
// that fails, processing continues with the generic handling below.
1546 if (bFSys)
1548 INetURLObject aNewURI;
1549 aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
1550 eCharset, true, eStyle);
1551 if (!aNewURI.HasError())
1553 rTheAbsURIRef = aNewURI;
1554 rWasAbsolute = true;
1555 return true;
1560 // When the base URL is a file URL, accept relative file system paths
1561 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1562 // and "#"), as well as relative URIs using "/" as delimiter:
1563 if (m_eScheme == INetProtocol::File)
1564 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1566 case FSYS_UNX:
1567 nSegmentDelimiter = '/';
1568 break;
1570 case FSYS_DOS:
1571 nSegmentDelimiter = '\\';
1572 bRelativeNonURIs = true;
1573 break;
1575 default:
1576 OSL_FAIL("INetURLObject::convertRelToAbs():"
1577 " Bad guessFSysStyleByCounting");
1578 break;
// Plain (non-URI) relative path: encode every character and disable the
// query and fragment delimiters.
1581 if (bRelativeNonURIs)
1583 eMechanism = ENCODE_ALL;
1584 nQueryDelimiter = 0x80000000;
1585 nFragmentDelimiter = 0x80000000;
1586 ePart = PART_VISIBLE_NONSPECIAL;
1590 // If the relative URI has the same scheme as the base URI, and that
1591 // scheme is hierarchical, then ignore its presence in the relative
1592 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1593 // step 3):
1594 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1595 && getSchemeInfo().m_bHierarchical)
1597 hasScheme = false;
// Advance p just past the first ':' (or to the end if there is none).
1598 while (p != pEnd && *p++ != ':') ;
1600 rWasAbsolute = hasScheme;
1602 // Fast solution for non-relative URIs:
1603 if (hasScheme)
1605 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1606 if (aNewURI.HasError())
1608 rWasAbsolute = false;
1609 return false;
1612 if (bIgnoreFragment)
1613 aNewURI.clearFragment();
1614 rTheAbsURIRef = aNewURI;
1615 return true;
// States of the scan over the remaining relative reference.
1618 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1619 STATE_DONE };
1621 OUStringBuffer aSynAbsURIRef;
1622 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1623 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1624 if (m_eScheme != INetProtocol::Generic)
1626 aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1628 else
// NOTE(review): this scan has no end check and relies on m_aAbsURIRef
// containing a ':' -- presumably guaranteed for a parsed generic URL; confirm.
1630 sal_Unicode const * pSchemeBegin
1631 = m_aAbsURIRef.getStr();
1632 sal_Unicode const * pSchemeEnd = pSchemeBegin;
1633 while (pSchemeEnd[0] != ':')
1635 ++pSchemeEnd;
1637 aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1639 aSynAbsURIRef.append(':');
// bSameDoc stays true only while the reference has contributed neither an
// authority nor a path of its own.
1641 State eState = STATE_AUTH;
1642 bool bSameDoc = true;
// Authority: taken from the input when it starts with "//", otherwise copied
// from the base URL.
1644 if (getSchemeInfo().m_bAuthority)
1646 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1648 aSynAbsURIRef.append("//");
1649 p += 2;
1650 eState = STATE_ABS_PATH;
1651 bSameDoc = false;
1652 while (p != pEnd)
1654 EscapeType eEscapeType;
1655 sal_uInt32 nUTF32
1656 = getUTF32(p, pEnd, bOctets, eMechanism,
1657 eCharset, eEscapeType);
// Only unescaped delimiters terminate the authority.
1658 if (eEscapeType == ESCAPE_NO)
1660 if (nUTF32 == nSegmentDelimiter)
1661 break;
1662 else if (nUTF32 == nFragmentDelimiter)
1664 eState = STATE_FRAGMENT;
1665 break;
1668 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1669 PART_VISIBLE, eCharset, true);
1672 else
1674 SubString aAuthority(getAuthority());
1675 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1676 + aAuthority.getBegin(),
1677 aAuthority.getLength());
// No authority was read from the input: classify what follows by its first
// character.
1681 if (eState == STATE_AUTH)
1683 if (p == pEnd)
1684 eState = STATE_DONE;
1685 else if (*p == nFragmentDelimiter)
1687 ++p;
1688 eState = STATE_FRAGMENT;
1690 else if (*p == nSegmentDelimiter)
1692 ++p;
1693 eState = STATE_ABS_PATH;
1694 bSameDoc = false;
1696 else
1698 eState = STATE_REL_PATH;
1699 bSameDoc = false;
// Absolute path: copy the rest of the input up to an unescaped fragment
// delimiter, normalising the segment delimiter to '/'.
1703 if (eState == STATE_ABS_PATH)
1705 aSynAbsURIRef.append('/');
1706 eState = STATE_DONE;
1707 while (p != pEnd)
1709 EscapeType eEscapeType;
1710 sal_uInt32 nUTF32
1711 = getUTF32(p, pEnd, bOctets, eMechanism, eCharset, eEscapeType);
1712 if (eEscapeType == ESCAPE_NO)
1714 if (nUTF32 == nFragmentDelimiter)
1716 eState = STATE_FRAGMENT;
1717 break;
1719 else if (nUTF32 == nSegmentDelimiter)
1720 nUTF32 = '/';
1722 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1723 eCharset, true);
1726 else if (eState == STATE_REL_PATH)
1728 if (!getSchemeInfo().m_bHierarchical)
1730 // Detect cases where a relative input could not be made absolute
1731 // because the given base URL is broken (most probably because it is
1732 // empty):
1733 SAL_WARN_IF(
1734 HasError(), "tools.urlobj",
1735 "cannot make <" << rTheRelURIRef
1736 << "> absolute against broken base <"
1737 << GetMainURL(NO_DECODE) << ">");
1738 rWasAbsolute = false;
1739 return false;
// Use the base path up to and including its last '/' as the starting point
// (the whole scan ends at pBasePathBegin if the base path has no '/').
1742 sal_Unicode const * pBasePathBegin
1743 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1744 sal_Unicode const * pBasePathEnd
1745 = pBasePathBegin + m_aPath.getLength();
1746 while (pBasePathEnd != pBasePathBegin)
1747 if (*(--pBasePathEnd) == '/')
1749 ++pBasePathEnd;
1750 break;
1753 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1754 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1755 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1756 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1757 "INetURLObject::convertRelToAbs(): Bad base path");
// Consume the relative path segment by segment.
1759 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1761 if (*p == '.')
// A lone "." segment is skipped.
1763 if (pEnd - p == 1
1764 || p[1] == nSegmentDelimiter
1765 || p[1] == nQueryDelimiter
1766 || p[1] == nFragmentDelimiter)
1768 ++p;
1769 if (p != pEnd && *p == nSegmentDelimiter)
1770 ++p;
1771 continue;
// A ".." segment removes the last segment of the synthesised path, but only
// while more than the leading '/' of the path is left.
1773 else if (pEnd - p >= 2
1774 && p[1] == '.'
1775 && (pEnd - p == 2
1776 || p[2] == nSegmentDelimiter
1777 || p[2] == nQueryDelimiter
1778 || p[2] == nFragmentDelimiter)
1779 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1781 p += 2;
1782 if (p != pEnd && *p == nSegmentDelimiter)
1783 ++p;
// Search backwards from just before the trailing '/' for the previous '/'.
1785 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1786 while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1787 --i;
1788 aSynAbsURIRef.setLength(i + 1);
1789 DBG_ASSERT(
1790 aSynAbsURIRef.getLength() > nPathBegin
1791 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1792 "INetURLObject::convertRelToAbs(): Bad base path");
1793 continue;
// Ordinary segment: copy it, then emit '/' for a following segment delimiter.
1797 while (p != pEnd
1798 && *p != nSegmentDelimiter
1799 && *p != nQueryDelimiter
1800 && *p != nFragmentDelimiter)
1802 EscapeType eEscapeType;
1803 sal_uInt32 nUTF32
1804 = getUTF32(p, pEnd, bOctets, eMechanism,
1805 eCharset, eEscapeType);
1806 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1807 eCharset, true);
1809 if (p != pEnd && *p == nSegmentDelimiter)
1811 aSynAbsURIRef.append('/');
1812 ++p;
// Copy whatever remains before the fragment delimiter (the query, if any).
1816 while (p != pEnd && *p != nFragmentDelimiter)
1818 EscapeType eEscapeType;
1819 sal_uInt32 nUTF32
1820 = getUTF32(p, pEnd, bOctets, eMechanism, eCharset, eEscapeType);
1821 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1822 eCharset, true);
1825 if (p == pEnd)
1826 eState = STATE_DONE;
1827 else
1829 ++p;
1830 eState = STATE_FRAGMENT;
// The reference supplied neither authority nor path: reuse the base URL's
// path and, if present, its query together with the one character preceding
// it (presumably the '?').
1833 else if (bSameDoc)
1835 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1836 m_aPath.getLength());
1837 if (m_aQuery.isPresent())
1838 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1839 + m_aQuery.getBegin() - 1,
1840 m_aQuery.getLength() + 1);
// Append the fragment unless the caller asked for it to be dropped.
1843 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1845 aSynAbsURIRef.append('#');
1846 while (p != pEnd)
1848 EscapeType eEscapeType;
1849 sal_uInt32 nUTF32
1850 = getUTF32(p, pEnd, bOctets, eMechanism, eCharset, eEscapeType);
1851 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1852 PART_VISIBLE, eCharset, true);
// Validate the synthesised text by parsing it as a URL of its own.
1856 INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1857 if (aNewURI.HasError())
1859 // Detect cases where a relative input could not be made absolute
1860 // because the given base URL is broken (most probably because it is
1861 // empty):
1862 SAL_WARN_IF(
1863 HasError(), "tools.urlobj",
1864 "cannot make <" << rTheRelURIRef
1865 << "> absolute against broken base <" << GetMainURL(NO_DECODE)
1866 << ">");
1867 rWasAbsolute = false;
1868 return false;
1871 rTheAbsURIRef = aNewURI;
1872 return true;
// Try to express rTheAbsURIRef as a reference relative to this object, which
// acts as the base URL.
//
// rTheAbsURIRef     the reference to convert; it is first made absolute
//                   against this base URL via convertRelToAbs().
// bOctets           forwarded to convertRelToAbs().
// rTheRelURIRef     receives the result.  On success this is the relative
//                   reference; on failure it is either the decoded input
//                   (base not hierarchical, or input could not be made
//                   absolute) or the main URL of the absolute subject.
// eEncodeMechanism  forwarded to convertRelToAbs().
// eDecodeMechanism,
// eCharset          used for every decode()/GetMainURL() call that produces
//                   output.
// eStyle            forwarded to convertRelToAbs() and hasDosVolume().
//
// Returns true if a relative reference was produced, false otherwise.
1875 bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1876 bool bOctets, OUString & rTheRelURIRef,
1877 EncodeMechanism eEncodeMechanism,
1878 DecodeMechanism eDecodeMechanism,
1879 rtl_TextEncoding eCharset,
1880 FSysStyle eStyle) const
1882 // Check for hierarchical base URL:
1883 if (!getSchemeInfo().m_bHierarchical)
1885 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1886 return false;
1889 // Convert the input (absolute or relative URI ref) to an absolute URI
1890 // ref:
// (called with bIgnoreFragment, bSmart and bRelativeNonURIs all false;
// bWasAbsolute is not evaluated afterwards)
1891 INetURLObject aSubject;
1892 bool bWasAbsolute;
1893 if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute,
1894 eEncodeMechanism, eCharset, false, false, false,
1895 eStyle))
1897 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1898 return false;
1901 // Check for differing scheme or authority parts:
1902 if ((m_aScheme.compare(
1903 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1904 != 0)
1905 || (m_aUser.compare(
1906 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1907 != 0)
1908 || (m_aAuth.compare(
1909 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1910 != 0)
1911 || (m_aHost.compare(
1912 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1913 != 0)
1914 || (m_aPort.compare(
1915 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1916 != 0))
1918 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1919 return false;
// Raw pointers delimiting the path part of the base URL and of the subject.
1922 sal_Unicode const * pBasePathBegin
1923 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1924 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1925 sal_Unicode const * pSubjectPathBegin
1926 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1927 sal_Unicode const * pSubjectPathEnd
1928 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1930 // Make nMatch point past the last matching slash, or past the end of the
1931 // paths, in case they are equal:
// pSlash tracks that position as a pointer into the base path while both
// paths are walked in parallel; it stays null if no common '/' was seen and
// the paths are not identical.
1932 sal_Unicode const * pSlash = 0;
1933 sal_Unicode const * p1 = pBasePathBegin;
1934 sal_Unicode const * p2 = pSubjectPathBegin;
1935 for (;;)
1937 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1939 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1940 pSlash = p1;
1941 break;
1944 sal_Unicode c = *p1++;
1945 if (c != *p2++)
1946 break;
1947 if (c == '/')
1948 pSlash = p1;
1950 if (!pSlash)
1952 // One of the paths does not start with '/':
1953 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1954 return false;
// Length of the common prefix; valid as an offset into both paths because the
// prefix is identical in each.
1956 sal_Int32 nMatch = pSlash - pBasePathBegin;
1958 // If the two URLs are DOS file URLs starting with different volumes
1959 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1960 // relative (it could be, but some people do not like that):
1961 if (m_eScheme == INetProtocol::File
1962 && nMatch <= 1
1963 && hasDosVolume(eStyle)
1964 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1966 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1967 return false;
1970 // For every slash in the base path after nMatch, a prefix of "../" is
1971 // added to the new relative URL (if the common prefix of the two paths is
1972 // only "/"---but see handling of file URLs above---, the complete subject
1973 // path could go into the new relative URL instead, but some people don't
1974 // like that):
1975 OUStringBuffer aSynRelURIRef;
1976 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1977 ++p)
1979 if (*p == '/')
1980 aSynRelURIRef.append("../");
1983 // If the new relative URL would start with "//" (i.e., it would be
1984 // mistaken for a relative URL starting with an authority part), or if the
1985 // new relative URL would neither be empty nor start with <"/"> nor start
1986 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1987 // with a scheme part), then the new relative URL is prefixed with "./":
// (only relevant when no "../" prefix was added above)
1988 if (aSynRelURIRef.isEmpty())
1990 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1991 && pSubjectPathBegin[nMatch] == '/'
1992 && pSubjectPathBegin[nMatch + 1] == '/')
1994 aSynRelURIRef.append("./");
1996 else
// Inspect the first remaining segment of the subject path only.
1998 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
1999 p != pSubjectPathEnd && *p != '/'; ++p)
2001 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2003 aSynRelURIRef.append("./");
2004 break;
2010 // The remainder of the subject path, starting at nMatch, is appended to
2011 // the new relative URL:
2012 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2013 eDecodeMechanism, eCharset));
2015 // If the subject has defined query or fragment parts, they are appended
2016 // to the new relative URL:
2017 if (aSubject.m_aQuery.isPresent())
2019 aSynRelURIRef.append('?');
2020 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery,
2021 eDecodeMechanism, eCharset));
2023 if (aSubject.m_aFragment.isPresent())
2025 aSynRelURIRef.append('#');
2026 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2027 eDecodeMechanism, eCharset));
2030 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2031 return true;
2034 // static
2035 bool INetURLObject::convertIntToExt(OUString const & rTheIntURIRef,
2036 bool bOctets, OUString & rTheExtURIRef,
2037 DecodeMechanism eDecodeMechanism,
2038 rtl_TextEncoding eCharset)
2040 OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE,
2041 NOT_CANONIC, eCharset, true));
2042 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2043 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2044 sal_Unicode const * p = pBegin;
2045 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2046 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2047 if (bConvert)
2049 aSynExtURIRef =
2050 aSynExtURIRef.replaceAt(0, p - pBegin,
2051 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2053 rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset);
2054 return bConvert;
2057 // static
2058 bool INetURLObject::convertExtToInt(OUString const & rTheExtURIRef,
2059 bool bOctets, OUString & rTheIntURIRef,
2060 DecodeMechanism eDecodeMechanism,
2061 rtl_TextEncoding eCharset)
2063 OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE,
2064 NOT_CANONIC, eCharset, true));
2065 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2066 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2067 sal_Unicode const * p = pBegin;
2068 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2069 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2070 if (bConvert)
2072 aSynIntURIRef =
2073 aSynIntURIRef.replaceAt(0, p - pBegin,
2074 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2076 rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset);
2077 return bConvert;
2080 // static
// Looks up the scheme prefix at the start of [rBegin, pEnd) in a static,
// alphabetically sorted table.  Input characters are lowercased (ASCII only)
// before being compared with the (lowercase) table prefixes.
//
// rBegin  in: start of the text to inspect; out: position just behind the
//         matched prefix, or the original position if nothing matched.
// pEnd    end of the text to inspect.
// Returns a pointer to the matching table entry (static storage), or 0 if no
// prefix matched.
//
// When one table prefix is itself a prefix of another (e.g. "private:" and
// "private:factory/"), the shorter match is remembered in pMatch/pMatched and
// is only returned when no longer candidate can be completed.
2081 INetURLObject::PrefixInfo const * INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2082 sal_Unicode const * pEnd)
2084 static PrefixInfo const aMap[]
2085 = { // dummy entry at front needed, because pLast may point here:
2086 { 0, 0, INetProtocol::NotValid, PrefixInfo::INTERNAL },
2087 { ".component:", "staroffice.component:", INetProtocol::Component,
2088 PrefixInfo::INTERNAL },
2089 { ".uno:", "staroffice.uno:", INetProtocol::Uno,
2090 PrefixInfo::INTERNAL },
2091 { "cid:", 0, INetProtocol::Cid, PrefixInfo::OFFICIAL },
2092 { "data:", 0, INetProtocol::Data, PrefixInfo::OFFICIAL },
2093 { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::INTERNAL },
2094 { "file:", 0, INetProtocol::File, PrefixInfo::OFFICIAL },
2095 { "ftp:", 0, INetProtocol::Ftp, PrefixInfo::OFFICIAL },
2096 { "hid:", "staroffice.hid:", INetProtocol::Hid,
2097 PrefixInfo::INTERNAL },
2098 { "http:", 0, INetProtocol::Http, PrefixInfo::OFFICIAL },
2099 { "https:", 0, INetProtocol::Https, PrefixInfo::OFFICIAL },
2100 { "javascript:", 0, INetProtocol::Javascript, PrefixInfo::OFFICIAL },
2101 { "ldap:", 0, INetProtocol::Ldap, PrefixInfo::OFFICIAL },
2102 { "macro:", "staroffice.macro:", INetProtocol::Macro,
2103 PrefixInfo::INTERNAL },
2104 { "mailto:", 0, INetProtocol::Mailto, PrefixInfo::OFFICIAL },
2105 { "private:", "staroffice.private:", INetProtocol::PrivSoffice,
2106 PrefixInfo::INTERNAL },
2107 { "private:factory/", "staroffice.factory:",
2108 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2109 { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice,
2110 PrefixInfo::INTERNAL },
2111 { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice,
2112 PrefixInfo::INTERNAL },
2113 { "private:searchfolder:", "staroffice.searchfolder:",
2114 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2115 { "private:trashcan:", "staroffice.trashcan:",
2116 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2117 { "sftp:", 0, INetProtocol::Sftp, PrefixInfo::OFFICIAL },
2118 { "slot:", "staroffice.slot:", INetProtocol::Slot,
2119 PrefixInfo::INTERNAL },
2120 { "smb:", 0, INetProtocol::Smb, PrefixInfo::OFFICIAL },
2121 { "staroffice.component:", ".component:", INetProtocol::Component,
2122 PrefixInfo::EXTERNAL },
2123 { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::EXTERNAL },
2124 { "staroffice.factory:", "private:factory/",
2125 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2126 { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice,
2127 PrefixInfo::EXTERNAL },
2128 { "staroffice.hid:", "hid:", INetProtocol::Hid,
2129 PrefixInfo::EXTERNAL },
2130 { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice,
2131 PrefixInfo::EXTERNAL },
2132 { "staroffice.macro:", "macro:", INetProtocol::Macro,
2133 PrefixInfo::EXTERNAL },
2134 { "staroffice.private:", "private:", INetProtocol::PrivSoffice,
2135 PrefixInfo::EXTERNAL },
2136 { "staroffice.searchfolder:", "private:searchfolder:",
2137 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2138 { "staroffice.slot:", "slot:", INetProtocol::Slot,
2139 PrefixInfo::EXTERNAL },
2140 { "staroffice.trashcan:", "private:trashcan:",
2141 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2142 { "staroffice.uno:", ".uno:", INetProtocol::Uno,
2143 PrefixInfo::EXTERNAL },
2144 { "staroffice:", "private:", INetProtocol::PrivSoffice,
2145 PrefixInfo::EXTERNAL },
2146 { "telnet:", 0, INetProtocol::Telnet, PrefixInfo::OFFICIAL },
// NOTE(review): the next entry is INTERNAL but has no translated prefix (0);
// code that translates INTERNAL prefixes via m_pTranslatedPrefix should be
// checked for how it copes with a null pointer -- TODO confirm.
2147 { "vnd.libreoffice.cmis:", 0, INetProtocol::Cmis, PrefixInfo::INTERNAL },
2148 { "vnd.sun.star.cmd:", 0, INetProtocol::VndSunStarCmd,
2149 PrefixInfo::OFFICIAL },
2150 { "vnd.sun.star.expand:", 0, INetProtocol::VndSunStarExpand,
2151 PrefixInfo::OFFICIAL },
2152 { "vnd.sun.star.help:", 0, INetProtocol::VndSunStarHelp,
2153 PrefixInfo::OFFICIAL },
2154 { "vnd.sun.star.hier:", 0, INetProtocol::VndSunStarHier,
2155 PrefixInfo::OFFICIAL },
2156 { "vnd.sun.star.pkg:", 0, INetProtocol::VndSunStarPkg,
2157 PrefixInfo::OFFICIAL },
2158 { "vnd.sun.star.tdoc:", 0, INetProtocol::VndSunStarTdoc,
2159 PrefixInfo::OFFICIAL },
2160 { "vnd.sun.star.webdav:", 0, INetProtocol::VndSunStarWebdav,
2161 PrefixInfo::OFFICIAL } };
2162 /* This list needs to be sorted, or you'll introduce serious bugs */
// [pFirst, pLast] is the range of table entries still considered candidates;
// it starts behind the dummy entry and covers the whole table.
2164 PrefixInfo const * pFirst = aMap + 1;
2165 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
// Best complete (shorter) match seen so far and the input position behind it.
2166 PrefixInfo const * pMatch = 0;
2167 sal_Unicode const * pMatched = rBegin;
2168 sal_Unicode const * p = rBegin;
// i is the index of the prefix character compared in this round.
2169 sal_Int32 i = 0;
2170 for (; pFirst < pLast; ++i)
// The first candidate ends here: it is a complete match.  Remember it and
// keep looking for a longer one among the remaining candidates.
2172 if (pFirst->m_pPrefix[i] == '\0')
2174 pMatch = pFirst++;
2175 pMatched = p;
2177 if (p >= pEnd)
2178 break;
2179 sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++);
// Shrink the candidate range from both ends to the entries whose i-th
// character equals nChar (this is why the table must be sorted).
2180 while (pFirst <= pLast && static_cast<unsigned char>(pFirst->m_pPrefix[i]) < nChar)
2181 ++pFirst;
2182 while (pFirst <= pLast && static_cast<unsigned char>(pLast->m_pPrefix[i]) > nChar)
2183 --pLast;
// Exactly one candidate is left: compare the rest of its prefix directly.
2185 if (pFirst == pLast)
2187 sal_Char const * q = pFirst->m_pPrefix + i;
2188 while (p < pEnd && *q != '\0'
2189 && rtl::toAsciiLowerCase(*p) == static_cast<unsigned char>(*q))
2191 ++p;
2192 ++q;
2194 if (*q == '\0')
2196 rBegin = p;
2197 return pFirst;
// No longer candidate completed: fall back to the remembered match (if any).
2200 rBegin = pMatched;
2201 return pMatch;
2204 sal_Int32 INetURLObject::getAuthorityBegin() const
2206 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2207 "INetURLObject::getAuthority(): Bad scheme");
2208 sal_Int32 nBegin;
2209 if (m_aUser.isPresent())
2210 nBegin = m_aUser.getBegin();
2211 else if (m_aHost.isPresent())
2212 nBegin = m_aHost.getBegin();
2213 else
2214 nBegin = m_aPath.getBegin();
2215 nBegin -= RTL_CONSTASCII_LENGTH("//");
2216 DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/',
2217 "INetURLObject::getAuthority(): Bad authority");
2218 return nBegin;
2221 INetURLObject::SubString INetURLObject::getAuthority() const
2223 sal_Int32 nBegin = getAuthorityBegin();
2224 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2225 m_aHost.isPresent() ? m_aHost.getEnd() :
2226 m_aAuth.isPresent() ? m_aAuth.getEnd() :
2227 m_aUser.isPresent() ? m_aUser.getEnd() :
2228 nBegin + RTL_CONSTASCII_LENGTH("//");
2229 return SubString(nBegin, nEnd - nBegin);
2232 bool INetURLObject::setUser(OUString const & rTheUser,
2233 bool bOctets, EncodeMechanism eMechanism,
2234 rtl_TextEncoding eCharset)
2236 if (
2237 !getSchemeInfo().m_bUser
2240 return false;
2243 OUString aNewUser(encodeText(rTheUser, bOctets, PART_USER_PASSWORD,
2244 eMechanism, eCharset, false));
2245 sal_Int32 nDelta;
2246 if (m_aUser.isPresent())
2247 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2248 else if (m_aHost.isPresent())
2250 m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@'));
2251 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2253 else if (getSchemeInfo().m_bHost)
2254 return false;
2255 else
2256 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2257 m_aAuth += nDelta;
2258 m_aHost += nDelta;
2259 m_aPort += nDelta;
2260 m_aPath += nDelta;
2261 m_aQuery += nDelta;
2262 m_aFragment += nDelta;
2263 return true;
2266 namespace
2268 void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2270 OUString sTemp(rBuf.makeStringAndClear());
2271 rBuf.append(sTemp.replaceAt(index, count, OUString()));
2275 bool INetURLObject::clearPassword()
2277 if (!getSchemeInfo().m_bPassword)
2278 return false;
2279 if (m_aAuth.isPresent())
2281 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2282 m_aAuth.getLength() + 1);
2283 sal_Int32 nDelta = m_aAuth.clear() - 1;
2284 m_aHost += nDelta;
2285 m_aPort += nDelta;
2286 m_aPath += nDelta;
2287 m_aQuery += nDelta;
2288 m_aFragment += nDelta;
2290 return true;
2293 bool INetURLObject::setPassword(OUString const & rThePassword,
2294 bool bOctets, EncodeMechanism eMechanism,
2295 rtl_TextEncoding eCharset)
2297 if (!getSchemeInfo().m_bPassword)
2298 return false;
2299 OUString aNewAuth(encodeText(rThePassword, bOctets, PART_USER_PASSWORD,
2300 eMechanism, eCharset, false));
2301 sal_Int32 nDelta;
2302 if (m_aAuth.isPresent())
2303 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2304 else if (m_aUser.isPresent())
2306 m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':'));
2307 nDelta
2308 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2310 else if (m_aHost.isPresent())
2312 m_aAbsURIRef.insert(m_aHost.getBegin(),
2313 OUString( ":@" ));
2314 m_aUser.set(m_aAbsURIRef, OUString(), m_aHost.getBegin());
2315 nDelta
2316 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2318 else if (getSchemeInfo().m_bHost)
2319 return false;
2320 else
2322 m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':'));
2323 m_aUser.set(m_aAbsURIRef, OUString(), m_aPath.getBegin());
2324 nDelta
2325 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2327 m_aHost += nDelta;
2328 m_aPort += nDelta;
2329 m_aPath += nDelta;
2330 m_aQuery += nDelta;
2331 m_aFragment += nDelta;
2332 return true;
2335 // static
// Parses a host starting at rBegin using a character-driven state machine.
// Three kinds of host are accepted:
//  - a name made of labels (ASCII alphanumerics and '_', with '-' allowed
//    between them) separated by '.',
//  - a dotted IPv4 address with exactly four numeric parts,
//  - an IPv6 address enclosed in '[' ... ']'.
// Scanning stops at pEnd or at the first character that does not fit the
// current state; whether the text scanned so far is a valid host is decided
// by the state reached (see the switch behind the "done" label).
//
// rBegin    in: start of the text; out (only on success): position behind
//           the parsed host.
// pEnd      end of the text.
// rCanonic  out (only on success): the host text; names are copied verbatim,
//           numeric parts of IP addresses are re-rendered from their parsed
//           values.
// Returns true on success, false otherwise (rBegin and rCanonic untouched).
2336 bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2337 OUString & rCanonic)
2339 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2340 // IPv4 address directly follows the abbreviating "::". The ABNF in
2341 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2342 // mentions "::13:1.68.3". This algorithm accepts both variants:
2343 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2344 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2345 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2346 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2347 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2348 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2349 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2350 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
// aTheCanonic collects the canonic text of IP addresses while scanning.
// nNumber/nDigits hold the value and digit count of the numeric part being
// read; nOctets counts the parts of an IPv4 address seen so far.
2351 OUStringBuffer aTheCanonic;
2352 sal_uInt32 nNumber = 0;
2353 int nDigits = 0;
2354 int nOctets = 0;
2355 State eState = STATE_INITIAL;
2356 sal_Unicode const * p = rBegin;
2357 for (; p != pEnd; ++p)
2358 switch (eState)
// First character decides between IPv6 ('['), a name, or a number that may
// turn out to be an IPv4 address or a label starting with digits.
2360 case STATE_INITIAL:
2361 if (*p == '[')
2363 aTheCanonic.append('[');
2364 eState = STATE_IP6;
2366 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2367 eState = STATE_TOPLABEL;
2368 else if (rtl::isAsciiDigit(*p))
2370 nNumber = INetMIME::getWeight(*p);
2371 nDigits = 1;
2372 nOctets = 1;
2373 eState = STATE_IP4;
2375 else
2376 goto done;
2377 break;
// LABEL*: inside a label that started with a digit; TOPLABEL*: inside a
// label that started with a letter or '_'.  The *_HYPHEN and *_DOT states
// remember that the previous character was '-' or '.'.
2379 case STATE_LABEL:
2380 if (*p == '.')
2381 eState = STATE_LABEL_DOT;
2382 else if (*p == '-')
2383 eState = STATE_LABEL_HYPHEN;
2384 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2385 goto done;
2386 break;
2388 case STATE_LABEL_HYPHEN:
2389 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2390 eState = STATE_LABEL;
2391 else if (*p != '-')
2392 goto done;
2393 break;
2395 case STATE_LABEL_DOT:
2396 if (rtl::isAsciiAlpha(*p) || *p == '_')
2397 eState = STATE_TOPLABEL;
2398 else if (rtl::isAsciiDigit(*p))
2399 eState = STATE_LABEL;
2400 else
2401 goto done;
2402 break;
2404 case STATE_TOPLABEL:
2405 if (*p == '.')
2406 eState = STATE_TOPLABEL_DOT;
2407 else if (*p == '-')
2408 eState = STATE_TOPLABEL_HYPHEN;
2409 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2410 goto done;
2411 break;
2413 case STATE_TOPLABEL_HYPHEN:
2414 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2415 eState = STATE_TOPLABEL;
2416 else if (*p != '-')
2417 goto done;
2418 break;
2420 case STATE_TOPLABEL_DOT:
2421 if (rtl::isAsciiAlpha(*p) || *p == '_')
2422 eState = STATE_TOPLABEL;
2423 else if (rtl::isAsciiDigit(*p))
2424 eState = STATE_LABEL;
2425 else
2426 goto done;
2427 break;
// Reading a decimal part of a possible IPv4 address.  A fifth part, a fourth
// digit, a letter, '_' or '-' turns the text into a name instead.  Note that
// only the digit count (at most 3) is limited here, not the value.
2429 case STATE_IP4:
2430 if (*p == '.')
2431 if (nOctets < 4)
2433 aTheCanonic.append( OUString::number(nNumber) );
2434 aTheCanonic.append( '.' );
2435 ++nOctets;
2436 eState = STATE_IP4_DOT;
2438 else
2439 eState = STATE_LABEL_DOT;
2440 else if (*p == '-')
2441 eState = STATE_LABEL_HYPHEN;
2442 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2443 eState = STATE_LABEL;
2444 else if (rtl::isAsciiDigit(*p))
2445 if (nDigits < 3)
2447 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2448 ++nDigits;
2450 else
2451 eState = STATE_LABEL;
2452 else
2453 goto done;
2454 break;
2456 case STATE_IP4_DOT:
2457 if (rtl::isAsciiAlpha(*p) || *p == '_')
2458 eState = STATE_TOPLABEL;
2459 else if (rtl::isAsciiDigit(*p))
2461 nNumber = INetMIME::getWeight(*p);
2462 nDigits = 1;
2463 eState = STATE_IP4;
2465 else
2466 goto done;
2467 break;
// Directly behind '[': either a leading "::" or the first hex group.
2469 case STATE_IP6:
2470 if (*p == ':')
2471 eState = STATE_IP6_COLON;
2472 else if (rtl::isAsciiHexDigit(*p))
2474 nNumber = INetMIME::getHexWeight(*p);
2475 nDigits = 1;
2476 eState = STATE_IP6_HEXSEQ1;
2478 else
2479 goto done;
2480 break;
2482 case STATE_IP6_COLON:
2483 if (*p == ':')
2485 aTheCanonic.append("::");
2486 eState = STATE_IP6_2COLON;
2488 else
2489 goto done;
2490 break;
// Behind "::".  A decimal digit may start either a hex group or an embedded
// IPv4 address, hence the MAYBE_IP4 state.
2492 case STATE_IP6_2COLON:
2493 if (*p == ']')
2494 eState = STATE_IP6_DONE;
2495 else if (*p == ':')
2497 aTheCanonic.append(':');
2498 eState = STATE_IP6_3COLON;
2500 else if (rtl::isAsciiDigit(*p))
2502 nNumber = INetMIME::getWeight(*p);
2503 nDigits = 1;
2504 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2506 else if (rtl::isAsciiHexDigit(*p))
2508 nNumber = INetMIME::getHexWeight(*p);
2509 nDigits = 1;
2510 eState = STATE_IP6_HEXSEQ2;
2512 else
2513 goto done;
2514 break;
// Behind ":::" only an embedded IPv4 address may follow.
2516 case STATE_IP6_3COLON:
2517 if (rtl::isAsciiDigit(*p))
2519 nNumber = INetMIME::getWeight(*p);
2520 nDigits = 1;
2521 nOctets = 1;
2522 eState = STATE_IP6_IP4;
2524 else
2525 goto done;
2526 break;
// HEXSEQ1*: hex groups in front of a "::"; HEXSEQ2*: hex groups behind it.
// Each group has at most four hex digits and is emitted via
// OUString::number(nNumber, 16).
2528 case STATE_IP6_HEXSEQ1:
2529 if (*p == ']')
2531 aTheCanonic.append(
2532 OUString::number(nNumber, 16));
2533 eState = STATE_IP6_DONE;
2535 else if (*p == ':')
2537 aTheCanonic.append(
2538 OUString::number(nNumber, 16));
2539 aTheCanonic.append(':');
2540 eState = STATE_IP6_HEXSEQ1_COLON;
2542 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2544 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2545 ++nDigits;
2547 else
2548 goto done;
2549 break;
2551 case STATE_IP6_HEXSEQ1_COLON:
2552 if (*p == ':')
2554 aTheCanonic.append(':');
2555 eState = STATE_IP6_2COLON;
2557 else if (rtl::isAsciiDigit(*p))
2559 nNumber = INetMIME::getWeight(*p);
2560 nDigits = 1;
2561 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2563 else if (rtl::isAsciiHexDigit(*p))
2565 nNumber = INetMIME::getHexWeight(*p);
2566 nDigits = 1;
2567 eState = STATE_IP6_HEXSEQ1;
2569 else
2570 goto done;
2571 break;
// Group consisting of decimal digits only so far.  The digits are
// accumulated as hex; if a '.' follows, the group was really the first part
// of an embedded IPv4 address, so the hex nibbles are reinterpreted as
// decimal digits before being emitted.
2573 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2574 if (*p == ']')
2576 aTheCanonic.append(
2577 OUString::number(nNumber, 16));
2578 eState = STATE_IP6_DONE;
2580 else if (*p == ':')
2582 aTheCanonic.append(
2583 OUString::number(nNumber, 16));
2584 aTheCanonic.append(':');
2585 eState = STATE_IP6_HEXSEQ1_COLON;
2587 else if (*p == '.')
2589 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2590 + (nNumber & 15);
2591 aTheCanonic.append(
2592 OUString::number(nNumber));
2593 aTheCanonic.append('.');
2594 nOctets = 2;
2595 eState = STATE_IP6_IP4_DOT;
2597 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2599 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2600 ++nDigits;
2602 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2604 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2605 ++nDigits;
2606 eState = STATE_IP6_HEXSEQ1;
2608 else
2609 goto done;
2610 break;
2612 case STATE_IP6_HEXSEQ2:
2613 if (*p == ']')
2615 aTheCanonic.append(
2616 OUString::number(nNumber, 16));
2617 eState = STATE_IP6_DONE;
2619 else if (*p == ':')
2621 aTheCanonic.append(
2622 OUString::number(nNumber, 16));
2623 aTheCanonic.append(':');
2624 eState = STATE_IP6_HEXSEQ2_COLON;
2626 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2628 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2629 ++nDigits;
2631 else
2632 goto done;
2633 break;
// Behind a ':' that follows a group after "::"; a second "::" is not
// accepted here.
2635 case STATE_IP6_HEXSEQ2_COLON:
2636 if (rtl::isAsciiDigit(*p))
2638 nNumber = INetMIME::getWeight(*p);
2639 nDigits = 1;
2640 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2642 else if (rtl::isAsciiHexDigit(*p))
2644 nNumber = INetMIME::getHexWeight(*p);
2645 nDigits = 1;
2646 eState = STATE_IP6_HEXSEQ2;
2648 else
2649 goto done;
2650 break;
// Same handling as STATE_IP6_HEXSEQ1_MAYBE_IP4, for groups behind "::".
2652 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2653 if (*p == ']')
2655 aTheCanonic.append(
2656 OUString::number(nNumber, 16));
2657 eState = STATE_IP6_DONE;
2659 else if (*p == ':')
2661 aTheCanonic.append(
2662 OUString::number(nNumber, 16));
2663 aTheCanonic.append(':');
2664 eState = STATE_IP6_HEXSEQ2_COLON;
2666 else if (*p == '.')
2668 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2669 + (nNumber & 15);
2670 aTheCanonic.append(
2671 OUString::number(nNumber));
2672 aTheCanonic.append('.');
2673 nOctets = 2;
2674 eState = STATE_IP6_IP4_DOT;
2676 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2678 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2679 ++nDigits;
2681 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2683 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2684 ++nDigits;
2685 eState = STATE_IP6_HEXSEQ2;
2687 else
2688 goto done;
2689 break;
// Embedded IPv4 address inside the brackets: must have exactly four parts
// before the closing ']'.
2691 case STATE_IP6_IP4:
2692 if (*p == ']')
2693 if (nOctets == 4)
2695 aTheCanonic.append(
2696 OUString::number(nNumber));
2697 eState = STATE_IP6_DONE;
2699 else
2700 goto done;
2701 else if (*p == '.')
2702 if (nOctets < 4)
2704 aTheCanonic.append(
2705 OUString::number(nNumber));
2706 aTheCanonic.append('.');
2707 ++nOctets;
2708 eState = STATE_IP6_IP4_DOT;
2710 else
2711 goto done;
2712 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2714 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2715 ++nDigits;
2717 else
2718 goto done;
2719 break;
2721 case STATE_IP6_IP4_DOT:
2722 if (rtl::isAsciiDigit(*p))
2724 nNumber = INetMIME::getWeight(*p);
2725 nDigits = 1;
2726 eState = STATE_IP6_IP4;
2728 else
2729 goto done;
2730 break;
// The closing ']' has been consumed; stop at the character behind it.
2732 case STATE_IP6_DONE:
2733 goto done;
// Decide from the final state whether [rBegin, p) is a complete host.
2735 done:
2736 switch (eState)
// A name: discard anything collected for a would-be IPv4 address and copy
// the scanned text verbatim.
2738 case STATE_LABEL:
2739 case STATE_TOPLABEL:
2740 case STATE_TOPLABEL_DOT:
2741 aTheCanonic.setLength(0);
2742 aTheCanonic.append(rBegin, p - rBegin);
2743 rBegin = p;
2744 rCanonic = aTheCanonic.makeStringAndClear();
2745 return true;
// An IPv4 address is only valid with exactly four parts; the last part has
// not been emitted yet.
2747 case STATE_IP4:
2748 if (nOctets == 4)
2750 aTheCanonic.append(
2751 OUString::number(nNumber));
2752 rBegin = p;
2753 rCanonic = aTheCanonic.makeStringAndClear();
2754 return true;
2756 return false;
2758 case STATE_IP6_DONE:
2759 aTheCanonic.append(']');
2760 rBegin = p;
2761 rCanonic = aTheCanonic.makeStringAndClear();
2762 return true;
// Every other state means the text ended in the middle of a host.
2764 default:
2765 return false;
2769 // static
2770 bool INetURLObject::parseHostOrNetBiosName(
2771 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
2772 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2773 OUStringBuffer* pCanonic)
2775 OUString aTheCanonic;
2776 if (pBegin < pEnd)
2778 sal_Unicode const * p = pBegin;
2779 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2781 if (bNetBiosName)
2783 OUStringBuffer buf;
2784 while (pBegin < pEnd)
2786 EscapeType eEscapeType;
2787 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets,
2788 eMechanism, eCharset,
2789 eEscapeType);
2790 if (!INetMIME::isVisible(nUTF32))
2791 return false;
2792 if (!rtl::isAsciiAlphanumeric(nUTF32))
2793 switch (nUTF32)
2795 case '"':
2796 case '*':
2797 case '+':
2798 case ',':
2799 case '/':
2800 case ':':
2801 case ';':
2802 case '<':
2803 case '=':
2804 case '>':
2805 case '?':
2806 case '[':
2807 case '\\':
2808 case ']':
2809 case '`':
2810 case '|':
2811 return false;
2813 if (pCanonic != NULL) {
2814 appendUCS4(
2815 buf, nUTF32, eEscapeType, bOctets, PART_URIC,
2816 eCharset, true);
2819 aTheCanonic = buf.makeStringAndClear();
2821 else
2822 return false;
2825 if (pCanonic != NULL) {
2826 *pCanonic = aTheCanonic;
2828 return true;
2831 bool INetURLObject::setHost(OUString const & rTheHost, bool bOctets,
2832 EncodeMechanism eMechanism,
2833 rtl_TextEncoding eCharset)
2835 if (!getSchemeInfo().m_bHost)
2836 return false;
2837 OUStringBuffer aSynHost(rTheHost);
2838 bool bNetBiosName = false;
2839 switch (m_eScheme)
2841 case INetProtocol::File:
2843 OUString sTemp(aSynHost.toString());
2844 if (sTemp.equalsIgnoreAsciiCase("localhost"))
2846 aSynHost.setLength(0);
2848 bNetBiosName = true;
2850 break;
2851 case INetProtocol::Ldap:
2852 if (aSynHost.isEmpty() && m_aPort.isPresent())
2853 return false;
2854 break;
2856 default:
2857 if (aSynHost.isEmpty())
2858 return false;
2859 break;
2861 if (!parseHostOrNetBiosName(
2862 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2863 bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost))
2864 return false;
2865 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2866 m_aPort += nDelta;
2867 m_aPath += nDelta;
2868 m_aQuery += nDelta;
2869 m_aFragment += nDelta;
2870 return true;
2873 // static
// Parses the path part of a URL of scheme eScheme, starting at *pBegin, and
// builds its canonic (re-encoded) form.
//
// pBegin                in: start of the path; out (only on success):
//                       position behind the consumed path.
// pEnd                  end of the input.
// bOctets, eMechanism, eCharset
//                       passed through to getUTF32()/appendUCS4().
// bSkippedInitialSlash  only used for file URLs: the canonic path then
//                       starts with an extra '/'.
// nSegmentDelimiter, nAltSegmentDelimiter
//                       only used for file URLs: unescaped occurrences are
//                       written as '/'.
// nQueryDelimiter, nFragmentDelimiter
//                       characters at which the path ends (which of them
//                       apply depends on the scheme).
// rSynPath              out (only on success): the canonic path.
// Returns false if the input is not a valid path for the scheme; *pBegin and
// rSynPath are then left untouched.
2874 bool INetURLObject::parsePath(INetProtocol eScheme,
2875 sal_Unicode const ** pBegin,
2876 sal_Unicode const * pEnd,
2877 bool bOctets,
2878 EncodeMechanism eMechanism,
2879 rtl_TextEncoding eCharset,
2880 bool bSkippedInitialSlash,
2881 sal_uInt32 nSegmentDelimiter,
2882 sal_uInt32 nAltSegmentDelimiter,
2883 sal_uInt32 nQueryDelimiter,
2884 sal_uInt32 nFragmentDelimiter,
2885 OUStringBuffer &rSynPath)
2887 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2889 sal_Unicode const * pPos = *pBegin;
2890 OUStringBuffer aTheSynPath;
2892 switch (eScheme)
2894 case INetProtocol::NotValid:
2895 return false;
// ftp: path must be empty or start with '/'; it runs up to the fragment
// delimiter (a query delimiter is part of the path).  Empty becomes "/".
2897 case INetProtocol::Ftp:
2898 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2899 return false;
2900 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2902 EscapeType eEscapeType;
2903 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
2904 eCharset, eEscapeType);
2905 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2906 PART_HTTP_PATH, eCharset, true);
2908 if (aTheSynPath.isEmpty())
2909 aTheSynPath.append('/');
2910 break;
// http-like schemes: as ftp, but the path also ends at the query delimiter.
2912 case INetProtocol::Http:
2913 case INetProtocol::VndSunStarWebdav:
2914 case INetProtocol::Https:
2915 case INetProtocol::Smb:
2916 case INetProtocol::Cmis:
2917 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2918 return false;
2919 while (pPos < pEnd && *pPos != nQueryDelimiter
2920 && *pPos != nFragmentDelimiter)
2922 EscapeType eEscapeType;
2923 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
2924 eCharset, eEscapeType);
2925 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2926 PART_HTTP_PATH, eCharset, true);
2928 if (aTheSynPath.isEmpty())
2929 aTheSynPath.append('/');
2930 break;
// file: unescaped segment delimiters are normalized to '/', and a first
// segment of the form <letter>'|' is rewritten to <letter>':'.
2932 case INetProtocol::File:
2934 if (bSkippedInitialSlash)
2935 aTheSynPath.append('/');
2936 else if (pPos < pEnd
2937 && *pPos != nSegmentDelimiter
2938 && *pPos != nAltSegmentDelimiter)
2939 return false;
2940 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2942 EscapeType eEscapeType;
2943 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
2944 eCharset, eEscapeType);
2945 if (eEscapeType == ESCAPE_NO)
2947 if (nUTF32 == nSegmentDelimiter
2948 || nUTF32 == nAltSegmentDelimiter)
2950 aTheSynPath.append('/');
2951 continue;
// The '|' must end the first segment, and the path built so far must be
// exactly two characters long with a letter at index 1.
2953 else if (nUTF32 == '|'
2954 && (pPos == pEnd
2955 || *pPos == nFragmentDelimiter
2956 || *pPos == nSegmentDelimiter
2957 || *pPos == nAltSegmentDelimiter)
2958 && aTheSynPath.getLength() == 2
2959 && rtl::isAsciiAlpha(aTheSynPath[1]))
2961 // A first segment of <ALPHA "|"> is translated to
2962 // <ALPHA ":">:
2963 aTheSynPath.append(':');
2964 continue;
2967 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2968 PART_PCHAR, eCharset, true);
2970 if (aTheSynPath.isEmpty())
2971 aTheSynPath.append('/');
2972 break;
// mailto: everything up to the query or fragment delimiter; may be empty.
2975 case INetProtocol::Mailto:
2976 while (pPos < pEnd && *pPos != nQueryDelimiter
2977 && *pPos != nFragmentDelimiter)
2979 EscapeType eEscapeType;
2980 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
2981 eCharset, eEscapeType);
2982 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2983 PART_MAILTO, eCharset, true);
2985 break;
// These schemes take everything up to the query or fragment delimiter, with
// no constraint on the first character; the path may be empty.
2988 case INetProtocol::PrivSoffice:
2989 case INetProtocol::Slot:
2990 case INetProtocol::Hid:
2991 case INetProtocol::Macro:
2992 case INetProtocol::Uno:
2993 case INetProtocol::Component:
2994 case INetProtocol::Ldap:
2995 while (pPos < pEnd && *pPos != nQueryDelimiter
2996 && *pPos != nFragmentDelimiter)
2998 EscapeType eEscapeType;
2999 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3000 eCharset, eEscapeType);
3001 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3002 PART_PATH_BEFORE_QUERY, eCharset, true);
3004 break;
// vnd.sun.star.help: an empty path becomes "/"; otherwise it must start
// with '/'.
3006 case INetProtocol::VndSunStarHelp:
3007 if (pPos == pEnd
3008 || *pPos == nQueryDelimiter
3009 || *pPos == nFragmentDelimiter)
3010 aTheSynPath.append('/');
3011 else
3013 if (*pPos != '/')
3014 return false;
3015 while (pPos < pEnd && *pPos != nQueryDelimiter
3016 && *pPos != nFragmentDelimiter)
3018 EscapeType eEscapeType;
3019 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3020 eMechanism,
3021 eCharset, eEscapeType);
3022 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3023 PART_HTTP_PATH, eCharset, true);
3026 break;
// Opaque schemes: everything up to the fragment delimiter; may be empty.
3028 case INetProtocol::Javascript:
3029 case INetProtocol::Data:
3030 case INetProtocol::Cid:
3031 case INetProtocol::Db:
3032 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3034 EscapeType eEscapeType;
3035 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3036 eCharset, eEscapeType);
3037 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3038 PART_URIC, eCharset, true);
3040 break;
// hier/pkg: path must be empty or start with '/'; unescaped '/' is kept as
// the segment separator, everything else is encoded as PART_PCHAR.  Empty
// becomes "/".
3042 case INetProtocol::VndSunStarHier:
3043 case INetProtocol::VndSunStarPkg:
3044 if (pPos < pEnd && *pPos != '/'
3045 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3046 return false;
3047 while (pPos < pEnd && *pPos != nQueryDelimiter
3048 && *pPos != nFragmentDelimiter)
3050 EscapeType eEscapeType;
3051 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3052 eCharset, eEscapeType);
3053 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3054 aTheSynPath.append('/');
3055 else
3056 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3057 PART_PCHAR, eCharset, false);
3059 if (aTheSynPath.isEmpty())
3060 aTheSynPath.append('/');
3061 break;
// cmd/expand: path must not be empty.  The first character is encoded as
// PART_URIC_NO_SLASH, all following ones as PART_URIC.
3063 case INetProtocol::VndSunStarCmd:
3064 case INetProtocol::VndSunStarExpand:
3066 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3067 return false;
3068 Part ePart = PART_URIC_NO_SLASH;
3069 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3071 EscapeType eEscapeType;
3072 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3073 eCharset, eEscapeType);
3074 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart,
3075 eCharset, true);
3076 ePart = PART_URIC;
3078 break;
// telnet: the only paths accepted are empty and a single "/"; both yield "/".
3081 case INetProtocol::Telnet:
3082 if (pPos < pEnd)
3084 if (*pPos != '/' || pEnd - pPos > 1)
3085 return false;
3086 ++pPos;
3088 aTheSynPath.append('/');
3089 break;
// tdoc: path is mandatory and must start with '/'.
3091 case INetProtocol::VndSunStarTdoc:
3092 if (pPos == pEnd || *pPos != '/')
3093 return false;
3094 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3096 EscapeType eEscapeType;
3097 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3098 eCharset, eEscapeType);
3099 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3100 aTheSynPath.append('/');
3101 else
3102 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3103 PART_PCHAR, eCharset, false);
3105 break;
// generic/sftp: everything up to the fragment delimiter; must not be empty.
3107 case INetProtocol::Generic:
3108 case INetProtocol::Sftp:
3109 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3111 EscapeType eEscapeType;
3112 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, eMechanism,
3113 eCharset, eEscapeType);
3114 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3115 PART_URIC, eCharset, true);
3117 if (aTheSynPath.isEmpty())
3118 return false;
3119 break;
// Unhandled scheme: asserts, then falls through to the success path below
// with nothing consumed and an empty canonic path.
3120 default:
3121 OSL_ASSERT(false);
3122 break;
// Success: publish the end position and the canonic path.
3125 *pBegin = pPos;
3126 rSynPath = aTheSynPath;
3127 return true;
3130 bool INetURLObject::setPath(OUString const & rThePath, bool bOctets,
3131 EncodeMechanism eMechanism,
3132 rtl_TextEncoding eCharset)
3134 OUStringBuffer aSynPath;
3135 sal_Unicode const * p = rThePath.getStr();
3136 sal_Unicode const * pEnd = p + rThePath.getLength();
3137 if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false,
3138 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3139 || p != pEnd)
3140 return false;
3141 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3142 m_aQuery += nDelta;
3143 m_aFragment += nDelta;
3144 return true;
3147 bool INetURLObject::checkHierarchical() const {
3148 if (m_eScheme == INetProtocol::VndSunStarExpand) {
3149 OSL_FAIL(
3150 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3151 return true;
3152 } else {
3153 return getSchemeInfo().m_bHierarchical;
3157 bool INetURLObject::appendSegment(OUString const & rTheSegment,
3158 bool bOctets, EncodeMechanism eMechanism,
3159 rtl_TextEncoding eCharset)
3161 return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true,
3162 eMechanism, eCharset);
3165 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3166 bool bIgnoreFinalSlash)
3167 const
3169 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3170 "INetURLObject::getSegment(): Bad index");
3172 if (!checkHierarchical())
3173 return SubString();
3175 sal_Unicode const * pPathBegin
3176 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3177 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3178 sal_Unicode const * pSegBegin;
3179 sal_Unicode const * pSegEnd;
3180 if (nIndex == LAST_SEGMENT)
3182 pSegEnd = pPathEnd;
3183 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3184 --pSegEnd;
3185 if (pSegEnd <= pPathBegin)
3186 return SubString();
3187 pSegBegin = pSegEnd - 1;
3188 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3189 --pSegBegin;
3191 else
3193 pSegBegin = pPathBegin;
3194 while (nIndex-- > 0)
3197 ++pSegBegin;
3198 if (pSegBegin >= pPathEnd)
3199 return SubString();
3201 while (*pSegBegin != '/');
3202 pSegEnd = pSegBegin + 1;
3203 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3204 ++pSegEnd;
3207 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3208 pSegEnd - pSegBegin);
// Inserts rTheName as a new path segment and re-sets the path.
//
// rTheName           the new segment; encoded as PART_PCHAR before insertion.
// bOctets, eMechanism, eCharset
//                    describe how rTheName is encoded.
// bAppendFinalSlash  whether a '/' is added behind the new segment when it
//                    becomes the last one (see bInsertSlash below).
// nIndex             position of the new segment: LAST_SEGMENT, 0, or a
//                    positive segment index.
// bIgnoreFinalSlash  treat a trailing '/' of the path as not starting another
//                    (empty) segment.
// Returns false if the URL is not hierarchical, if nIndex lies beyond the
// position directly behind the last segment, or if setPath() rejects the new
// path.
//
// The new path is built as: path[begin, pPrefixEnd) + '/' + encoded name
// + optional '/' + path[pSuffixBegin, end).
3211 bool INetURLObject::insertName(OUString const & rTheName, bool bOctets,
3212 bool bAppendFinalSlash, sal_Int32 nIndex,
3213 bool bIgnoreFinalSlash,
3214 EncodeMechanism eMechanism,
3215 rtl_TextEncoding eCharset)
3217 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3218 "INetURLObject::insertName(): Bad index");
3220 if (!checkHierarchical())
3221 return false;
3223 sal_Unicode const * pPathBegin
3224 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3225 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3226 sal_Unicode const * pPrefixEnd;
3227 bool bInsertSlash;
3228 sal_Unicode const * pSuffixBegin;
// Append at the end: the prefix is the whole path (minus an ignored final
// slash) and there is no suffix.
3229 if (nIndex == LAST_SEGMENT)
3231 pPrefixEnd = pPathEnd;
3232 if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin &&
3233 pPrefixEnd[-1] == '/')
3235 --pPrefixEnd;
3237 bInsertSlash = bAppendFinalSlash;
3238 pSuffixBegin = pPathEnd;
// Insert at the front: the prefix is empty.  A path consisting only of "/"
// is dropped as suffix when the final slash is ignored and none is to be
// appended.
3240 else if (nIndex == 0)
3242 pPrefixEnd = pPathBegin;
3243 bInsertSlash =
3244 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3245 (pPathBegin == pPathEnd && bAppendFinalSlash);
3246 pSuffixBegin =
3247 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3248 !bAppendFinalSlash && bIgnoreFinalSlash)
3249 ? pPathEnd : pPathBegin;
// Insert in the middle: advance pPrefixEnd over nIndex segments.
3251 else
3253 pPrefixEnd = pPathBegin;
3254 sal_Unicode const * pEnd = pPathEnd;
3255 if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/')
3256 --pEnd;
// bSkip tells whether pPrefixEnd currently sits on a '/' that has to be
// stepped over before scanning for the next one.
3257 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3258 bInsertSlash = false;
3259 pSuffixBegin = pPathEnd;
3260 while (nIndex-- > 0)
3261 for (;;)
3263 if (bSkip)
3264 ++pPrefixEnd;
3265 bSkip = true;
// Ran off the end of the path: only acceptable if this was the last segment
// to skip, i.e. the new name goes directly behind the last segment.
3266 if (pPrefixEnd >= pEnd)
3268 if (nIndex == 0)
3270 bInsertSlash = bAppendFinalSlash;
3271 break;
3273 else
3274 return false;
3276 if (*pPrefixEnd == '/')
3278 pSuffixBegin = pPrefixEnd;
3279 break;
3284 OUStringBuffer aNewPath;
3285 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3286 aNewPath.append('/');
3287 aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR,
3288 eMechanism, eCharset, true));
3289 if (bInsertSlash) {
3290 aNewPath.append('/');
3292 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
// The name was already encoded above, so the assembled path is passed on as
// NOT_CANONIC UTF-8 text.
3294 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
3295 RTL_TEXTENCODING_UTF8);
3298 bool INetURLObject::clearQuery()
3300 if (HasError())
3301 return false;
3302 if (m_aQuery.isPresent())
3304 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3305 m_aQuery.getLength() + 1);
3306 m_aFragment += m_aQuery.clear() - 1;
3308 return false;
3311 bool INetURLObject::setQuery(OUString const & rTheQuery, bool bOctets,
3312 EncodeMechanism eMechanism,
3313 rtl_TextEncoding eCharset)
3315 if (!getSchemeInfo().m_bQuery)
3316 return false;
3317 OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC,
3318 eMechanism, eCharset, true));
3319 sal_Int32 nDelta;
3320 if (m_aQuery.isPresent())
3321 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3322 else
3324 m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?'));
3325 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3326 + 1;
3328 m_aFragment += nDelta;
3329 return true;
3332 bool INetURLObject::clearFragment()
3334 if (HasError())
3335 return false;
3336 if (m_aFragment.isPresent())
3338 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3339 m_aFragment.clear();
3341 return true;
3344 bool INetURLObject::setFragment(OUString const & rTheFragment,
3345 bool bOctets, EncodeMechanism eMechanism,
3346 rtl_TextEncoding eCharset)
3348 if (HasError())
3349 return false;
3350 OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC,
3351 eMechanism, eCharset, true));
3352 if (m_aFragment.isPresent())
3353 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3354 else
3356 m_aAbsURIRef.append('#');
3357 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3359 return true;
3362 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3364 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3365 return (eStyle & FSYS_DOS) != 0
3366 && m_aPath.getLength() >= 3
3367 && p[0] == '/'
3368 && rtl::isAsciiAlpha(p[1])
3369 && p[2] == ':'
3370 && (m_aPath.getLength() == 3 || p[3] == '/');
3373 // static
3374 OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3375 sal_Unicode const * pEnd, bool bOctets,
3376 Part ePart, EncodeMechanism eMechanism,
3377 rtl_TextEncoding eCharset,
3378 bool bKeepVisibleEscapes)
3380 OUStringBuffer aResult;
3381 while (pBegin < pEnd)
3383 EscapeType eEscapeType;
3384 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets,
3385 eMechanism, eCharset, eEscapeType);
3386 appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart,
3387 eCharset, bKeepVisibleEscapes);
3389 return aResult.makeStringAndClear();
3392 // static
3393 OUString INetURLObject::decode(sal_Unicode const * pBegin,
3394 sal_Unicode const * pEnd,
3395 DecodeMechanism eMechanism,
3396 rtl_TextEncoding eCharset)
3398 switch (eMechanism)
3400 case NO_DECODE:
3401 return OUString(pBegin, pEnd - pBegin);
3403 case DECODE_TO_IURI:
3404 eCharset = RTL_TEXTENCODING_UTF8;
3405 break;
3407 default:
3408 break;
3410 OUStringBuffer aResult;
3411 while (pBegin < pEnd)
3413 EscapeType eEscapeType;
3414 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false,
3415 WAS_ENCODED, eCharset, eEscapeType);
3416 switch (eEscapeType)
3418 case ESCAPE_NO:
3419 aResult.appendUtf32(nUTF32);
3420 break;
3422 case ESCAPE_OCTET:
3423 appendEscape(aResult, nUTF32);
3424 break;
3426 case ESCAPE_UTF32:
3427 if (
3428 rtl::isAscii(nUTF32) &&
3430 eMechanism == DECODE_TO_IURI ||
3432 eMechanism == DECODE_UNAMBIGUOUS &&
3433 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3438 appendEscape(aResult, nUTF32);
3440 else
3441 aResult.appendUtf32(nUTF32);
3442 break;
3445 return aResult.makeStringAndClear();
3448 OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3449 rtl_TextEncoding eCharset) const
3451 INetURLObject aTemp(*this);
3452 aTemp.clearPassword();
3453 return aTemp.GetMainURL(eMechanism, eCharset);
3456 OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3457 rtl_TextEncoding eCharset) const
3459 INetURLObject aTemp(*this);
3460 aTemp.clearFragment();
3461 return aTemp.GetMainURL(eMechanism, eCharset);
// Builds an abbreviated, decoded form of this URL whose width, as measured
// by rStringWidth->queryStringWidth(), is at most nWidth (if possible).
//
// The result always starts with "scheme:".  For hierarchical schemes the
// code alternately tries to keep one more path segment from the end and one
// more from the start, as long as the candidate string (with "..." standing
// in for the omitted middle and "?..." / "#..." standing in for a query or
// fragment) still fits.  If not even one segment fits, the whole core
// (authority/path), query and fragment are appended unabbreviated, and the
// final loop then shortens that string character by character.
//
// rStringWidth  measures candidate strings; must not be empty.
// nWidth        the maximum allowed width, in rStringWidth's units.
// eMechanism, eCharset  forwarded to decode() for every emitted part.
//
// NOTE(review): in this extract, brace-only and other very short lines
// (including the "do" that belongs to the trailing "while (bPrefix ||
// bSuffix);" and the "1);" / "1)," continuation lines) appear to be missing.
3464 OUString
3465 INetURLObject::getAbbreviated(
3466 uno::Reference< util::XStringWidth > const & rStringWidth,
3467 sal_Int32 nWidth,
3468 DecodeMechanism eMechanism,
3469 rtl_TextEncoding eCharset)
3470 const
3472 OSL_ENSURE(rStringWidth.is(), "specification violation");
3473 OUStringBuffer aBuffer;
3474 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3475 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3476 if (m_eScheme != INetProtocol::Generic)
3478 aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3480 else
3482 if (!m_aAbsURIRef.isEmpty())
3484 sal_Unicode const * pSchemeBegin
3485 = m_aAbsURIRef.getStr();
3486 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// Scan up to the first ':'; this relies on the URL text containing one.
3488 while (pSchemeEnd[0] != ':')
3490 ++pSchemeEnd;
3492 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3495 aBuffer.append(':');
// The "core" is the authority (if the scheme has one) plus the path.
3496 bool bAuthority = getSchemeInfo().m_bAuthority;
3497 sal_Unicode const * pCoreBegin
3498 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3499 m_aPath.getBegin());
3500 sal_Unicode const * pCoreEnd
3501 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
// bSegment records whether at least one segment was accepted below.
3502 bool bSegment = false;
3503 if (getSchemeInfo().m_bHierarchical)
// aRest is the placeholder used in candidate strings for a query/fragment.
3505 OUString aRest;
3506 if (m_aQuery.isPresent())
3507 aRest = "?...";
3508 else if (m_aFragment.isPresent())
3509 aRest = "#...";
// aTrailer collects the accepted segments from the end of the core;
// accepted segments from the start are appended directly to aBuffer.
3510 OUStringBuffer aTrailer;
3511 sal_Unicode const * pBegin = pCoreBegin;
3512 sal_Unicode const * pEnd = pCoreEnd;
3513 sal_Unicode const * pPrefixBegin = pBegin;
3514 sal_Unicode const * pSuffixEnd = pEnd;
3515 bool bPrefix = true;
3516 bool bSuffix = true;
// Try to take one more segment from the end.
3519 if (bSuffix)
3521 sal_Unicode const * p = pSuffixEnd - 1;
3522 if (pSuffixEnd == pCoreEnd && *p == '/')
3523 --p;
3524 while (*p != '/')
3525 --p;
3526 if (bAuthority && p == pCoreBegin + 1)
3527 --p;
3528 OUString
3529 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3530 1 : 0),
3531 pSuffixEnd,
3532 eMechanism,
3533 eCharset));
3534 pSuffixEnd = p;
// Candidate: prefix so far + ["..."] + new segment + trailer + aRest.
3535 OUStringBuffer aResult(aBuffer);
3536 if (pSuffixEnd != pBegin)
3537 aResult.append("...");
3538 aResult.append(aSegment);
3539 aResult.append(aTrailer.toString());
3540 aResult.append(aRest);
3541 if (rStringWidth->
3542 queryStringWidth(aResult.makeStringAndClear())
3543 <= nWidth)
3545 aTrailer.insert(0, aSegment);
3546 bSegment = true;
3547 pEnd = pSuffixEnd;
3549 else
3550 bSuffix = false;
3551 if (pPrefixBegin > pSuffixEnd)
3552 pPrefixBegin = pSuffixEnd;
3553 if (pBegin == pEnd)
3554 break;
// Try to take one more segment from the start.
3556 if (bPrefix)
3558 sal_Unicode const * p
3559 = pPrefixBegin
3560 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3562 OSL_ASSERT(p <= pEnd);
3563 while (p < pEnd && *p != '/')
3564 ++p;
3565 if (p == pCoreEnd - 1 && *p == '/')
3566 ++p;
3567 OUString
3568 aSegment(decode(pPrefixBegin
3569 + (pPrefixBegin == pCoreBegin ? 0 :
3571 p == pEnd ? p : p + 1,
3572 eMechanism,
3573 eCharset));
3574 pPrefixBegin = p;
// Candidate: prefix so far + new segment + ["..."] + trailer + aRest.
3575 OUStringBuffer aResult(aBuffer);
3576 aResult.append(aSegment);
3577 if (pPrefixBegin != pEnd)
3578 aResult.append("...");
3579 aResult.append(aTrailer.toString());
3580 aResult.append(aRest);
3581 if (rStringWidth->
3582 queryStringWidth(aResult.makeStringAndClear())
3583 <= nWidth)
3585 aBuffer.append(aSegment);
3586 bSegment = true;
3587 pBegin = pPrefixBegin;
3589 else
3590 bPrefix = false;
3591 if (pPrefixBegin > pSuffixEnd)
3592 pSuffixEnd = pPrefixBegin;
3593 if (pBegin == pEnd)
3594 break;
3597 while (bPrefix || bSuffix);
// Join the accepted prefix and suffix, with "..." if anything was left out.
3598 if (bSegment)
3600 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3601 aBuffer.append("...");
3602 aBuffer.append(aTrailer.toString());
// No segment accepted (or scheme not hierarchical): use the whole core.
3605 if (!bSegment)
3606 aBuffer.append(decode(pCoreBegin,
3607 pCoreEnd,
3608 eMechanism,
3609 eCharset));
3610 if (m_aQuery.isPresent())
3612 aBuffer.append('?');
3613 aBuffer.append(decode(m_aQuery, eMechanism, eCharset));
3615 if (m_aFragment.isPresent())
3617 aBuffer.append('#');
3618 aBuffer.append(decode(m_aFragment, eMechanism, eCharset));
// Last resort if the result is still too wide: keep the first i characters
// followed by "...", decreasing i each round; once i has reached 0, drop
// one trailing character per round until the text fits or is empty.
3620 if (!aBuffer.isEmpty())
3622 OUStringBuffer aResult(aBuffer);
3623 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3624 > nWidth)
3625 for (sal_Int32 i = aBuffer.getLength();;)
3627 if (i == 0)
3629 aBuffer.setLength(aBuffer.getLength() - 1);
3630 if (aBuffer.isEmpty())
3631 break;
3633 else
3635 aBuffer.setLength(--i);
3636 aBuffer.append("...");
3638 aResult = aBuffer;
3639 if (rStringWidth->
3640 queryStringWidth(aResult.makeStringAndClear())
3641 <= nWidth)
3642 break;
3645 return aBuffer.makeStringAndClear();
3648 bool INetURLObject::operator ==(INetURLObject const & rObject) const
3650 if (m_eScheme != rObject.m_eScheme)
3651 return false;
3652 if (m_eScheme == INetProtocol::NotValid)
3653 return m_aAbsURIRef.toString() == rObject.m_aAbsURIRef.toString();
3654 if ((m_aScheme.compare(
3655 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3656 != 0)
3657 || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE)
3658 || GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE)
3659 || !GetHost(NO_DECODE).equalsIgnoreAsciiCase(
3660 rObject.GetHost(NO_DECODE))
3661 || GetPort() != rObject.GetPort()
3662 || HasParam() != rObject.HasParam()
3663 || GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE)
3664 || GetMsgId(NO_DECODE) != INetURLObject::GetMsgId(NO_DECODE))
3665 return false;
3666 OUString aPath1(GetURLPath(NO_DECODE));
3667 OUString aPath2(rObject.GetURLPath(NO_DECODE));
3668 switch (m_eScheme)
3670 case INetProtocol::File:
3672 // If the URL paths of two file URLs only differ in that one has a
3673 // final '/' and the other has not, take the two paths as
3674 // equivalent (this could be useful for other schemes, too):
3675 sal_Int32 nLength = aPath1.getLength();
3676 switch (nLength - aPath2.getLength())
3678 case -1:
3679 if (aPath2[nLength] != '/')
3680 return false;
3681 break;
3683 case 0:
3684 break;
3686 case 1:
3687 if (aPath1[--nLength] != '/')
3688 return false;
3689 break;
3691 default:
3692 return false;
3694 return aPath1.compareTo(aPath2, nLength) == 0;
3697 default:
3698 return aPath1 == aPath2;
3702 bool INetURLObject::operator <(INetURLObject const & rObject) const
3704 sal_Int32 nCompare = m_aScheme.compare(
3705 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef);
3706 if (nCompare < 0) {
3707 return true;
3708 } else if (nCompare > 0) {
3709 return false;
3711 sal_uInt32 nPort1 = GetPort();
3712 sal_uInt32 nPort2 = rObject.GetPort();
3713 if (nPort1 < nPort2)
3714 return true;
3715 else if (nPort1 > nPort2)
3716 return false;
3717 nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE));
3718 if (nCompare < 0)
3719 return true;
3720 else if (nCompare > 0)
3721 return false;
3722 nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE));
3723 if (nCompare < 0)
3724 return true;
3725 else if (nCompare > 0)
3726 return false;
3727 nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE));
3728 if (nCompare < 0)
3729 return true;
3730 else if (nCompare > 0)
3731 return false;
3732 const OUString &rPath1(GetURLPath(NO_DECODE));
3733 const OUString &rPath2(rObject.GetURLPath(NO_DECODE));
3734 nCompare = rPath1.compareTo(rPath2);
3735 if (nCompare < 0)
3736 return true;
3737 else if (nCompare > 0)
3738 return false;
3739 nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE));
3740 if (nCompare < 0)
3741 return true;
3742 else if (nCompare > 0)
3743 return false;
3744 return GetMsgId(NO_DECODE).compareTo(INetURLObject::GetMsgId(NO_DECODE)) < 0;
// Re-initializes this object from individual URL components.
//
// The object is first invalidated, then the URL text
// "scheme:[//[user][:password][@]host[:port]]path" is assembled in
// m_aAbsURIRef.  Whenever a component is given that the scheme does not
// support, or a component fails to parse, the object is set invalid and
// false is returned.
//
// eTheScheme    the scheme; INetProtocol::Generic (or a scheme for which
//               HasError() is true) is rejected.
// rTheUser      user name; must be empty if the scheme has no user part.
// rThePassword  password; must be empty if the scheme has no password part.
// rTheHost      host name; see the per-scheme rules below.
// nThePort      port number; 0 means "no port".
// rThePath      the path, parsed via parsePath(); must be consumed fully.
// eMechanism, eCharset  forwarded to encodeText(), the host parser and
//               parsePath().
//
// Returns true on success.
//
// NOTE(review): brace-only lines appear to be missing from this extract.
3747 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
3748 OUString const & rTheUser,
3749 OUString const & rThePassword,
3750 OUString const & rTheHost,
3751 sal_uInt32 nThePort,
3752 OUString const & rThePath,
3753 EncodeMechanism eMechanism,
3754 rtl_TextEncoding eCharset)
3756 setInvalid();
3757 m_eScheme = eTheScheme;
3758 if (HasError() || m_eScheme == INetProtocol::Generic)
3759 return false;
3760 m_aAbsURIRef.setLength(0);
3761 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
3762 m_aAbsURIRef.append(':');
3763 if (getSchemeInfo().m_bAuthority)
3765 m_aAbsURIRef.append("//");
// bUserInfo: whether a user and/or password was written, so that an '@'
// is needed in front of the host.
3766 bool bUserInfo = false;
3767 if (getSchemeInfo().m_bUser)
3769 if (!rTheUser.isEmpty())
3771 m_aUser.set(m_aAbsURIRef,
3772 encodeText(rTheUser, false, PART_USER_PASSWORD,
3773 eMechanism, eCharset, false),
3774 m_aAbsURIRef.getLength());
3775 bUserInfo = true;
// A user was given although the scheme has no user part.
3778 else if (!rTheUser.isEmpty())
3780 setInvalid();
3781 return false;
3783 if (!rThePassword.isEmpty())
3785 if (getSchemeInfo().m_bPassword)
3787 m_aAbsURIRef.append(':');
3788 m_aAuth.set(m_aAbsURIRef,
3789 encodeText(rThePassword, false, PART_USER_PASSWORD,
3790 eMechanism, eCharset, false),
3791 m_aAbsURIRef.getLength());
3792 bUserInfo = true;
3794 else
3796 setInvalid();
3797 return false;
3800 if (bUserInfo && getSchemeInfo().m_bHost)
3801 m_aAbsURIRef.append('@');
3802 if (getSchemeInfo().m_bHost)
3804 OUStringBuffer aSynHost(rTheHost);
3805 bool bNetBiosName = false;
3806 switch (m_eScheme)
// file: "localhost" (any ASCII case) is stored as an empty host; the host
// is parsed with bNetBiosName set.
3808 case INetProtocol::File:
3810 OUString sTemp(aSynHost.toString());
3811 if (sTemp.equalsIgnoreAsciiCase( "localhost" ))
3813 aSynHost.setLength(0);
3815 bNetBiosName = true;
3817 break;
// ldap: an empty host is acceptable only when no port is given.
3819 case INetProtocol::Ldap:
3820 if (aSynHost.isEmpty() && nThePort != 0)
3822 setInvalid();
3823 return false;
3825 break;
// All other schemes with a host part require a non-empty host.
3827 default:
3828 if (aSynHost.isEmpty())
3830 setInvalid();
3831 return false;
3833 break;
// Validate the host; aSynHost is also passed as the output buffer.
3835 if (!parseHostOrNetBiosName(
3836 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
3837 false, eMechanism, eCharset, bNetBiosName, &aSynHost))
3839 setInvalid();
3840 return false;
3842 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
3843 m_aAbsURIRef.getLength());
3844 if (nThePort != 0)
3846 if (getSchemeInfo().m_bPort)
3848 m_aAbsURIRef.append(':');
3849 m_aPort.set(m_aAbsURIRef,
3850 OUString::number(nThePort),
3851 m_aAbsURIRef.getLength());
3853 else
3855 setInvalid();
3856 return false;
// A host or port was given although the scheme has no host part.
3860 else if (!rTheHost.isEmpty() || nThePort != 0)
3862 setInvalid();
3863 return false;
3866 OUStringBuffer aSynPath;
3867 sal_Unicode const * p = rThePath.getStr();
3868 sal_Unicode const * pEnd = p + rThePath.getLength();
// The path must parse and be consumed completely (p == pEnd).
// NOTE(review): the three 0x80000000 arguments are presumably "no such
// delimiter" sentinels -- confirm against parsePath()'s declaration.
3869 if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/',
3870 0x80000000, 0x80000000, 0x80000000, aSynPath)
3871 || p != pEnd)
3873 setInvalid();
3874 return false;
3876 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
3877 m_aAbsURIRef.getLength());
3878 return true;
3881 // static
3882 OUString INetURLObject::GetAbsURL(OUString const & rTheBaseURIRef,
3883 OUString const & rTheRelURIRef,
3884 bool bIgnoreFragment,
3885 EncodeMechanism eEncodeMechanism,
3886 DecodeMechanism eDecodeMechanism,
3887 rtl_TextEncoding eCharset,
3888 FSysStyle eStyle)
3890 // Backwards compatibility:
3891 if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
3892 return rTheRelURIRef;
3894 INetURLObject aTheAbsURIRef;
3895 bool bWasAbsolute;
3896 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
3897 convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef,
3898 bWasAbsolute, eEncodeMechanism,
3899 eCharset, bIgnoreFragment, false,
3900 false, eStyle)
3901 || eEncodeMechanism != WAS_ENCODED
3902 || eDecodeMechanism != DECODE_TO_IURI
3903 || eCharset != RTL_TEXTENCODING_UTF8 ?
3904 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
3905 rTheRelURIRef;
3908 OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
3909 rtl_TextEncoding eCharset) const
3911 OUString aTheExtURIRef;
3912 translateToExternal(
3913 m_aAbsURIRef.toString(), aTheExtURIRef, eMechanism, eCharset);
3914 return aTheExtURIRef;
3917 // static
3918 OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
3920 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
3923 // static
3924 OUString INetURLObject::GetSchemeName(INetProtocol eTheScheme)
3926 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pScheme);
3929 // static
3930 INetProtocol INetURLObject::CompareProtocolScheme(OUString const &
3931 rTheAbsURIRef)
3933 sal_Unicode const * p = rTheAbsURIRef.getStr();
3934 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
3935 return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid;
3938 OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
3939 rtl_TextEncoding eCharset)
3941 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3942 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3943 if (!getSchemeInfo().m_bHost)
3944 return OUString();
3945 OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset));
3946 if (m_aPort.isPresent())
3948 aHostPort.append(':');
3949 aHostPort.append(decode(m_aPort, eMechanism, eCharset));
3951 return aHostPort.makeStringAndClear();
3954 sal_uInt32 INetURLObject::GetPort() const
3956 if (m_aPort.isPresent())
3958 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
3959 sal_Unicode const * pEnd = p + m_aPort.getLength();
3960 sal_uInt32 nThePort;
3961 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
3962 return nThePort;
3964 return 0;
3967 bool INetURLObject::SetPort(sal_uInt32 nThePort)
3969 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
3971 OUString aNewPort(OUString::number(nThePort));
3972 sal_Int32 nDelta;
3973 if (m_aPort.isPresent())
3974 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
3975 else
3977 m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':'));
3978 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
3979 + 1;
3981 m_aPath += nDelta;
3982 m_aQuery += nDelta;
3983 m_aFragment += nDelta;
3984 return true;
3986 return false;
3989 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
3991 if (!checkHierarchical())
3992 return 0;
3994 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3995 sal_Unicode const * pEnd = p + m_aPath.getLength();
3996 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
3997 --pEnd;
3998 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
3999 while (p != pEnd)
4000 if (*p++ == '/')
4001 ++n;
4002 return n;
4005 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4007 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4008 if (!aSegment.isPresent())
4009 return false;
4011 OUStringBuffer aNewPath;
4012 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4013 aSegment.getBegin() - m_aPath.getBegin());
4014 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4015 aNewPath.append('/');
4016 else
4017 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4018 m_aPath.getEnd() - aSegment.getEnd());
4019 if (aNewPath.isEmpty() && !aSegment.isEmpty() &&
4020 m_aAbsURIRef[aSegment.getBegin()] == '/')
4022 aNewPath.append('/');
4025 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4026 RTL_TEXTENCODING_UTF8);
4029 OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4030 DecodeMechanism eMechanism,
4031 rtl_TextEncoding eCharset) const
4033 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4034 if (!aSegment.isPresent())
4035 return OUString();
4037 sal_Unicode const * pSegBegin
4038 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4039 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4041 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4042 ++pSegBegin;
4043 sal_Unicode const * p = pSegBegin;
4044 while (p != pSegEnd && *p != ';')
4045 ++p;
4047 return decode(pSegBegin, p, eMechanism, eCharset);
4050 bool INetURLObject::setName(OUString const & rTheName, sal_Int32 nIndex,
4051 bool bIgnoreFinalSlash,
4052 EncodeMechanism eMechanism,
4053 rtl_TextEncoding eCharset)
4055 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4056 if (!aSegment.isPresent())
4057 return false;
4059 sal_Unicode const * pPathBegin
4060 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4061 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4062 sal_Unicode const * pSegBegin
4063 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4064 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4066 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4067 ++pSegBegin;
4068 sal_Unicode const * p = pSegBegin;
4069 while (p != pSegEnd && *p != ';')
4070 ++p;
4072 OUStringBuffer aNewPath;
4073 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4074 aNewPath.append(encodeText(rTheName, false, PART_PCHAR,
4075 eMechanism, eCharset, true));
4076 aNewPath.append(p, pPathEnd - p);
4078 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4079 RTL_TEXTENCODING_UTF8);
4082 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4083 const
4085 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4086 if (!aSegment.isPresent())
4087 return false;
4089 sal_Unicode const * pSegBegin
4090 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4091 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4093 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4094 ++pSegBegin;
4095 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4096 if (*p == '.' && p != pSegBegin)
4097 return true;
4098 return false;
4101 OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4102 DecodeMechanism eMechanism,
4103 rtl_TextEncoding eCharset) const
4105 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4106 if (!aSegment.isPresent())
4107 return OUString();
4109 sal_Unicode const * pSegBegin
4110 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4111 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4113 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4114 ++pSegBegin;
4115 sal_Unicode const * pExtension = 0;
4116 sal_Unicode const * p = pSegBegin;
4117 for (; p != pSegEnd && *p != ';'; ++p)
4118 if (*p == '.' && p != pSegBegin)
4119 pExtension = p;
4120 if (!pExtension)
4121 pExtension = p;
4123 return decode(pSegBegin, pExtension, eMechanism, eCharset);
4126 bool INetURLObject::setBase(OUString const & rTheBase, sal_Int32 nIndex,
4127 bool bIgnoreFinalSlash,
4128 EncodeMechanism eMechanism,
4129 rtl_TextEncoding eCharset)
4131 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4132 if (!aSegment.isPresent())
4133 return false;
4135 sal_Unicode const * pPathBegin
4136 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4137 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4138 sal_Unicode const * pSegBegin
4139 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4140 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4142 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4143 ++pSegBegin;
4144 sal_Unicode const * pExtension = 0;
4145 sal_Unicode const * p = pSegBegin;
4146 for (; p != pSegEnd && *p != ';'; ++p)
4147 if (*p == '.' && p != pSegBegin)
4148 pExtension = p;
4149 if (!pExtension)
4150 pExtension = p;
4152 OUStringBuffer aNewPath;
4153 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4154 aNewPath.append(encodeText(rTheBase, false, PART_PCHAR,
4155 eMechanism, eCharset, true));
4156 aNewPath.append(pExtension, pPathEnd - pExtension);
4158 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4159 RTL_TEXTENCODING_UTF8);
4162 OUString INetURLObject::getExtension(sal_Int32 nIndex,
4163 bool bIgnoreFinalSlash,
4164 DecodeMechanism eMechanism,
4165 rtl_TextEncoding eCharset) const
4167 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4168 if (!aSegment.isPresent())
4169 return OUString();
4171 sal_Unicode const * pSegBegin
4172 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4173 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4175 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4176 ++pSegBegin;
4177 sal_Unicode const * pExtension = 0;
4178 sal_Unicode const * p = pSegBegin;
4179 for (; p != pSegEnd && *p != ';'; ++p)
4180 if (*p == '.' && p != pSegBegin)
4181 pExtension = p;
4183 if (!pExtension)
4184 return OUString();
4186 return decode(pExtension + 1, p, eMechanism, eCharset);
4189 bool INetURLObject::setExtension(OUString const & rTheExtension,
4190 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4191 EncodeMechanism eMechanism,
4192 rtl_TextEncoding eCharset)
4194 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4195 if (!aSegment.isPresent())
4196 return false;
4198 sal_Unicode const * pPathBegin
4199 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4200 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4201 sal_Unicode const * pSegBegin
4202 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4203 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4205 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4206 ++pSegBegin;
4207 sal_Unicode const * pExtension = 0;
4208 sal_Unicode const * p = pSegBegin;
4209 for (; p != pSegEnd && *p != ';'; ++p)
4210 if (*p == '.' && p != pSegBegin)
4211 pExtension = p;
4212 if (!pExtension)
4213 pExtension = p;
4215 OUStringBuffer aNewPath;
4216 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4217 aNewPath.append('.');
4218 aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR,
4219 eMechanism, eCharset, true));
4220 aNewPath.append(p, pPathEnd - p);
4222 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4223 RTL_TEXTENCODING_UTF8);
4226 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4228 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4229 if (!aSegment.isPresent())
4230 return false;
4232 sal_Unicode const * pPathBegin
4233 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4234 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4235 sal_Unicode const * pSegBegin
4236 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4237 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4239 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4240 ++pSegBegin;
4241 sal_Unicode const * pExtension = 0;
4242 sal_Unicode const * p = pSegBegin;
4243 for (; p != pSegEnd && *p != ';'; ++p)
4244 if (*p == '.' && p != pSegBegin)
4245 pExtension = p;
4246 if (!pExtension)
4247 return true;
4249 OUStringBuffer aNewPath;
4250 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4251 aNewPath.append(p, pPathEnd - p);
4253 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4254 RTL_TEXTENCODING_UTF8);
4257 bool INetURLObject::hasFinalSlash() const
4259 if (!checkHierarchical())
4260 return false;
4262 sal_Unicode const * pPathBegin
4263 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4264 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4265 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4268 bool INetURLObject::setFinalSlash()
4270 if (!checkHierarchical())
4271 return false;
4273 sal_Unicode const * pPathBegin
4274 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4275 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4276 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4277 return true;
4279 OUStringBuffer aNewPath;
4280 aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4281 aNewPath.append('/');
4283 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4284 RTL_TEXTENCODING_UTF8);
4287 bool INetURLObject::removeFinalSlash()
4289 if (!checkHierarchical())
4290 return false;
4292 sal_Unicode const * pPathBegin
4293 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4294 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4295 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4296 return true;
4298 --pPathEnd;
4299 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4300 return false;
4301 OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4303 return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8);
// Initializes this object as a file URL from a file system path.
//
// rFSysPath  the path in file system notation.
// eStyle     the notation(s) the path may be in (any combination of
//            FSYS_VOS, FSYS_UNX, FSYS_DOS).  With exactly one bit set that
//            notation is used; with more than one, the notation is guessed
//            from the path's shape (productions T1-T6 below); with none,
//            the call fails.
//
// Returns true on success.  On failure false is returned and *this is left
// unchanged (the URL is built in a temporary and only assigned at the end).
//
// NOTE(review): brace-only lines appear to be missing from this extract.
4306 bool INetURLObject::setFSysPath(OUString const & rFSysPath,
4307 FSysStyle eStyle)
4309 sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4310 sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
// Count how many notations are allowed.
4312 switch ((eStyle & FSYS_VOS ? 1 : 0)
4313 + (eStyle & FSYS_UNX ? 1 : 0)
4314 + (eStyle & FSYS_DOS ? 1 : 0))
4316 case 0:
4317 return false;
4319 case 1:
4320 break;
// Several notations allowed: guess.
4322 default:
// "//..." with VOS allowed: either "//." (alone or followed by '/') or
// "//host" (alone or followed by '/').
4323 if (eStyle & FSYS_VOS
4324 && pFSysEnd - pFSysBegin >= 2
4325 && pFSysBegin[0] == '/'
4326 && pFSysBegin[1] == '/')
4328 if (pFSysEnd - pFSysBegin >= 3
4329 && pFSysBegin[2] == '.'
4330 && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4332 eStyle = FSYS_VOS; // Production T1
4333 break;
4336 sal_Unicode const * p = pFSysBegin + 2;
4337 OUString aHost;
4338 if (parseHost(p, pFSysEnd, aHost)
4339 && (p == pFSysEnd || *p == '/'))
4341 eStyle = FSYS_VOS; // Production T2
4342 break;
// "\\host" (alone or followed by '\') with DOS allowed.
4346 if (eStyle & FSYS_DOS
4347 && pFSysEnd - pFSysBegin >= 2
4348 && pFSysBegin[0] == '\\'
4349 && pFSysBegin[1] == '\\')
4351 sal_Unicode const * p = pFSysBegin + 2;
4352 OUString aHost;
4353 if (parseHost(p, pFSysEnd, aHost)
4354 && (p == pFSysEnd || *p == '\\'))
4356 eStyle = FSYS_DOS; // Production T3
4357 break;
// "X:" (alone or followed by '/' or '\') with DOS allowed.
4361 if (eStyle & FSYS_DOS
4362 && pFSysEnd - pFSysBegin >= 2
4363 && rtl::isAsciiAlpha(pFSysBegin[0])
4364 && pFSysBegin[1] == ':'
4365 && (pFSysEnd - pFSysBegin == 2
4366 || pFSysBegin[2] == '/'
4367 || pFSysBegin[2] == '\\'))
4369 eStyle = FSYS_DOS; // Productions T4, T5
4370 break;
// Nothing matched: only UNX and DOS can still be told apart.
4373 if (!(eStyle & (FSYS_UNX | FSYS_DOS)))
4374 return false;
4376 eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4377 // Production T6
4378 break;
// Build the URL text; the path is appended in a notation-specific way.
4381 OUStringBuffer aSynAbsURIRef("file://");
4383 switch (eStyle)
// VOS: must start with "//"; a following "." (alone or followed by '/') is
// skipped; '#' and '%' are escaped, everything else is copied.
4385 case FSYS_VOS:
4387 sal_Unicode const * p = pFSysBegin;
4388 if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4389 return false;
4390 if (p != pFSysEnd && *p == '.'
4391 && (pFSysEnd - p == 1 || p[1] == '/'))
4392 ++p;
4393 for (; p != pFSysEnd; ++p)
4394 switch (*p)
4396 case '#':
4397 case '%':
4398 appendEscape(aSynAbsURIRef, *p);
4399 break;
4401 default:
4402 aSynAbsURIRef.append(*p);
4403 break;
4405 break;
// UNX: must be empty or start with '/'; '|', '#' and '%' are escaped,
// everything else is copied.
4408 case FSYS_UNX:
4410 sal_Unicode const * p = pFSysBegin;
4411 if (p != pFSysEnd && *p != '/')
4412 return false;
4413 for (; p != pFSysEnd; ++p)
4414 switch (*p)
4416 case '|':
4417 case '#':
4418 case '%':
4419 appendEscape(aSynAbsURIRef, *p);
4420 break;
4422 default:
4423 aSynAbsURIRef.append(*p);
4424 break;
4426 break;
// DOS: a leading "\\" is skipped so that what follows lands directly behind
// "file://"; otherwise a '/' is appended first and, if the path starts with
// a drive letter ("X:"), '/' is accepted as a delimiter in addition to '\'.
// Delimiters become '/'; '/' (when not a delimiter), '#' and '%' are
// escaped; everything else is copied.
4429 case FSYS_DOS:
4431 sal_uInt32 nAltDelimiter = 0x80000000;
4432 sal_Unicode const * p = pFSysBegin;
4433 if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4434 p += 2;
4435 else
4437 aSynAbsURIRef.append('/');
4438 if (pFSysEnd - p >= 2
4439 && rtl::isAsciiAlpha(p[0])
4440 && p[1] == ':'
4441 && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4442 nAltDelimiter = '/';
4444 for (; p != pFSysEnd; ++p)
4445 if (*p == '\\' || *p == nAltDelimiter)
4446 aSynAbsURIRef.append('/');
4447 else
4448 switch (*p)
4450 case '/':
4451 case '#':
4452 case '%':
4453 appendEscape(aSynAbsURIRef, *p);
4454 break;
4456 default:
4457 aSynAbsURIRef.append(*p);
4458 break;
4460 break;
4463 default:
4464 OSL_ASSERT(false);
4465 break;
// Parse the assembled text; only a valid result replaces *this.
4468 INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), WAS_ENCODED,
4469 RTL_TEXTENCODING_UTF8);
4470 if (aTemp.HasError())
4471 return false;
4473 *this = aTemp;
4474 return true;
// Converts this file URL into a file system path.
//
// eStyle      the notation(s) the result may be in (any combination of
//             FSYS_VOS, FSYS_UNX, FSYS_DOS).  If more than one bit is set,
//             one notation is picked based on host and path (see below).
// pDelimiter  if not null, receives the path delimiter of the chosen
//             notation ('/' for VOS and UNX, '\' for DOS) whenever a path
//             is produced.
//
// Returns the path, or an empty string if this is not a file URL, if no
// notation could be chosen, or if UNX was chosen although a non-empty host
// is present.
//
// NOTE(review): brace-only lines appear to be missing from this extract.
4477 OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4478 sal_Unicode * pDelimiter) const
4480 if (m_eScheme != INetProtocol::File)
4481 return OUString();
// More than one notation allowed: pick one.
//  - VOS, if allowed and there is a non-empty host;
//  - else DOS, if the path has a DOS volume or DOS is allowed and there is
//    a non-empty host;
//  - else UNX, if allowed and there is no (or an empty) host;
//  - else none (FSysStyle(0)), which yields an empty result below.
4483 if ((eStyle & FSYS_VOS ? 1 : 0)
4484 + (eStyle & FSYS_UNX ? 1 : 0)
4485 + (eStyle & FSYS_DOS ? 1 : 0)
4486 > 1)
4488 eStyle = eStyle & FSYS_VOS
4489 && m_aHost.isPresent()
4490 && m_aHost.getLength() > 0 ?
4491 FSYS_VOS :
4492 hasDosVolume(eStyle)
4493 || ((eStyle & FSYS_DOS) != 0
4494 && m_aHost.isPresent()
4495 && m_aHost.getLength() > 0) ?
4496 FSYS_DOS :
4497 eStyle & FSYS_UNX
4498 && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ?
4499 FSYS_UNX :
4500 FSysStyle(0);
4503 switch (eStyle)
// VOS: "//" + host (or "." if there is none) + decoded path.
4505 case FSYS_VOS:
4507 if (pDelimiter)
4508 *pDelimiter = '/';
4510 OUStringBuffer aSynFSysPath;
4511 aSynFSysPath.append("//");
4512 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4513 aSynFSysPath.append(decode(m_aHost, DECODE_WITH_CHARSET,
4514 RTL_TEXTENCODING_UTF8));
4515 else
4516 aSynFSysPath.append('.');
4517 aSynFSysPath.append(decode(m_aPath, DECODE_WITH_CHARSET,
4518 RTL_TEXTENCODING_UTF8));
4519 return aSynFSysPath.makeStringAndClear();
// UNX: the decoded path; not possible with a non-empty host.
4522 case FSYS_UNX:
4524 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4525 return OUString();
4527 if (pDelimiter)
4528 *pDelimiter = '/';
4530 return decode(m_aPath, DECODE_WITH_CHARSET, RTL_TEXTENCODING_UTF8);
// DOS: "\\host\" (if there is a non-empty host) followed by the path
// without its leading '/', with every unescaped '/' turned into '\'.
4533 case FSYS_DOS:
4535 if (pDelimiter)
4536 *pDelimiter = '\\';
4538 OUStringBuffer aSynFSysPath;
4539 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4541 aSynFSysPath.append("\\\\");
4542 aSynFSysPath.append(decode(m_aHost, DECODE_WITH_CHARSET,
4543 RTL_TEXTENCODING_UTF8));
4544 aSynFSysPath.append('\\');
4546 sal_Unicode const * p
4547 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4548 sal_Unicode const * pEnd = p + m_aPath.getLength();
4549 DBG_ASSERT(p < pEnd && *p == '/',
4550 "INetURLObject::getFSysPath(): Bad path");
// Skip the path's leading '/'.
4551 ++p;
4552 while (p < pEnd)
4554 EscapeType eEscapeType;
4555 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, WAS_ENCODED,
4556 RTL_TEXTENCODING_UTF8,
4557 eEscapeType);
// Only a literal (unescaped) '/' is a delimiter; an escaped one is copied.
4558 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
4559 aSynFSysPath.append('\\');
4560 else
4561 aSynFSysPath.appendUtf32(nUTF32);
4563 return aSynFSysPath.makeStringAndClear();
4566 default:
4567 return OUString();
4571 OUString INetURLObject::GetMsgId(DecodeMechanism,
4572 rtl_TextEncoding)
4574 return OUString();
4577 // static
4578 void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4579 sal_uInt32 nUCS4)
4581 DBG_ASSERT(nUCS4 < 0x80000000,
4582 "INetURLObject::appendUCS4Escape(): Bad char");
4583 if (nUCS4 < 0x80)
4584 appendEscape(rTheText, nUCS4);
4585 else if (nUCS4 < 0x800)
4587 appendEscape(rTheText, nUCS4 >> 6 | 0xC0);
4588 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4590 else if (nUCS4 < 0x10000)
4592 appendEscape(rTheText, nUCS4 >> 12 | 0xE0);
4593 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4594 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4596 else if (nUCS4 < 0x200000)
4598 appendEscape(rTheText, nUCS4 >> 18 | 0xF0);
4599 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4600 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4601 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4603 else if (nUCS4 < 0x4000000)
4605 appendEscape(rTheText, nUCS4 >> 24 | 0xF8);
4606 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4607 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4608 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4609 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4611 else
4613 appendEscape(rTheText, nUCS4 >> 30 | 0xFC);
4614 appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80);
4615 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4616 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4617 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4618 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4622 // static
4623 void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4624 EscapeType eEscapeType, bool bOctets,
4625 Part ePart, rtl_TextEncoding eCharset,
4626 bool bKeepVisibleEscapes)
4628 bool bEscape;
4629 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4630 switch (eEscapeType)
4632 case ESCAPE_NO:
4633 if (mustEncode(nUCS4, ePart))
4635 bEscape = true;
4636 eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 :
4637 RTL_TEXTENCODING_UTF8;
4639 else
4640 bEscape = false;
4641 break;
4643 case ESCAPE_OCTET:
4644 bEscape = true;
4645 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4646 break;
4648 case ESCAPE_UTF32:
4649 if (mustEncode(nUCS4, ePart))
4651 bEscape = true;
4652 eTargetCharset = eCharset;
4654 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4656 bEscape = true;
4657 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4659 else
4660 bEscape = false;
4661 break;
4662 default:
4663 bEscape = false;
4666 if (bEscape)
4668 switch (eTargetCharset)
4670 default:
4671 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4672 //fallthrough
4673 case RTL_TEXTENCODING_ASCII_US:
4674 case RTL_TEXTENCODING_ISO_8859_1:
4675 appendEscape(rTheText, nUCS4);
4676 break;
4677 case RTL_TEXTENCODING_UTF8:
4678 appendUCS4Escape(rTheText, nUCS4);
4679 break;
4682 else
4683 rTheText.append(sal_Unicode(nUCS4));
4686 // static
4687 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
4688 sal_Unicode const * pEnd, bool bOctets,
4689 EncodeMechanism eMechanism,
4690 rtl_TextEncoding eCharset,
4691 EscapeType & rEscapeType)
4693 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
4694 sal_uInt32 nUTF32 = bOctets ? *rBegin++ :
4695 INetMIME::getUTF32Character(rBegin, pEnd);
4696 switch (eMechanism)
4698 case ENCODE_ALL:
4699 rEscapeType = ESCAPE_NO;
4700 break;
4702 case WAS_ENCODED:
4704 int nWeight1;
4705 int nWeight2;
4706 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4707 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
4708 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
4710 rBegin += 2;
4711 nUTF32 = nWeight1 << 4 | nWeight2;
4712 switch (eCharset)
4714 default:
4715 OSL_FAIL(
4716 "INetURLObject::getUTF32(): Unsupported charset");
4717 //fall-through
4718 case RTL_TEXTENCODING_ASCII_US:
4719 rEscapeType = rtl::isAscii(nUTF32) ?
4720 ESCAPE_UTF32 : ESCAPE_OCTET;
4721 break;
4723 case RTL_TEXTENCODING_ISO_8859_1:
4724 rEscapeType = ESCAPE_UTF32;
4725 break;
4727 case RTL_TEXTENCODING_UTF8:
4728 if (rtl::isAscii(nUTF32))
4729 rEscapeType = ESCAPE_UTF32;
4730 else
4732 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
4734 sal_uInt32 nEncoded;
4735 int nShift;
4736 sal_uInt32 nMin;
4737 if (nUTF32 <= 0xDF)
4739 nEncoded = (nUTF32 & 0x1F) << 6;
4740 nShift = 0;
4741 nMin = 0x80;
4743 else if (nUTF32 <= 0xEF)
4745 nEncoded = (nUTF32 & 0x0F) << 12;
4746 nShift = 6;
4747 nMin = 0x800;
4749 else
4751 nEncoded = (nUTF32 & 0x07) << 18;
4752 nShift = 12;
4753 nMin = 0x10000;
4755 sal_Unicode const * p = rBegin;
4756 bool bUTF8 = true;
4757 for (;;)
4759 if (pEnd - p < 3
4760 || p[0] != '%'
4761 || (nWeight1
4762 = INetMIME::getHexWeight(p[1]))
4764 || nWeight1 > 11
4765 || (nWeight2
4766 = INetMIME::getHexWeight(p[2]))
4767 < 0)
4769 bUTF8 = false;
4770 break;
4772 p += 3;
4773 nEncoded
4774 |= ((nWeight1 & 3) << 4 | nWeight2)
4775 << nShift;
4776 if (nShift == 0)
4777 break;
4778 nShift -= 6;
4780 if (bUTF8 && nEncoded >= nMin
4781 && nEncoded <= 0x10FFFF
4782 && !rtl::isHighSurrogate(nEncoded)
4783 && !rtl::isLowSurrogate(nEncoded))
4785 rBegin = p;
4786 nUTF32 = nEncoded;
4787 rEscapeType = ESCAPE_UTF32;
4788 break;
4791 rEscapeType = ESCAPE_OCTET;
4793 break;
4796 else
4797 rEscapeType = ESCAPE_NO;
4798 break;
4801 case NOT_CANONIC:
4803 int nWeight1;
4804 int nWeight2;
4805 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4806 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
4807 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
4809 rBegin += 2;
4810 nUTF32 = nWeight1 << 4 | nWeight2;
4811 rEscapeType = ESCAPE_OCTET;
4813 else
4814 rEscapeType = ESCAPE_NO;
4815 break;
4818 return nUTF32;
4821 // static
4822 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
4823 sal_Unicode const * pEnd,
4824 bool bEager)
4826 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
4827 State eState = STATE_DOT;
4828 sal_Int32 nLabels = 0;
4829 sal_Unicode const * pLastAlphanumeric = 0;
4830 for (sal_Unicode const * p = rBegin;; ++p)
4831 switch (eState)
4833 case STATE_DOT:
4834 if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
4836 ++nLabels;
4837 eState = STATE_LABEL;
4838 break;
4840 if (bEager || nLabels == 0)
4841 return 0;
4842 rBegin = p - 1;
4843 return nLabels;
4845 case STATE_LABEL:
4846 if (p != pEnd)
4848 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4849 break;
4850 else if (*p == '.')
4852 eState = STATE_DOT;
4853 break;
4855 else if (*p == '-')
4857 pLastAlphanumeric = p;
4858 eState = STATE_HYPHEN;
4859 break;
4862 rBegin = p;
4863 return nLabels;
4865 case STATE_HYPHEN:
4866 if (p != pEnd)
4868 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4870 eState = STATE_LABEL;
4871 break;
4873 else if (*p == '-')
4874 break;
4876 if (bEager)
4877 return 0;
4878 rBegin = pLastAlphanumeric;
4879 return nLabels;
4883 // static
4884 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
4885 sal_Unicode const * pEnd)
4887 if (rBegin != pEnd && *rBegin == '[') {
4888 sal_Unicode const * p = rBegin + 1;
4889 //TODO: check for valid IPv6address (RFC 2373):
4890 while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
4892 ++p;
4894 if (p != pEnd && *p == ']') {
4895 rBegin = p + 1;
4896 return true;
4899 return false;
4902 OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism,
4903 rtl_TextEncoding eCharset)
4904 const
4906 if (!checkHierarchical())
4907 return OUString();
4908 INetURLObject aTemp(*this);
4909 aTemp.clearFragment();
4910 aTemp.clearQuery();
4911 aTemp.removeSegment(LAST_SEGMENT, false);
4912 aTemp.setFinalSlash();
4913 return aTemp.GetMainURL(eMechanism, eCharset);
4916 OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
4917 rtl_TextEncoding eCharset) const
4919 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
4922 OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism,
4923 rtl_TextEncoding eCharset) const
4925 return getExtension(LAST_SEGMENT, false, eMechanism, eCharset);
4928 bool INetURLObject::CutLastName()
4930 INetURLObject aTemp(*this);
4931 aTemp.clearFragment();
4932 aTemp.clearQuery();
4933 if (!aTemp.removeSegment(LAST_SEGMENT, false))
4934 return false;
4935 *this = aTemp;
4936 return true;
4939 OUString INetURLObject::PathToFileName() const
4941 if (m_eScheme != INetProtocol::File)
4942 return OUString();
4943 OUString aSystemPath;
4944 if (osl::FileBase::getSystemPathFromFileURL(
4945 decode(m_aAbsURIRef.getStr(),
4946 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
4947 NO_DECODE, RTL_TEXTENCODING_UTF8),
4948 aSystemPath)
4949 != osl::FileBase::E_None)
4950 return OUString();
4951 return aSystemPath;
4954 OUString INetURLObject::GetFull() const
4956 INetURLObject aTemp(*this);
4957 aTemp.removeFinalSlash();
4958 return aTemp.PathToFileName();
4961 OUString INetURLObject::GetPath() const
4963 INetURLObject aTemp(*this);
4964 aTemp.removeSegment(LAST_SEGMENT, true);
4965 aTemp.removeFinalSlash();
4966 return aTemp.PathToFileName();
4969 void INetURLObject::SetBase(OUString const & rTheBase)
4971 setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL);
4974 OUString INetURLObject::GetBase() const
4976 return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET);
4979 void INetURLObject::SetName(OUString const & rTheName,
4980 EncodeMechanism eMechanism,
4981 rtl_TextEncoding eCharset)
4983 INetURLObject aTemp(*this);
4984 if (aTemp.removeSegment(LAST_SEGMENT, true)
4985 && aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism,
4986 eCharset))
4987 *this = aTemp;
4990 void INetURLObject::SetExtension(OUString const & rTheExtension,
4991 EncodeMechanism eMechanism,
4992 rtl_TextEncoding eCharset)
4994 setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset);
4997 OUString INetURLObject::CutExtension(DecodeMechanism eMechanism,
4998 rtl_TextEncoding eCharset)
5000 OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism,
5001 eCharset));
5002 return removeExtension(LAST_SEGMENT, false)
5003 ? aTheExtension : OUString();
5006 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */