bump product version to 4.1.6.2
[LibreOffice.git] / tools / source / fsys / urlobj.cxx
blob377d2eb10b83a454607f9a099e826f969803603d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <tools/urlobj.hxx>
21 #include <tools/debug.hxx>
22 #include <tools/inetmime.hxx>
23 #include "com/sun/star/uno/Reference.hxx"
24 #include "com/sun/star/util/XStringWidth.hpp"
25 #include "osl/diagnose.h"
26 #include "osl/file.hxx"
27 #include "rtl/character.hxx"
28 #include "rtl/string.h"
29 #include "rtl/textenc.h"
30 #include "rtl/ustring.hxx"
31 #include "sal/types.h"
33 #include <algorithm>
34 #include <limits>
36 #include <string.h>
38 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
39 // unnamed namespaces don't work well yet...
41 using namespace com::sun;
43 // INetURLObject
45 /* The URI grammar (using RFC 2234 conventions).
47 Constructs of the form
48 {reference <rule1> using rule2}
49 stand for a rule matching the given rule1 specified in the given reference,
50 encoded to URI syntax using rule2 (as specified in this URI grammar).
53 ; RFC 1738, RFC 2396, RFC 2732, private
54 login = [user [":" password] "@"] hostport
55 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
56 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
57 hostport = host [":" port]
58 host = incomplete-hostname / hostname / IPv4address / IPv6reference
59 incomplete-hostname = *(domainlabel ".") domainlabel
60 hostname = *(domainlabel ".") toplabel ["."]
61 domainlabel = alphanum [*(alphanum / "-") alphanum]
62 toplabel = ALPHA [*(alphanum / "-") alphanum]
63 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
64 IPv6reference = "[" hexpart [":" IPv4address] "]"
65 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
66 hexseq = hex4 *(":" hex4)
67 hex4 = 1*4HEXDIG
68 port = *DIGIT
69 escaped = "%" HEXDIG HEXDIG
70 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
71 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
72 alphanum = ALPHA / DIGIT
73 unreserved = alphanum / mark
74 uric = escaped / reserved / unreserved
75 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
78 ; RFC 1738, RFC 2396
79 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
80 segment = *pchar
83 ; RFC 1738, RFC 2396
84 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
85 segment = *(pchar / ";")
88 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
89 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
90 segment = *pchar
91 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
94 ; RFC 2368, RFC 2396
95 mailto-url = "MAILTO:" [to] [headers]
96 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
97 headers = "?" header *("&" header)
98 header = hname "=" hvalue
99 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
100 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
103 ; private (see RFC 1738, RFC 2396)
104 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
105 segment = *(pchar / ";")
108 ; RFC 1738, RFC 2396, RFC 2732
109 news-url = "NEWS:" grouppart
110 grouppart = "*" / group / article
111 group = alpha *(alphanum / "+" / "-" / "." / "_")
112 article = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "?" / "_" / "~") "@" host
115 ; private
116 private-url = "PRIVATE:" path ["?" *uric]
117 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
120 ; private
121 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
122 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
123 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
126 ; private
127 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
128 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
131 ; private
132 slot-url = "SLOT:" path ["?" *uric]
133 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
136 ; private
137 macro-url = "MACRO:" path ["?" *uric]
138 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
141 ; private
142 javascript-url = "JAVASCRIPT:" *uric
145 ; private (see RFC 2192)
146 imap-url = "IMAP://" user [";AUTH=" auth] "@" hostport "/" segment *("/" segment) ["/;UID=" nz_number]
147 user = 1*{RFC 2060 <CHAR8> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "=" / "_" / "~")}
148 auth = {RFC 2060 <atom> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "+" / "," / "-" / "." / "=" / "_" / "~")}
149 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / "=" / "@" / "_" / "~")
150 nz_number = {RFC 2060 <nz_number> using *DIGIT}
153 ; private
154 pop3-url = "POP3://" login ["/" ["<" *uric ">"]]
157 ; RFC 2397
158 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
159 mediatype = [type "/" subtype] *(";" attribute "=" value)
160 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
161 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
162 attribute = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
163 value = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
166 ; RFC 2392, RFC 2396
167 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
170 ; private
171 out-url = "OUT:///~" name ["/" *uric]
172 name = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "?" / "@" / "_" / "~")
175 ; private
176 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
177 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
179 ; private
180 vim-url = "VIM://" +vimc [":" *vimc] ["/" [("INBOX" message) / ("NEWSGROUPS" ["/" [+vimc message]])]]
181 message = ["/" [+vimc [":" +DIGIT "." +DIGIT "." +DIGIT]]]
182 vimc = ("=" HEXDIG HEXDIG) / alphanum
185 ; private
186 uno-url = ".UNO:" path ["?" *uric]
187 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
190 ; private
191 component-url = ".COMPONENT:" path ["?" *uric]
192 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
195 ; private
196 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
197 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
200 ; RFC 2255
201 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
202 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
203 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
204 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
205 extension = ["!"] ["X-"] extoken ["=" exvalue]
206 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
207 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
210 ; private
211 db-url = "DB:" *uric
214 ; private
215 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
216 opaque_part = uric_no_slash *uric
217 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
220 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
223 ; RFC 1738
224 telnet-url = "TELNET://" login ["/"]
227 ; private
228 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
229 opaque_part = uric_no_slash *uric
230 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
233 ; private
234 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
235 segment = *pchar
238 ; private
239 unknown-url = scheme ":" 1*uric
240 scheme = ALPHA *(alphanum / "+" / "-" / ".")
243 ; private (http://ubiqx.org/cifs/Appendix-D.html):
244 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
245 segment = *(pchar / ";")
248 inline sal_Int32 INetURLObject::SubString::clear()
250 sal_Int32 nDelta = -m_nLength;
251 m_nBegin = -1;
252 m_nLength = 0;
253 return nDelta;
256 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
257 OUString const & rSubString)
259 OUString sTemp(rString.makeStringAndClear());
260 sal_Int32 nDelta = set(sTemp, rSubString);
261 rString.append(sTemp);
262 return nDelta;
265 inline sal_Int32 INetURLObject::SubString::set(OUString & rString,
266 OUString const & rSubString)
268 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
270 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
272 m_nLength = rSubString.getLength();
273 return nDelta;
276 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
277 OUString const & rSubString,
278 sal_Int32 nTheBegin)
280 m_nBegin = nTheBegin;
281 return set(rString, rSubString);
284 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
286 if (isPresent())
287 m_nBegin = m_nBegin + nDelta;
290 int INetURLObject::SubString::compare(SubString const & rOther,
291 OUStringBuffer const & rThisString,
292 OUStringBuffer const & rOtherString) const
294 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
295 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
296 sal_Unicode const * end = p1 + len;
297 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
298 while (p1 != end) {
299 if (*p1 < *p2) {
300 return -1;
301 } else if (*p1 > *p2) {
302 return 1;
304 ++p1;
305 ++p2;
307 return m_nLength < rOther.m_nLength ? -1
308 : m_nLength > rOther.m_nLength ? 1
309 : 0;
312 struct INetURLObject::SchemeInfo
314 sal_Char const * m_pScheme;
315 sal_Char const * m_pPrefix;
316 sal_uInt16 m_nDefaultPort;
317 bool m_bAuthority;
318 bool m_bUser;
319 bool m_bAuth;
320 bool m_bPassword;
321 bool m_bHost;
322 bool m_bPort;
323 bool m_bHierarchical;
324 bool m_bQuery;
327 struct INetURLObject::PrefixInfo
329 enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!
331 sal_Char const * m_pPrefix;
332 sal_Char const * m_pTranslatedPrefix;
333 INetProtocol m_eScheme;
334 Kind m_eKind;
// Per-scheme properties, indexed by INetProtocol value (see getSchemeInfo()).
// Field order within each row follows struct SchemeInfo:
//   m_pScheme, m_pPrefix, m_nDefaultPort, m_bAuthority, m_bUser, m_bAuth,
//   m_bPassword, m_bHost, m_bPort, m_bHierarchical, m_bQuery
// The array is sized INET_PROT_END, so rows must stay in enum order; rows
// with an empty scheme name are slots without a fixed scheme string.
337 static INetURLObject::SchemeInfo const aSchemeInfoMap[INET_PROT_END]
338 = { { "", "", 0, false, false, false, false, false, false, false,
339 false },
340 { "ftp", "ftp://", 21, true, true, false, true, true, true, true,
341 false },
342 { "http", "http://", 80, true, false, false, false, true, true,
343 true, true },
344 { "file", "file://", 0, true, false, false, false, true, false,
345 true, false },
346 { "mailto", "mailto:", 0, false, false, false, false, false,
347 false, false, true },
348 { "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, false,
349 false, false, true, true, true, true },
350 { "news", "news:", 0, false, false, false, false, false, false, false,
351 false },
352 { "private", "private:", 0, false, false, false, false, false,
353 false, false, true },
354 { "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false,
355 false, false, false, true, true },
356 { "https", "https://", 443, true, false, false, false, true, true,
357 true, true },
358 { "slot", "slot:", 0, false, false, false, false, false, false,
359 false, true },
360 { "macro", "macro:", 0, false, false, false, false, false, false,
361 false, true },
362 { "javascript", "javascript:", 0, false, false, false, false,
363 false, false, false, false },
364 { "imap", "imap://", 143, true, true, true, false, true, true,
365 true, false },
366 { "pop3", "pop3://", 110, true, true, false, true, true, true,
367 false, false },
368 { "data", "data:", 0, false, false, false, false, false, false,
369 false, false },
370 { "cid", "cid:", 0, false, false, false, false, false, false,
371 false, false },
372 { "out", "out://", 0, true, false, false, false, false, false,
373 false, false },
374 { "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, false,
375 false, false, false, true, false },
376 { "vim", "vim://", 0, true, true, false, true, false, false, true,
377 false },
378 { ".uno", ".uno:", 0, false, false, false, false, false, false,
379 false, true },
380 { ".component", ".component:", 0, false, false, false, false,
381 false, false, false, true },
382 { "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false,
383 false, false, false, true, true },
384 { "ldap", "ldap://", 389, true, false, false, false, true, true,
385 false, true },
386 { "db", "db:", 0, false, false, false, false, false, false, false,
387 false },
388 { "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false,
389 false, false, false, false, false },
390 { "", "", 0, false, false, false, false, true, true, true, false }, // Placeholder for removed 26: ODMA
391 { "telnet", "telnet://", 23, true, true, false, true, true, true, true,
392 false },
393 { "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false, false,
394 false, false, false, false, false },
395 { "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false,
396 false, false, false, true, false },
// NOTE(review): unnamed row -- presumably the slot for INET_PROT_GENERIC,
// whose scheme text comes from the parsed URL; confirm against INetProtocol.
397 { "", "", 0, false, false, false, false, true, true, true, false },
398 { "smb", "smb://", 139, true, true, false, true, true, true, true,
399 true },
400 { "hid", "hid:", 0, false, false, false, false, false, false,
401 false, true },
402 { "sftp", "sftp://", 22, true, true, false, true, true, true, true,
403 true },
404 { "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", 0, true, true, false,
405 false, true, false, true, true } };
407 // static
408 inline INetURLObject::SchemeInfo const &
409 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
411 return aSchemeInfoMap[eTheScheme];
414 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
416 return getSchemeInfo(m_eScheme);
419 // static
420 inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
421 sal_Char cEscapePrefix,
422 sal_uInt32 nOctet)
424 rTheText.append(sal_Unicode(cEscapePrefix));
425 rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet >> 4))));
426 rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet & 15))));
429 namespace unnamed_tools_urlobj {
431 enum
433 PA = INetURLObject::PART_OBSOLETE_NORMAL,
434 PB = INetURLObject::PART_OBSOLETE_FILE,
435 PC = INetURLObject::PART_OBSOLETE_PARAM,
436 PD = INetURLObject::PART_USER_PASSWORD,
437 PE = INetURLObject::PART_IMAP_ACHAR,
438 PF = INetURLObject::PART_VIM,
439 PG = INetURLObject::PART_HOST_EXTRA,
440 PH = INetURLObject::PART_FPATH,
441 PI = INetURLObject::PART_AUTHORITY,
442 PJ = INetURLObject::PART_PATH_SEGMENTS_EXTRA,
443 PK = INetURLObject::PART_REL_SEGMENT_EXTRA,
444 PL = INetURLObject::PART_URIC,
445 PM = INetURLObject::PART_HTTP_PATH,
446 PN = INetURLObject::PART_FILE_SEGMENT_EXTRA,
447 PO = INetURLObject::PART_MESSAGE_ID,
448 PP = INetURLObject::PART_MESSAGE_ID_PATH,
449 PQ = INetURLObject::PART_MAILTO,
450 PR = INetURLObject::PART_PATH_BEFORE_QUERY,
451 PS = INetURLObject::PART_PCHAR,
452 PT = INetURLObject::PART_FRAGMENT,
453 PU = INetURLObject::PART_VISIBLE,
454 PV = INetURLObject::PART_VISIBLE_NONSPECIAL,
455 PW = INetURLObject::PART_CREATEFRAGMENT,
456 PX = INetURLObject::PART_UNO_PARAM_VALUE,
457 PY = INetURLObject::PART_UNAMBIGUOUS,
458 PZ = INetURLObject::PART_URIC_NO_SLASH,
459 P1 = INetURLObject::PART_HTTP_QUERY,
460 P2 = INetURLObject::PART_NEWS_ARTICLE_LOCALPART
// Lookup table indexed by US-ASCII code point (0..127).  Each entry is the
// sum of the Part values (written with the PA..P2 aliases) in which the
// character may appear literally: mustEncode() reports "must encode" exactly
// when the entry has no bit in common with the requested part.
// The first 32 entries (0x00-0x1F) and the last one (0x7F) are 0, so those
// characters are always encoded.
463 static sal_uInt32 const aMustEncodeMap[128]
464 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 /* */ PY,
467 /* ! */ PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
468 /* " */ PU+PV +PY,
469 /* # */ PU,
470 /* $ */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
471 /* % */ PU,
472 /* & */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP +PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
473 /* ' */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
474 /* ( */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
475 /* ) */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
476 /* * */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
477 /* + */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
478 /* , */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW +PZ+P1+P2,
479 /* - */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
480 /* . */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
481 /* / */ PA+PB+PC +PH +PJ +PL+PM +PP+PQ+PR +PT+PU+PV +PX +P2,
482 /* 0 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
483 /* 1 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
484 /* 2 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
485 /* 3 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
486 /* 4 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
487 /* 5 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
488 /* 6 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
489 /* 7 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
490 /* 8 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
491 /* 9 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
492 /* : */ PB+PC +PH+PI+PJ +PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
493 /* ; */ PC+PD +PI+PJ+PK+PL+PM +PO+PP+PQ+PR +PT+PU +PW +PZ+P1+P2,
494 /* < */ PC +PO+PP +PU+PV +PY,
495 /* = */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN +PR+PS+PT+PU+PV+PW +PZ+P1+P2,
496 /* > */ PC +PO+PP +PU+PV +PY,
497 /* ? */ PC +PL +PT+PU +PW+PX +PZ +P2,
498 /* @ */ PC +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1,
499 /* A */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
500 /* B */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
501 /* C */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
502 /* D */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
503 /* E */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
504 /* F */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
505 /* G */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
506 /* H */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
507 /* I */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
508 /* J */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
509 /* K */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
510 /* L */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
511 /* M */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
512 /* N */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
513 /* O */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
514 /* P */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
515 /* Q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
516 /* R */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
517 /* S */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
518 /* T */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
519 /* U */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
520 /* V */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
521 /* W */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
522 /* X */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
523 /* Y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
524 /* Z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
525 /* [ */ PL +PU+PV +PX,
526 /* \ */ PB +PU+PV +PY,
527 /* ] */ PL +PU+PV +PX,
528 /* ^ */ PU+PV +PY,
529 /* _ */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
530 /* ` */ PU+PV +PY,
531 /* a */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
532 /* b */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
533 /* c */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
534 /* d */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
535 /* e */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
536 /* f */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
537 /* g */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
538 /* h */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
539 /* i */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
540 /* j */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
541 /* k */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
542 /* l */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
543 /* m */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
544 /* n */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
545 /* o */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
546 /* p */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
547 /* q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
548 /* r */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
549 /* s */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
550 /* t */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
551 /* u */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
552 /* v */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
553 /* w */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
554 /* x */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
555 /* y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
556 /* z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
557 /* { */ PU+PV +PY,
558 /* | */ PB+PC +PN +PT+PU+PV +PY,
559 /* } */ PU+PV +PY,
560 /* ~ */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ +P2,
561 0 };
563 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
565 return !INetMIME::isUSASCII(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
570 void INetURLObject::setInvalid()
572 m_aAbsURIRef.setLength(0);
573 m_eScheme = INET_PROT_NOT_VALID;
574 m_aScheme.clear();
575 m_aUser.clear();
576 m_aAuth.clear();
577 m_aHost.clear();
578 m_aPort.clear();
579 m_aPath.clear();
580 m_aQuery.clear();
581 m_aFragment.clear();
584 namespace unnamed_tools_urlobj {
586 INetURLObject::FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
587 sal_Unicode const * pEnd,
588 INetURLObject::FSysStyle eStyle)
590 DBG_ASSERT(eStyle
591 & (INetURLObject::FSYS_UNX
592 | INetURLObject::FSYS_DOS
593 | INetURLObject::FSYS_MAC),
594 "guessFSysStyleByCounting(): Bad style");
595 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
596 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
597 "guessFSysStyleByCounting(): Too big");
598 sal_Int32 nSlashCount
599 = eStyle & INetURLObject::FSYS_UNX ?
600 0 : std::numeric_limits< sal_Int32 >::min();
601 sal_Int32 nBackslashCount
602 = eStyle & INetURLObject::FSYS_DOS ?
603 0 : std::numeric_limits< sal_Int32 >::min();
604 sal_Int32 nColonCount
605 = eStyle & INetURLObject::FSYS_MAC ?
606 0 : std::numeric_limits< sal_Int32 >::min();
607 while (pBegin != pEnd)
608 switch (*pBegin++)
610 case '/':
611 ++nSlashCount;
612 break;
614 case '\\':
615 ++nBackslashCount;
616 break;
618 case ':':
619 ++nColonCount;
620 break;
622 return nSlashCount >= nBackslashCount ?
623 nSlashCount >= nColonCount ?
624 INetURLObject::FSYS_UNX : INetURLObject::FSYS_MAC :
625 nBackslashCount >= nColonCount ?
626 INetURLObject::FSYS_DOS : INetURLObject::FSYS_MAC;
629 OUString parseScheme(
630 sal_Unicode const ** begin, sal_Unicode const * end,
631 sal_uInt32 fragmentDelimiter)
633 sal_Unicode const * p = *begin;
634 if (p != end && rtl::isAsciiAlpha(*p)) {
635 do {
636 ++p;
637 } while (p != end
638 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
639 || *p == '.'));
640 // #i34835# To avoid problems with Windows file paths like "C:\foo",
641 // do not accept generic schemes that are only one character long:
642 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
643 && p - *begin >= 2)
645 OUString scheme(
646 OUString(*begin, p - *begin).toAsciiLowerCase());
647 *begin = p + 1;
648 return scheme;
651 return OUString();
656 bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
657 bool bOctets,
658 EncodeMechanism eMechanism,
659 rtl_TextEncoding eCharset,
660 bool bSmart,
661 FSysStyle eStyle)
663 sal_Unicode const * pPos = rTheAbsURIRef.getStr();
664 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
666 setInvalid();
668 sal_uInt32 nFragmentDelimiter = '#';
670 OUStringBuffer aSynAbsURIRef;
672 // Parse <scheme>:
673 sal_Unicode const * p = pPos;
674 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
675 if (pPrefix)
677 pPos = p;
678 m_eScheme = pPrefix->m_eScheme;
680 OUString sTemp(OUString::createFromAscii(pPrefix->m_eKind
681 >= PrefixInfo::EXTERNAL ?
682 pPrefix->m_pTranslatedPrefix :
683 pPrefix->m_pPrefix));
684 aSynAbsURIRef.append(sTemp);
685 m_aScheme = SubString( 0, sTemp.indexOf(static_cast< sal_Unicode >(':')) );
687 else
689 if (bSmart)
691 // For scheme detection, the first (if any) of the following
692 // productions that matches the input string (and for which the
693 // appropriate style bit is set in eStyle, if applicable)
694 // determines the scheme. The productions use the auxiliary rules
696 // domain = label *("." label)
697 // label = alphanum [*(alphanum / "-") alphanum]
698 // alphanum = ALPHA / DIGIT
699 // IPv6reference = "[" IPv6address "]"
700 // IPv6address = hexpart [":" IPv4address]
701 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
702 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
703 // hexseq = hex4 *(":" hex4)
704 // hex4 = 1*4HEXDIG
705 // UCS4 = <any UCS4 character>
707 // 1st Production (known scheme):
708 // <one of the known schemes, ignoring case> ":" *UCS4
709 // 2nd Production (mailto):
710 // domain "@" domain
711 // 3rd Production (ftp):
712 // "FTP" 2*("." label) ["/" *UCS4]
713 // 4th Production (http):
714 // label 2*("." label) ["/" *UCS4]
715 // 5th Production (file):
716 // "//" (domain / IPv6reference) ["/" *UCS4]
717 // 6th Production (Unix file):
718 // "/" *UCS4
719 // 7th Production (UNC file; FSYS_DOS only):
720 // "\\" domain ["\" *UCS4]
721 // 8th Production (Unix-like DOS file; FSYS_DOS only):
722 // ALPHA ":" ["/" *UCS4]
723 // 9th Production (DOS file; FSYS_DOS only):
724 // ALPHA ":" ["\" *UCS4]
726 // For the 'non URL' file productions 6--9, the interpretation of
727 // the input as a (degenerate) URI is turned off, i.e., escape
728 // sequences and fragments are never detected as such, but are
729 // taken as literal characters.
731 sal_Unicode const * p1 = pPos;
732 if (eStyle & FSYS_DOS
733 && pEnd - p1 >= 2
734 && rtl::isAsciiAlpha(p1[0])
735 && p1[1] == ':'
736 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
738 m_eScheme = INET_PROT_FILE; // 8th, 9th
739 eMechanism = ENCODE_ALL;
740 nFragmentDelimiter = 0x80000000;
742 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
744 p1 += 2;
745 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
746 && (p1 == pEnd || *p1 == '/'))
747 m_eScheme = INET_PROT_FILE; // 5th
749 else if (p1 != pEnd && *p1 == '/')
751 m_eScheme = INET_PROT_FILE; // 6th
752 eMechanism = ENCODE_ALL;
753 nFragmentDelimiter = 0x80000000;
755 else if (eStyle & FSYS_DOS
756 && pEnd - p1 >= 2
757 && p1[0] == '\\'
758 && p1[1] == '\\')
760 p1 += 2;
761 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
762 p1, pEnd - p1, '\\');
763 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
764 if (
765 parseHostOrNetBiosName(
766 p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
767 true, NULL) ||
768 (scanDomain(p1, pe) > 0 && p1 == pe)
771 m_eScheme = INET_PROT_FILE; // 7th
772 eMechanism = ENCODE_ALL;
773 nFragmentDelimiter = 0x80000000;
776 else
778 sal_Unicode const * pDomainEnd = p1;
779 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
780 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
782 ++pDomainEnd;
783 if (scanDomain(pDomainEnd, pEnd) > 0
784 && pDomainEnd == pEnd)
785 m_eScheme = INET_PROT_MAILTO; // 2nd
787 else if (nLabels >= 3
788 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
789 m_eScheme
790 = pDomainEnd - p1 >= 4
791 && (p1[0] == 'f' || p1[0] == 'F')
792 && (p1[1] == 't' || p1[1] == 'T')
793 && (p1[2] == 'p' || p1[2] == 'P')
794 && p1[3] == '.' ?
795 INET_PROT_FTP : INET_PROT_HTTP; // 3rd, 4th
799 OUString aSynScheme;
800 if (m_eScheme == INET_PROT_NOT_VALID) {
801 sal_Unicode const * p1 = pPos;
802 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
803 if (!aSynScheme.isEmpty())
805 m_eScheme = INET_PROT_GENERIC;
806 pPos = p1;
810 if (bSmart && m_eScheme == INET_PROT_NOT_VALID && pPos != pEnd
811 && *pPos != nFragmentDelimiter)
813 m_eScheme = m_eSmartScheme;
816 if (m_eScheme == INET_PROT_NOT_VALID)
818 setInvalid();
819 return false;
822 if (m_eScheme != INET_PROT_GENERIC) {
823 aSynScheme = OUString::createFromAscii(getSchemeInfo().m_pScheme);
825 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
826 aSynAbsURIRef.append(sal_Unicode(':'));
829 sal_Char cEscapePrefix = getEscapePrefix();
830 sal_uInt32 nSegmentDelimiter = '/';
831 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
832 bool bSkippedInitialSlash = false;
834 // Parse //<user>;AUTH=<auth>@<host>:<port> or
835 // //<user>:<password>@<host>:<port> or
836 // //<reg_name>
837 if (getSchemeInfo().m_bAuthority)
839 sal_Unicode const * pUserInfoBegin = 0;
840 sal_Unicode const * pUserInfoEnd = 0;
841 sal_Unicode const * pHostPortBegin = 0;
842 sal_Unicode const * pHostPortEnd = 0;
844 switch (m_eScheme)
846 case INET_PROT_VND_SUN_STAR_HELP:
848 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
850 setInvalid();
851 return false;
853 aSynAbsURIRef.append("//");
854 OUStringBuffer aSynAuthority;
855 while (pPos < pEnd
856 && *pPos != '/' && *pPos != '?'
857 && *pPos != nFragmentDelimiter)
859 EscapeType eEscapeType;
860 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
861 cEscapePrefix, eMechanism,
862 eCharset, eEscapeType);
863 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
864 PART_AUTHORITY, cEscapePrefix, eCharset,
865 false);
867 m_aHost.set(aSynAbsURIRef,
868 aSynAuthority.makeStringAndClear(),
869 aSynAbsURIRef.getLength());
870 // misusing m_aHost to store the authority
871 break;
874 case INET_PROT_VND_SUN_STAR_HIER:
876 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
878 pPos += 2;
879 aSynAbsURIRef.append("//");
880 OUStringBuffer aSynAuthority;
881 while (pPos < pEnd
882 && *pPos != '/' && *pPos != '?'
883 && *pPos != nFragmentDelimiter)
885 EscapeType eEscapeType;
886 sal_uInt32 nUTF32 = getUTF32(pPos,
887 pEnd,
888 bOctets,
889 cEscapePrefix,
890 eMechanism,
891 eCharset,
892 eEscapeType);
893 appendUCS4(aSynAuthority,
894 nUTF32,
895 eEscapeType,
896 bOctets,
897 PART_AUTHORITY,
898 cEscapePrefix,
899 eCharset,
900 false);
902 if (aSynAuthority.getLength() == 0)
904 setInvalid();
905 return false;
907 m_aHost.set(aSynAbsURIRef,
908 aSynAuthority.makeStringAndClear(),
909 aSynAbsURIRef.getLength());
910 // misusing m_aHost to store the authority
912 break;
915 case INET_PROT_VND_SUN_STAR_PKG:
916 case INET_PROT_CMIS:
918 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
920 setInvalid();
921 return false;
923 aSynAbsURIRef.append("//");
924 OUStringBuffer aSynUser;
926 bool bHasUser = false;
927 while (pPos < pEnd && *pPos != '@'
928 && *pPos != '/' && *pPos != '?'
929 && *pPos != nFragmentDelimiter)
931 EscapeType eEscapeType;
932 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
933 cEscapePrefix, eMechanism,
934 eCharset, eEscapeType);
935 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets,
936 PART_USER_PASSWORD, cEscapePrefix, eCharset,
937 false);
939 bHasUser = *pPos == '@';
942 OUStringBuffer aSynAuthority;
943 if ( !bHasUser )
945 aSynAuthority = aSynUser;
947 else
949 m_aUser.set(aSynAbsURIRef,
950 aSynUser.makeStringAndClear(),
951 aSynAbsURIRef.getLength());
952 aSynAbsURIRef.append("@");
953 ++pPos;
955 while (pPos < pEnd
956 && *pPos != '/' && *pPos != '?'
957 && *pPos != nFragmentDelimiter)
959 EscapeType eEscapeType;
960 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
961 cEscapePrefix, eMechanism,
962 eCharset, eEscapeType);
963 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
964 PART_AUTHORITY, cEscapePrefix, eCharset,
965 false);
968 if (aSynAuthority.getLength() == 0)
970 setInvalid();
971 return false;
973 m_aHost.set(aSynAbsURIRef,
974 aSynAuthority.makeStringAndClear(),
975 aSynAbsURIRef.getLength());
976 // misusing m_aHost to store the authority
977 break;
980 case INET_PROT_FILE:
981 if (bSmart)
983 // The first of the following seven productions that
984 // matches the rest of the input string (and for which the
985 // appropriate style bit is set in eStyle, if applicable)
986 // determines the used notation. The productions use the
987 // auxiliary rules
989 // domain = label *("." label)
990 // label = alphanum [*(alphanum / "-") alphanum]
991 // alphanum = ALPHA / DIGIT
992 // IPv6reference = "[" IPv6address "]"
993 // IPv6address = hexpart [":" IPv4address]
994 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
995 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
996 // hexseq = hex4 *(":" hex4)
997 // hex4 = 1*4HEXDIG
998 // path = <any UCS4 character except "#">
999 // UCS4 = <any UCS4 character>
1001 // 1st Production (URL):
1002 // "//" [domain / IPv6reference] ["/" *path]
1003 // ["#" *UCS4]
1004 // becomes
1005 // "file://" domain "/" *path ["#" *UCS4]
1006 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1008 sal_Unicode const * p1 = pPos + 2;
1009 while (p1 != pEnd && *p1 != '/' &&
1010 *p1 != nFragmentDelimiter)
1012 ++p1;
1014 if (parseHostOrNetBiosName(
1015 pPos + 2, p1, bOctets, ENCODE_ALL,
1016 RTL_TEXTENCODING_DONTKNOW, true, NULL))
1018 aSynAbsURIRef.append("//");
1019 pHostPortBegin = pPos + 2;
1020 pHostPortEnd = p1;
1021 pPos = p1;
1022 break;
1026 // 2nd Production (MS IE generated 1; FSYS_DOS only):
1027 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1028 // becomes
1029 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1030 // replacing "\" by "/" within <*path>
1031 // 3rd Production (MS IE generated 2; FSYS_DOS only):
1032 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1033 // becomes
1034 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1035 // replacing "\" by "/" within <*path>
1036 // 4th Production (miscounted slashes):
1037 // "//" *path ["#" *UCS4]
1038 // becomes
1039 // "file:///" *path ["#" *UCS4]
1040 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1042 aSynAbsURIRef.append("//");
1043 pPos += 2;
1044 bSkippedInitialSlash = true;
1045 if ((eStyle & FSYS_DOS) != 0
1046 && pEnd - pPos >= 2
1047 && rtl::isAsciiAlpha(pPos[0])
1048 && pPos[1] == ':'
1049 && (pEnd - pPos == 2
1050 || pPos[2] == '/' || pPos[2] == '\\'))
1051 nAltSegmentDelimiter = '\\';
1052 break;
1055 // 5th Production (Unix):
1056 // "/" *path ["#" *UCS4]
1057 // becomes
1058 // "file:///" *path ["#" *UCS4]
1059 if (pPos < pEnd && *pPos == '/')
1061 aSynAbsURIRef.append("//");
1062 break;
1065 // 6th Production (UNC; FSYS_DOS only):
1066 // "\\" domain ["\" *path] ["#" *UCS4]
1067 // becomes
1068 // "file://" domain "/" *path ["#" *UCS4]
1069 // replacing "\" by "/" within <*path>
1070 if (eStyle & FSYS_DOS
1071 && pEnd - pPos >= 2
1072 && pPos[0] == '\\'
1073 && pPos[1] == '\\')
1075 sal_Unicode const * p1 = pPos + 2;
1076 sal_Unicode const * pe = p1;
1077 while (pe < pEnd && *pe != '\\' &&
1078 *pe != nFragmentDelimiter)
1080 ++pe;
1082 if (
1083 parseHostOrNetBiosName(
1084 p1, pe, bOctets, ENCODE_ALL,
1085 RTL_TEXTENCODING_DONTKNOW, true, NULL) ||
1086 (scanDomain(p1, pe) > 0 && p1 == pe)
1089 aSynAbsURIRef.append("//");
1090 pHostPortBegin = pPos + 2;
1091 pHostPortEnd = pe;
1092 pPos = pe;
1093 nSegmentDelimiter = '\\';
1094 break;
1098 // 7th Production (Unix-like DOS; FSYS_DOS only):
1099 // ALPHA ":" ["/" *path] ["#" *UCS4]
1100 // becomes
1101 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1102 // replacing "\" by "/" within <*path>
1103 // 8th Production (DOS; FSYS_DOS only):
1104 // ALPHA ":" ["\" *path] ["#" *UCS4]
1105 // becomes
1106 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1107 // replacing "\" by "/" within <*path>
1108 if (eStyle & FSYS_DOS
1109 && pEnd - pPos >= 2
1110 && rtl::isAsciiAlpha(pPos[0])
1111 && pPos[1] == ':'
1112 && (pEnd - pPos == 2
1113 || pPos[2] == '/'
1114 || pPos[2] == '\\'))
1116 aSynAbsURIRef.append("//");
1117 nAltSegmentDelimiter = '\\';
1118 bSkippedInitialSlash = true;
1119 break;
1122 // 9th Production (any):
1123 // *path ["#" *UCS4]
1124 // becomes
1125 // "file:///" *path ["#" *UCS4]
1126 // replacing the delimiter by "/" within <*path>. The
1127 // delimiter is that character from the set { "/", "\",
1128 // ":" } which appears most often in <*path> (if FSYS_UNX
1129 // is not among the style bits, "/" is removed from the
1130 // set; if FSYS_DOS is not among the style bits, "\" is
1131 // removed from the set; if FSYS_MAC is not among the
1132 // style bits, ":" is removed from the set). If two or
1133 // more characters appear the same number of times, the
1134 // character mentioned first in that set is chosen. If
1135 // the first character of <*path> is the delimiter, that
1136 // character is not copied.
1137 if (eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC))
1139 aSynAbsURIRef.appendAscii("//");
1140 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1142 case FSYS_UNX:
1143 nSegmentDelimiter = '/';
1144 break;
1146 case FSYS_DOS:
1147 nSegmentDelimiter = '\\';
1148 break;
1150 case FSYS_MAC:
1151 nSegmentDelimiter = ':';
1152 break;
1154 default:
1155 OSL_FAIL(
1156 "INetURLObject::setAbsURIRef():"
1157 " Bad guessFSysStyleByCounting");
1158 break;
1160 bSkippedInitialSlash
1161 = pPos != pEnd && *pPos != nSegmentDelimiter;
1162 break;
1165 default:
1167 // For INET_PROT_FILE, allow an empty authority ("//") to be
1168 // missing if the following path starts with an explicit "/"
1169 // (Java is notorious in generating such file URLs, so be
1170 // liberal here):
1171 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1172 pPos += 2;
1173 else if (!bSmart
1174 && !(m_eScheme == INET_PROT_FILE
1175 && pPos != pEnd && *pPos == '/'))
1177 setInvalid();
1178 return false;
1180 aSynAbsURIRef.append("//");
1182 sal_Unicode const * pAuthority = pPos;
1183 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1184 while (pPos < pEnd && *pPos != '/' && *pPos != c
1185 && *pPos != nFragmentDelimiter)
1186 ++pPos;
1187 if (getSchemeInfo().m_bUser)
1188 if (getSchemeInfo().m_bHost)
1190 sal_Unicode const * p1 = pAuthority;
1191 while (p1 < pPos && *p1 != '@')
1192 ++p1;
1193 if (p1 == pPos)
1195 pHostPortBegin = pAuthority;
1196 pHostPortEnd = pPos;
1198 else
1200 pUserInfoBegin = pAuthority;
1201 pUserInfoEnd = p1;
1202 pHostPortBegin = p1 + 1;
1203 pHostPortEnd = pPos;
1206 else
1208 pUserInfoBegin = pAuthority;
1209 pUserInfoEnd = pPos;
1211 else if (getSchemeInfo().m_bHost)
1213 pHostPortBegin = pAuthority;
1214 pHostPortEnd = pPos;
1216 else if (pPos != pAuthority)
1218 setInvalid();
1219 return false;
1221 break;
1225 if (pUserInfoBegin)
1227 Part ePart = m_eScheme == INET_PROT_IMAP ?
1228 PART_IMAP_ACHAR :
1229 m_eScheme == INET_PROT_VIM ?
1230 PART_VIM :
1231 PART_USER_PASSWORD;
1232 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1233 bool bSupportsAuth
1234 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1235 bool bHasAuth = false;
1236 OUStringBuffer aSynUser;
1237 sal_Unicode const * p1 = pUserInfoBegin;
1238 while (p1 < pUserInfoEnd)
1240 EscapeType eEscapeType;
1241 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1242 cEscapePrefix, eMechanism,
1243 eCharset, eEscapeType);
1244 if (eEscapeType == ESCAPE_NO)
1246 if (nUTF32 == ':' && bSupportsPassword)
1248 bHasAuth = true;
1249 break;
1251 else if (nUTF32 == ';' && bSupportsAuth
1252 && pUserInfoEnd - p1
1253 > RTL_CONSTASCII_LENGTH("auth=")
1254 && INetMIME::equalIgnoreCase(
1256 p1 + RTL_CONSTASCII_LENGTH("auth="),
1257 "auth="))
1259 p1 += RTL_CONSTASCII_LENGTH("auth=");
1260 bHasAuth = true;
1261 break;
1264 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart,
1265 cEscapePrefix, eCharset, false);
1267 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1268 aSynAbsURIRef.getLength());
1269 if (bHasAuth)
1271 if (bSupportsPassword)
1273 aSynAbsURIRef.append(sal_Unicode(':'));
1274 OUStringBuffer aSynAuth;
1275 while (p1 < pUserInfoEnd)
1277 EscapeType eEscapeType;
1278 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1279 cEscapePrefix,
1280 eMechanism, eCharset,
1281 eEscapeType);
1282 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1283 ePart, cEscapePrefix, eCharset, false);
1285 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1286 aSynAbsURIRef.getLength());
1288 else
1290 aSynAbsURIRef.append(";AUTH=");
1291 OUStringBuffer aSynAuth;
1292 while (p1 < pUserInfoEnd)
1294 EscapeType eEscapeType;
1295 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1296 cEscapePrefix,
1297 eMechanism, eCharset,
1298 eEscapeType);
1299 if (!INetMIME::isIMAPAtomChar(nUTF32))
1301 setInvalid();
1302 return false;
1304 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1305 ePart, cEscapePrefix, eCharset, false);
1307 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1308 aSynAbsURIRef.getLength());
1311 if (pHostPortBegin)
1312 aSynAbsURIRef.append(sal_Unicode('@'));
1315 if (pHostPortBegin)
1317 sal_Unicode const * pPort = pHostPortEnd;
1318 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1320 sal_Unicode const * p1 = pHostPortEnd - 1;
1321 while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1322 --p1;
1323 if (*p1 == ':')
1324 pPort = p1;
1326 bool bNetBiosName = false;
1327 switch (m_eScheme)
1329 case INET_PROT_FILE:
1330 // If the host equals "LOCALHOST" (unencoded and ignoring
1331 // case), turn it into an empty host:
1332 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1333 "localhost"))
1334 pHostPortBegin = pPort;
1335 bNetBiosName = true;
1336 break;
1338 case INET_PROT_LDAP:
1339 case INET_PROT_SMB:
1340 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1342 setInvalid();
1343 return false;
1345 break;
1346 default:
1347 if (pHostPortBegin == pPort)
1349 setInvalid();
1350 return false;
1352 break;
1354 OUStringBuffer aSynHost;
1355 if (!parseHostOrNetBiosName(
1356 pHostPortBegin, pPort, bOctets, eMechanism, eCharset,
1357 bNetBiosName, &aSynHost))
1359 setInvalid();
1360 return false;
1362 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1363 aSynAbsURIRef.getLength());
1364 if (pPort != pHostPortEnd)
1366 aSynAbsURIRef.append(sal_Unicode(':'));
1367 m_aPort.set(aSynAbsURIRef,
1368 OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1369 aSynAbsURIRef.getLength());
1374 // Parse <path>
1375 OUStringBuffer aSynPath;
1376 if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset,
1377 bSkippedInitialSlash, nSegmentDelimiter,
1378 nAltSegmentDelimiter,
1379 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1380 nFragmentDelimiter, aSynPath))
1382 setInvalid();
1383 return false;
1385 m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1386 aSynAbsURIRef.getLength());
1388 // Parse ?<query>
1389 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1391 aSynAbsURIRef.append(sal_Unicode('?'));
1392 OUStringBuffer aSynQuery;
1393 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1395 EscapeType eEscapeType;
1396 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1397 eMechanism, eCharset, eEscapeType);
1398 appendUCS4(aSynQuery, nUTF32, eEscapeType, bOctets,
1399 PART_URIC, cEscapePrefix, eCharset, true);
1401 m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1402 aSynAbsURIRef.getLength());
1405 // Parse #<fragment>
1406 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1408 aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1409 OUStringBuffer aSynFragment;
1410 for (++pPos; pPos < pEnd;)
1412 EscapeType eEscapeType;
1413 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1414 eMechanism, eCharset, eEscapeType);
1415 appendUCS4(aSynFragment, nUTF32, eEscapeType, bOctets, PART_URIC,
1416 cEscapePrefix, eCharset, true);
1418 m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1419 aSynAbsURIRef.getLength());
1422 if (pPos != pEnd)
1424 setInvalid();
1425 return false;
1428 m_aAbsURIRef = aSynAbsURIRef;
1430 // At this point references of type "\\server\paths" have
1431 // been converted to "file://server/path".
1432 #ifdef LINUX
1433 if (m_eScheme==INET_PROT_FILE && !m_aHost.isEmpty()) {
1434 // Change "file://server/path" URIs to "smb://server/path" on
1435 // Linux
1436 // Leave file URIs without a host ("file:///path") unchanged.
1437 changeScheme(INET_PROT_SMB);
1439 #endif
1441 #ifdef WIN
1442 if (m_eScheme==INET_PROT_SMB) {
1443 // Change "smb://server/path" URIs to "file://server/path"
1444 // URIs on Windows, since Windows doesn't understand the
1445 // SMB scheme.
1446 changeScheme(INET_PROT_FILE);
1448 #endif
1450 return true;
1453 void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
1454 OUString aTmpStr=m_aAbsURIRef.makeStringAndClear();
1455 int oldSchemeLen=strlen(getSchemeInfo().m_pScheme);
1456 m_eScheme=eTargetScheme;
1457 int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
1458 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1459 m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
1460 int delta=newSchemeLen-oldSchemeLen;
1461 m_aUser+=delta;
1462 m_aAuth+=delta;
1463 m_aHost+=delta;
1464 m_aPort+=delta;
1465 m_aPath+=delta;
1466 m_aQuery+=delta;
1467 m_aFragment+=delta;
// Resolves the URI reference rTheRelURIRef against this object, which acts
// as the base URI, and stores the resulting absolute URI in rTheAbsURIRef.
//
// Parameters:
//   rTheRelURIRef     the (relative or absolute) reference to resolve.
//   bOctets           forwarded unchanged to getUTF32(), appendUCS4() and
//                     parseHostOrNetBiosName().
//   rTheAbsURIRef     receives the result; only assigned when true is
//                     returned.
//   rWasAbsolute      set to true when the input was taken as absolute
//                     (it carried a scheme that is not merely a repetition
//                     of the hierarchical base scheme, or, in smart mode, it
//                     was accepted as an absolute DOS/UNC file system path);
//                     set to false otherwise and on every failure return.
//   eMechanism        encode mechanism used when reading the input; replaced
//                     by ENCODE_ALL when the input is handled as a relative
//                     non-URI.
//   eCharset          character set forwarded to getUTF32()/appendUCS4().
//   bIgnoreFragment   if true, a fragment of the input does not appear in
//                     the result.
//   bSmart            enables the file system path heuristics below (only
//                     applied when the input has no scheme).
//   bRelativeNonURIs  if true (or forced to true for "\" / ":" delimited
//                     file paths), query and fragment delimiters are
//                     disabled and PART_VISIBLE_NONSPECIAL is used for the
//                     path; only consulted together with bSmart.
//   eStyle            file system style bits used by the heuristics.
//
// Returns true on success.  Returns false when the synthesized or given URI
// fails to parse (HasError()), or when a relative path is given but the
// scheme of this base URI is not hierarchical.
1470 bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1471 bool bOctets,
1472 INetURLObject & rTheAbsURIRef,
1473 bool & rWasAbsolute,
1474 EncodeMechanism eMechanism,
1475 rtl_TextEncoding eCharset,
1476 bool bIgnoreFragment, bool bSmart,
1477 bool bRelativeNonURIs, FSysStyle eStyle)
1478 const
1480 sal_Unicode const * p = rTheRelURIRef.getStr();
1481 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
// The input counts as having a scheme if getPrefix() returns a prefix entry
// for it or, failing that, if parseScheme() yields a non-empty scheme name.
1483 sal_Unicode const * pPrefixBegin = p;
1484 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1485 bool hasScheme = pPrefix != 0;
1486 if (!hasScheme) {
1487 pPrefixBegin = p;
1488 hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
// Default delimiters for URI syntax.  NOTE(review): 0x80000000 appears to
// serve as a "no such delimiter" value that never equals a character read
// from the input -- confirm.
1491 sal_uInt32 nSegmentDelimiter = '/';
1492 sal_uInt32 nQueryDelimiter
1493 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1494 sal_uInt32 nFragmentDelimiter = '#';
1495 Part ePart = PART_VISIBLE;
1497 if (!hasScheme && bSmart)
1499 // If the input matches any of the following productions (for which
1500 // the appropriate style bit is set in eStyle), it is assumed to be an
1501 // absolute file system path, rather than a relative URI reference.
1502 // (This is only a subset of the productions used for scheme detection
1503 // in INetURLObject::setAbsURIRef(), because most of those productions
1504 // interfere with the syntax of relative URI references.) The
1505 // productions use the auxiliary rules
1507 // domain = label *("." label)
1508 // label = alphanum [*(alphanum / "-") alphanum]
1509 // alphanum = ALPHA / DIGIT
1510 // UCS4 = <any UCS4 character>
1512 // 1st Production (UNC file; FSYS_DOS only):
1513 // "\\" domain ["\" *UCS4]
1514 // 2nd Production (Unix-like DOS file; FSYS_DOS only):
1515 // ALPHA ":" ["/" *UCS4]
1516 // 3rd Production (DOS file; FSYS_DOS only):
1517 // ALPHA ":" ["\" *UCS4]
1518 if (eStyle & FSYS_DOS)
1520 bool bFSys = false;
1521 sal_Unicode const * q = p;
1522 if (pEnd - q >= 2
1523 && rtl::isAsciiAlpha(q[0])
1524 && q[1] == ':'
1525 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1526 bFSys = true; // 2nd, 3rd
1527 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1529 q += 2;
// The candidate host name extends up to the next backslash, or to the end of
// the input if there is none.
1530 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1531 q, pEnd - q, '\\');
1532 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1533 if (parseHostOrNetBiosName(
1534 q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
1535 true, NULL))
1537 bFSys = true; // 1st
// An absolute file system path is parsed as a whole in smart mode; if that
// parse fails, processing continues below as for any other input.
1540 if (bFSys)
1542 INetURLObject aNewURI;
1543 aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
1544 eCharset, true, eStyle);
1545 if (!aNewURI.HasError())
1547 rTheAbsURIRef = aNewURI;
1548 rWasAbsolute = true;
1549 return true;
1554 // When the base URL is a file URL, accept relative file system paths
1555 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1556 // and "#"), as well as relative URIs using "/" as delimiter:
1557 if (m_eScheme == INET_PROT_FILE)
1558 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1560 case FSYS_UNX:
1561 nSegmentDelimiter = '/';
1562 break;
1564 case FSYS_DOS:
1565 nSegmentDelimiter = '\\';
1566 bRelativeNonURIs = true;
1567 break;
1569 case FSYS_MAC:
1570 nSegmentDelimiter = ':';
1571 bRelativeNonURIs = true;
1572 break;
1574 default:
1575 OSL_FAIL("INetURLObject::convertRelToAbs():"
1576 " Bad guessFSysStyleByCounting");
1577 break;
// Relative non-URIs: everything is encoded, and neither a query nor a
// fragment is recognized in the input.
1580 if (bRelativeNonURIs)
1582 eMechanism = ENCODE_ALL;
1583 nQueryDelimiter = 0x80000000;
1584 nFragmentDelimiter = 0x80000000;
1585 ePart = PART_VISIBLE_NONSPECIAL;
1589 // If the relative URI has the same scheme as the base URI, and that
1590 // scheme is hierarchical, then ignore its presence in the relative
1591 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1592 // step 3):
1593 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1594 && getSchemeInfo().m_bHierarchical)
1596 hasScheme = false;
// Skip past the scheme and its ':' so that p points at the rest of the
// reference.
1597 while (p != pEnd && *p++ != ':') ;
1599 rWasAbsolute = hasScheme;
1601 // Fast solution for non-relative URIs:
1602 if (hasScheme)
1604 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1605 if (aNewURI.HasError())
1607 rWasAbsolute = false;
1608 return false;
1611 if (bIgnoreFragment)
1612 aNewURI.clearFragment();
1613 rTheAbsURIRef = aNewURI;
1614 return true;
// From here on the input is relative.  The absolute URI is synthesized as
// text in aSynAbsURIRef (scheme, authority, path/query, fragment) and parsed
// into an INetURLObject at the very end.  eState tracks which part of the
// input is to be handled next.
1617 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1618 STATE_DONE };
1620 OUStringBuffer aSynAbsURIRef;
1621 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1622 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1623 if (m_eScheme != INET_PROT_GENERIC)
1625 aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1627 else
1629 sal_Unicode const * pSchemeBegin
1630 = m_aAbsURIRef.getStr();
1631 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// NOTE(review): this scan has no end check; it relies on the base URI
// containing a ':' -- confirm that this always holds for generic schemes.
1632 while (pSchemeEnd[0] != ':')
1634 ++pSchemeEnd;
1636 aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1638 aSynAbsURIRef.append(sal_Unicode(':'));
1640 sal_Char cEscapePrefix = getEscapePrefix();
// bSameDoc stays true only while the input contributes neither an authority
// nor a path, i.e. it is empty or consists of a fragment only.
1642 State eState = STATE_AUTH;
1643 bool bSameDoc = true;
// Authority: taken from the input if it starts with "//", otherwise copied
// from the base URI.
1645 if (getSchemeInfo().m_bAuthority)
1647 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1649 aSynAbsURIRef.append("//");
1650 p += 2;
1651 eState = STATE_ABS_PATH;
1652 bSameDoc = false;
// Copy the input's authority up to an unescaped segment or fragment
// delimiter; the delimiter itself is consumed by getUTF32() but not copied.
1653 while (p != pEnd)
1655 EscapeType eEscapeType;
1656 sal_uInt32 nUTF32
1657 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1658 eCharset, eEscapeType);
1659 if (eEscapeType == ESCAPE_NO)
1661 if (nUTF32 == nSegmentDelimiter)
1662 break;
1663 else if (nUTF32 == nFragmentDelimiter)
1665 eState = STATE_FRAGMENT;
1666 break;
1669 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1670 PART_VISIBLE, cEscapePrefix, eCharset, true);
1673 else
1675 SubString aAuthority(getAuthority());
1676 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1677 + aAuthority.getBegin(),
1678 aAuthority.getLength());
// No authority was read from the input: decide from its first character
// whether it is empty, a fragment, an absolute path or a relative path.
1682 if (eState == STATE_AUTH)
1684 if (p == pEnd)
1685 eState = STATE_DONE;
1686 else if (*p == nFragmentDelimiter)
1688 ++p;
1689 eState = STATE_FRAGMENT;
1691 else if (*p == nSegmentDelimiter)
1693 ++p;
1694 eState = STATE_ABS_PATH;
1695 bSameDoc = false;
1697 else
1699 eState = STATE_REL_PATH;
1700 bSameDoc = false;
// Absolute path: copy the rest of the input up to an unescaped fragment
// delimiter, translating unescaped segment delimiters to '/'.
1704 if (eState == STATE_ABS_PATH)
1706 aSynAbsURIRef.append(sal_Unicode('/'));
1707 eState = STATE_DONE;
1708 while (p != pEnd)
1710 EscapeType eEscapeType;
1711 sal_uInt32 nUTF32
1712 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1713 eCharset, eEscapeType);
1714 if (eEscapeType == ESCAPE_NO)
1716 if (nUTF32 == nFragmentDelimiter)
1718 eState = STATE_FRAGMENT;
1719 break;
1721 else if (nUTF32 == nSegmentDelimiter)
1722 nUTF32 = '/';
1724 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1725 cEscapePrefix, eCharset, true);
// Relative path: merge the input's segments onto the directory part of the
// base path.
1728 else if (eState == STATE_REL_PATH)
1730 if (!getSchemeInfo().m_bHierarchical)
1732 // Detect cases where a relative input could not be made absolute
1733 // because the given base URL is broken (most probably because it is
1734 // empty):
1735 OSL_ASSERT(!HasError());
1736 rWasAbsolute = false;
1737 return false;
// Trim the base path back to just after its last '/' (dropping the final
// segment); if it contains no '/', nothing of it is kept.
1740 sal_Unicode const * pBasePathBegin
1741 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1742 sal_Unicode const * pBasePathEnd
1743 = pBasePathBegin + m_aPath.getLength();
1744 while (pBasePathEnd != pBasePathBegin)
1745 if (*(--pBasePathEnd) == '/')
1747 ++pBasePathEnd;
1748 break;
1751 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1752 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1753 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1754 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1755 "INetURLObject::convertRelToAbs(): Bad base path");
// Process the input one segment at a time until a query or fragment
// delimiter (or the end) is reached.
1757 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1759 if (*p == '.')
// A complete "." segment is dropped.
1761 if (pEnd - p == 1
1762 || p[1] == nSegmentDelimiter
1763 || p[1] == nQueryDelimiter
1764 || p[1] == nFragmentDelimiter)
1766 ++p;
1767 if (p != pEnd && *p == nSegmentDelimiter)
1768 ++p;
1769 continue;
// A complete ".." segment removes the last segment synthesized so far, but
// only while more than the leading '/' of the path is left; otherwise it
// falls through and is copied like an ordinary segment.
1771 else if (pEnd - p >= 2
1772 && p[1] == '.'
1773 && (pEnd - p == 2
1774 || p[2] == nSegmentDelimiter
1775 || p[2] == nQueryDelimiter
1776 || p[2] == nFragmentDelimiter)
1777 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1779 p += 2;
1780 if (p != pEnd && *p == nSegmentDelimiter)
1781 ++p;
// Search backwards, starting before the trailing '/', for the previous '/'
// and cut the buffer right after it.
1783 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1784 while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1785 --i;
1786 aSynAbsURIRef.setLength(i + 1);
1787 DBG_ASSERT(
1788 aSynAbsURIRef.getLength() > nPathBegin
1789 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1790 "INetURLObject::convertRelToAbs(): Bad base path");
1791 continue;
// Ordinary segment: copy it, then emit '/' for a following segment
// delimiter.
1795 while (p != pEnd
1796 && *p != nSegmentDelimiter
1797 && *p != nQueryDelimiter
1798 && *p != nFragmentDelimiter)
1800 EscapeType eEscapeType;
1801 sal_uInt32 nUTF32
1802 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1803 eCharset, eEscapeType);
1804 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1805 cEscapePrefix, eCharset, true);
1807 if (p != pEnd && *p == nSegmentDelimiter)
1809 aSynAbsURIRef.append(sal_Unicode('/'));
1810 ++p;
// Copy whatever is left before the fragment delimiter (the query, if the
// segment loop stopped at a query delimiter).
1814 while (p != pEnd && *p != nFragmentDelimiter)
1816 EscapeType eEscapeType;
1817 sal_uInt32 nUTF32
1818 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1819 eCharset, eEscapeType);
1820 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1821 cEscapePrefix, eCharset, true);
1824 if (p == pEnd)
1825 eState = STATE_DONE;
1826 else
1828 ++p;
1829 eState = STATE_FRAGMENT;
// The input has neither authority nor path: reuse the base URI's path and,
// if present, its query (including the preceding delimiter character).
1832 else if (bSameDoc)
1834 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1835 m_aPath.getLength());
1836 if (m_aQuery.isPresent())
1837 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1838 + m_aQuery.getBegin() - 1,
1839 m_aQuery.getLength() + 1);
// Fragment: the rest of the input, unless the caller asked to drop it.
1842 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1844 aSynAbsURIRef.append(sal_Unicode('#'));
1845 while (p != pEnd)
1847 EscapeType eEscapeType;
1848 sal_uInt32 nUTF32
1849 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1850 eCharset, eEscapeType);
1851 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1852 PART_VISIBLE, cEscapePrefix, eCharset, true);
// Parse the synthesized text; only a successfully parsed URI is handed back.
1856 INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1857 if (aNewURI.HasError())
1859 // Detect cases where a relative input could not be made absolute
1860 // because the given base URL is broken (most probably because it is
1861 // empty):
1862 OSL_ASSERT(!HasError());
1863 rWasAbsolute = false;
1864 return false;
1867 rTheAbsURIRef = aNewURI;
1868 return true;
1871 bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1872 bool bOctets, OUString & rTheRelURIRef,
1873 EncodeMechanism eEncodeMechanism,
1874 DecodeMechanism eDecodeMechanism,
1875 rtl_TextEncoding eCharset,
1876 FSysStyle eStyle) const
1878 // Check for hierarchical base URL:
1879 if (!getSchemeInfo().m_bHierarchical)
1881 rTheRelURIRef
1882 = decode(rTheAbsURIRef,
1883 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1884 eDecodeMechanism, eCharset);
1885 return false;
1888 // Convert the input (absolute or relative URI ref) to an absolute URI
1889 // ref:
1890 INetURLObject aSubject;
1891 bool bWasAbsolute;
1892 if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute,
1893 eEncodeMechanism, eCharset, false, false, false,
1894 eStyle))
1896 rTheRelURIRef
1897 = decode(rTheAbsURIRef,
1898 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1899 eDecodeMechanism, eCharset);
1900 return false;
1903 // Check for differing scheme or authority parts:
1904 if ((m_aScheme.compare(
1905 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1906 != 0)
1907 || (m_aUser.compare(
1908 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1909 != 0)
1910 || (m_aAuth.compare(
1911 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1912 != 0)
1913 || (m_aHost.compare(
1914 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1915 != 0)
1916 || (m_aPort.compare(
1917 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1918 != 0))
1920 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1921 return false;
1924 sal_Unicode const * pBasePathBegin
1925 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1926 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1927 sal_Unicode const * pSubjectPathBegin
1928 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1929 sal_Unicode const * pSubjectPathEnd
1930 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1932 // Make nMatch point past the last matching slash, or past the end of the
1933 // paths, in case they are equal:
1934 sal_Unicode const * pSlash = 0;
1935 sal_Unicode const * p1 = pBasePathBegin;
1936 sal_Unicode const * p2 = pSubjectPathBegin;
1937 for (;;)
1939 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1941 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1942 pSlash = p1;
1943 break;
1946 sal_Unicode c = *p1++;
1947 if (c != *p2++)
1948 break;
1949 if (c == '/')
1950 pSlash = p1;
1952 if (!pSlash)
1954 // One of the paths does not start with '/':
1955 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1956 return false;
1958 sal_Int32 nMatch = pSlash - pBasePathBegin;
1960 // If the two URLs are DOS file URLs starting with different volumes
1961 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1962 // relative (it could be, but some people do not like that):
1963 if (m_eScheme == INET_PROT_FILE
1964 && nMatch <= 1
1965 && hasDosVolume(eStyle)
1966 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1968 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1969 return false;
1972 // For every slash in the base path after nMatch, a prefix of "../" is
1973 // added to the new relative URL (if the common prefix of the two paths is
1974 // only "/"---but see handling of file URLs above---, the complete subject
1975 // path could go into the new relative URL instead, but some people don't
1976 // like that):
1977 OUStringBuffer aSynRelURIRef;
1978 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1979 ++p)
1981 if (*p == '/')
1982 aSynRelURIRef.append("../");
1985 // If the new relative URL would start with "//" (i.e., it would be
1986 // mistaken for a relative URL starting with an authority part), or if the
1987 // new relative URL would neither be empty nor start with <"/"> nor start
1988 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1989 // with a scheme part), then the new relative URL is prefixed with "./":
1990 if (aSynRelURIRef.getLength() == 0)
1992 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1993 && pSubjectPathBegin[nMatch] == '/'
1994 && pSubjectPathBegin[nMatch + 1] == '/')
1996 aSynRelURIRef.append("./");
1998 else
2000 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
2001 p != pSubjectPathEnd && *p != '/'; ++p)
2003 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2005 aSynRelURIRef.append("./");
2006 break;
2012 // The remainder of the subject path, starting at nMatch, is appended to
2013 // the new relative URL:
2014 sal_Char cEscapePrefix = getEscapePrefix();
2015 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2016 cEscapePrefix, eDecodeMechanism, eCharset));
2018 // If the subject has defined query or fragment parts, they are appended
2019 // to the new relative URL:
2020 if (aSubject.m_aQuery.isPresent())
2022 aSynRelURIRef.append(sal_Unicode('?'));
2023 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, cEscapePrefix,
2024 eDecodeMechanism, eCharset));
2026 if (aSubject.m_aFragment.isPresent())
2028 aSynRelURIRef.append(sal_Unicode('#'));
2029 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2030 cEscapePrefix, eDecodeMechanism, eCharset));
2033 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2034 return true;
2037 // static
2038 bool INetURLObject::convertIntToExt(OUString const & rTheIntURIRef,
2039 bool bOctets, OUString & rTheExtURIRef,
2040 DecodeMechanism eDecodeMechanism,
2041 rtl_TextEncoding eCharset)
2043 sal_Char cEscapePrefix
2044 = getEscapePrefix(CompareProtocolScheme(rTheIntURIRef));
2045 OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE,
2046 cEscapePrefix, NOT_CANONIC, eCharset,
2047 true));
2048 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2049 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2050 sal_Unicode const * p = pBegin;
2051 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2052 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2053 if (bConvert)
2055 aSynExtURIRef =
2056 aSynExtURIRef.replaceAt(0, p - pBegin,
2057 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2059 rTheExtURIRef = decode(aSynExtURIRef, cEscapePrefix, eDecodeMechanism,
2060 eCharset);
2061 return bConvert;
2064 // static
2065 bool INetURLObject::convertExtToInt(OUString const & rTheExtURIRef,
2066 bool bOctets, OUString & rTheIntURIRef,
2067 DecodeMechanism eDecodeMechanism,
2068 rtl_TextEncoding eCharset)
2070 sal_Char cEscapePrefix
2071 = getEscapePrefix(CompareProtocolScheme(rTheExtURIRef));
2072 OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE,
2073 cEscapePrefix, NOT_CANONIC, eCharset,
2074 true));
2075 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2076 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2077 sal_Unicode const * p = pBegin;
2078 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2079 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2080 if (bConvert)
2082 aSynIntURIRef =
2083 aSynIntURIRef.replaceAt(0, p - pBegin,
2084 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2086 rTheIntURIRef = decode(aSynIntURIRef, cEscapePrefix, eDecodeMechanism,
2087 eCharset);
2088 return bConvert;
2091 // static
// Looks up the scheme prefix at the start of [rBegin, pEnd) in a sorted table
// of known prefixes.  Input characters are lower-cased with
// INetMIME::toLowerCase() before being compared with the table entries.
//
// If an entry matches, rBegin is advanced past the matched prefix and a
// pointer to the entry is returned.  If several entries match (e.g.
// "private:" and "private:factory/"), a longer candidate is preferred and the
// shorter, completely matched one serves as fallback.  If nothing matches,
// rBegin is left unchanged and a null pointer is returned.
2092 INetURLObject::PrefixInfo const * INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2093 sal_Unicode const * pEnd)
// Each entry: prefix, translated prefix (0 if none), protocol constant, kind.
2095 static PrefixInfo const aMap[]
2096 = { // dummy entry at front needed, because pLast may point here:
2097 { 0, 0, INET_PROT_NOT_VALID, PrefixInfo::INTERNAL },
2098 { ".component:", "staroffice.component:", INET_PROT_COMPONENT,
2099 PrefixInfo::INTERNAL },
2100 { ".uno:", "staroffice.uno:", INET_PROT_UNO,
2101 PrefixInfo::INTERNAL },
2102 { "cid:", 0, INET_PROT_CID, PrefixInfo::OFFICIAL },
2103 { "data:", 0, INET_PROT_DATA, PrefixInfo::OFFICIAL },
2104 { "db:", "staroffice.db:", INET_PROT_DB, PrefixInfo::INTERNAL },
2105 { "file:", 0, INET_PROT_FILE, PrefixInfo::OFFICIAL },
2106 { "ftp:", 0, INET_PROT_FTP, PrefixInfo::OFFICIAL },
2107 { "hid:", "staroffice.hid:", INET_PROT_HID,
2108 PrefixInfo::INTERNAL },
2109 { "http:", 0, INET_PROT_HTTP, PrefixInfo::OFFICIAL },
2110 { "https:", 0, INET_PROT_HTTPS, PrefixInfo::OFFICIAL },
2111 { "imap:", 0, INET_PROT_IMAP, PrefixInfo::OFFICIAL },
2112 { "javascript:", 0, INET_PROT_JAVASCRIPT, PrefixInfo::OFFICIAL },
2113 { "ldap:", 0, INET_PROT_LDAP, PrefixInfo::OFFICIAL },
2114 { "macro:", "staroffice.macro:", INET_PROT_MACRO,
2115 PrefixInfo::INTERNAL },
2116 { "mailto:", 0, INET_PROT_MAILTO, PrefixInfo::OFFICIAL },
2117 { "news:", 0, INET_PROT_NEWS, PrefixInfo::OFFICIAL },
2118 { "out:", "staroffice.out:", INET_PROT_OUT,
2119 PrefixInfo::INTERNAL },
2120 { "pop3:", "staroffice.pop3:", INET_PROT_POP3,
2121 PrefixInfo::INTERNAL },
2122 { "private:", "staroffice.private:", INET_PROT_PRIV_SOFFICE,
2123 PrefixInfo::INTERNAL },
2124 { "private:factory/", "staroffice.factory:",
2125 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2126 { "private:helpid/", "staroffice.helpid:", INET_PROT_PRIV_SOFFICE,
2127 PrefixInfo::INTERNAL },
2128 { "private:java/", "staroffice.java:", INET_PROT_PRIV_SOFFICE,
2129 PrefixInfo::INTERNAL },
2130 { "private:searchfolder:", "staroffice.searchfolder:",
2131 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2132 { "private:trashcan:", "staroffice.trashcan:",
2133 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2134 { "sftp:", 0, INET_PROT_SFTP, PrefixInfo::OFFICIAL },
2135 { "slot:", "staroffice.slot:", INET_PROT_SLOT,
2136 PrefixInfo::INTERNAL },
2137 { "smb:", 0, INET_PROT_SMB, PrefixInfo::OFFICIAL },
2138 { "staroffice.component:", ".component:", INET_PROT_COMPONENT,
2139 PrefixInfo::EXTERNAL },
2140 { "staroffice.db:", "db:", INET_PROT_DB, PrefixInfo::EXTERNAL },
2141 { "staroffice.factory:", "private:factory/",
2142 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2143 { "staroffice.helpid:", "private:helpid/", INET_PROT_PRIV_SOFFICE,
2144 PrefixInfo::EXTERNAL },
2145 { "staroffice.hid:", "hid:", INET_PROT_HID,
2146 PrefixInfo::EXTERNAL },
2147 { "staroffice.java:", "private:java/", INET_PROT_PRIV_SOFFICE,
2148 PrefixInfo::EXTERNAL },
2149 { "staroffice.macro:", "macro:", INET_PROT_MACRO,
2150 PrefixInfo::EXTERNAL },
2151 { "staroffice.out:", "out:", INET_PROT_OUT,
2152 PrefixInfo::EXTERNAL },
2153 { "staroffice.pop3:", "pop3:", INET_PROT_POP3,
2154 PrefixInfo::EXTERNAL },
2155 { "staroffice.private:", "private:", INET_PROT_PRIV_SOFFICE,
2156 PrefixInfo::EXTERNAL },
2157 { "staroffice.searchfolder:", "private:searchfolder:",
2158 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2159 { "staroffice.slot:", "slot:", INET_PROT_SLOT,
2160 PrefixInfo::EXTERNAL },
2161 { "staroffice.trashcan:", "private:trashcan:",
2162 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2163 { "staroffice.uno:", ".uno:", INET_PROT_UNO,
2164 PrefixInfo::EXTERNAL },
2165 { "staroffice.vim:", "vim:", INET_PROT_VIM,
2166 PrefixInfo::EXTERNAL },
2167 { "staroffice:", "private:", INET_PROT_PRIV_SOFFICE,
2168 PrefixInfo::EXTERNAL },
2169 { "telnet:", 0, INET_PROT_TELNET, PrefixInfo::OFFICIAL },
2170 { "vim:", "staroffice.vim:", INET_PROT_VIM,
2171 PrefixInfo::INTERNAL },
2172 { "vnd.libreoffice.cmis:", 0, INET_PROT_CMIS, PrefixInfo::INTERNAL },
2173 { "vnd.sun.star.cmd:", 0, INET_PROT_VND_SUN_STAR_CMD,
2174 PrefixInfo::OFFICIAL },
2175 { "vnd.sun.star.expand:", 0, INET_PROT_VND_SUN_STAR_EXPAND,
2176 PrefixInfo::OFFICIAL },
2177 { "vnd.sun.star.help:", 0, INET_PROT_VND_SUN_STAR_HELP,
2178 PrefixInfo::OFFICIAL },
2179 { "vnd.sun.star.hier:", 0, INET_PROT_VND_SUN_STAR_HIER,
2180 PrefixInfo::OFFICIAL },
2181 { "vnd.sun.star.pkg:", 0, INET_PROT_VND_SUN_STAR_PKG,
2182 PrefixInfo::OFFICIAL },
2183 { "vnd.sun.star.tdoc:", 0, INET_PROT_VND_SUN_STAR_TDOC,
2184 PrefixInfo::OFFICIAL },
2185 { "vnd.sun.star.webdav:", 0, INET_PROT_VND_SUN_STAR_WEBDAV,
2186 PrefixInfo::OFFICIAL } };
2187 /* This list needs to be sorted, or you'll introduce serious bugs */
// [pFirst, pLast] is the range of table entries that still agree with the
// input read so far; it starts behind the dummy entry aMap[0].
2189 PrefixInfo const * pFirst = aMap + 1;
2190 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
// pMatch/pMatched remember the last completely matched entry and the input
// position right behind it (fallback if no longer entry matches).
2191 PrefixInfo const * pMatch = 0;
2192 sal_Unicode const * pMatched = rBegin;
2193 sal_Unicode const * p = rBegin;
// i is the index of the prefix character compared in the current round.
2194 sal_Int32 i = 0;
2195 for (; pFirst < pLast; ++i)
// The first candidate ends at index i, i.e. it has been matched completely:
// record it as fallback and drop it from the candidate range.
2197 if (pFirst->m_pPrefix[i] == '\0')
2199 pMatch = pFirst++;
2200 pMatched = p;
2202 if (p >= pEnd)
2203 break;
2204 sal_uInt32 nChar = INetMIME::toLowerCase(*p++);
// Narrow the range from both ends to the entries whose i-th character equals
// nChar (this is where the sort order of the table is relied upon).
2205 while (pFirst <= pLast && sal_uChar(pFirst->m_pPrefix[i]) < nChar)
2206 ++pFirst;
2207 while (pFirst <= pLast && sal_uChar(pLast->m_pPrefix[i]) > nChar)
2208 --pLast;
// Exactly one candidate is left: compare the rest of its prefix directly
// against the remaining input.
2210 if (pFirst == pLast)
2212 sal_Char const * q = pFirst->m_pPrefix + i;
2213 while (p < pEnd && *q != '\0'
2214 && INetMIME::toLowerCase(*p) == sal_uChar(*q))
2216 ++p;
2217 ++q;
2219 if (*q == '\0')
2221 rBegin = p;
2222 return pFirst;
// No remaining candidate matched in full: fall back to the last completely
// matched entry, if any (otherwise rBegin keeps its value and 0 is returned).
2225 rBegin = pMatched;
2226 return pMatch;
2229 sal_Int32 INetURLObject::getAuthorityBegin() const
2231 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2232 "INetURLObject::getAuthority(): Bad scheme");
2233 sal_Int32 nBegin;
2234 if (m_aUser.isPresent())
2235 nBegin = m_aUser.getBegin();
2236 else if (m_aHost.isPresent())
2237 nBegin = m_aHost.getBegin();
2238 else
2239 nBegin = m_aPath.getBegin();
2240 nBegin -= RTL_CONSTASCII_LENGTH("//");
2241 DBG_ASSERT(m_aAbsURIRef.getStr()[nBegin] == '/'
2242 && m_aAbsURIRef.getStr()[nBegin + 1] == '/',
2243 "INetURLObject::getAuthority(): Bad authority");
2244 return nBegin;
2247 INetURLObject::SubString INetURLObject::getAuthority() const
2249 sal_Int32 nBegin = getAuthorityBegin();
2250 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2251 m_aHost.isPresent() ? m_aHost.getEnd() :
2252 m_aAuth.isPresent() ? m_aAuth.getEnd() :
2253 m_aUser.isPresent() ? m_aUser.getEnd() :
2254 nBegin + RTL_CONSTASCII_LENGTH("//");
2255 return SubString(nBegin, nEnd - nBegin);
2258 bool INetURLObject::setUser(OUString const & rTheUser,
2259 bool bOctets, EncodeMechanism eMechanism,
2260 rtl_TextEncoding eCharset)
2262 if (
2263 !getSchemeInfo().m_bUser ||
2264 (m_eScheme == INET_PROT_IMAP && rTheUser.isEmpty())
2267 return false;
2270 OUString aNewUser(encodeText(rTheUser, bOctets,
2271 m_eScheme == INET_PROT_IMAP ?
2272 PART_IMAP_ACHAR :
2273 m_eScheme == INET_PROT_VIM ?
2274 PART_VIM :
2275 PART_USER_PASSWORD,
2276 getEscapePrefix(), eMechanism, eCharset,
2277 false));
2278 sal_Int32 nDelta;
2279 if (m_aUser.isPresent())
2280 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2281 else if (m_aHost.isPresent())
2283 m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@'));
2284 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2286 else if (getSchemeInfo().m_bHost)
2287 return false;
2288 else
2289 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2290 m_aAuth += nDelta;
2291 m_aHost += nDelta;
2292 m_aPort += nDelta;
2293 m_aPath += nDelta;
2294 m_aQuery += nDelta;
2295 m_aFragment += nDelta;
2296 return true;
2299 namespace
2301 void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2303 OUString sTemp(rBuf.makeStringAndClear());
2304 rBuf.append(sTemp.replaceAt(index, count, OUString()));
2308 bool INetURLObject::clearPassword()
2310 if (!getSchemeInfo().m_bPassword)
2311 return false;
2312 if (m_aAuth.isPresent())
2314 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2315 m_aAuth.getLength() + 1);
2316 sal_Int32 nDelta = m_aAuth.clear() - 1;
2317 m_aHost += nDelta;
2318 m_aPort += nDelta;
2319 m_aPath += nDelta;
2320 m_aQuery += nDelta;
2321 m_aFragment += nDelta;
2323 return true;
2326 bool INetURLObject::setPassword(OUString const & rThePassword,
2327 bool bOctets, EncodeMechanism eMechanism,
2328 rtl_TextEncoding eCharset)
2330 if (!getSchemeInfo().m_bPassword)
2331 return false;
2332 OUString aNewAuth(encodeText(rThePassword, bOctets,
2333 m_eScheme == INET_PROT_VIM ?
2334 PART_VIM : PART_USER_PASSWORD,
2335 getEscapePrefix(), eMechanism, eCharset,
2336 false));
2337 sal_Int32 nDelta;
2338 if (m_aAuth.isPresent())
2339 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2340 else if (m_aUser.isPresent())
2342 m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':'));
2343 nDelta
2344 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2346 else if (m_aHost.isPresent())
2348 m_aAbsURIRef.insert(m_aHost.getBegin(),
2349 OUString( ":@" ));
2350 m_aUser.set(m_aAbsURIRef, OUString(), m_aHost.getBegin());
2351 nDelta
2352 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2354 else if (getSchemeInfo().m_bHost)
2355 return false;
2356 else
2358 m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':'));
2359 m_aUser.set(m_aAbsURIRef, OUString(), m_aPath.getBegin());
2360 nDelta
2361 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2363 m_aHost += nDelta;
2364 m_aPort += nDelta;
2365 m_aPath += nDelta;
2366 m_aQuery += nDelta;
2367 m_aFragment += nDelta;
2368 return true;
2371 // static
// Parses a host at the start of [rBegin, pEnd) with a character-driven state
// machine.  Three forms are recognized: a name made of '.'-separated labels
// (ASCII alphanumerics, '_' and inner '-'), a dotted IPv4 address, and an
// IPv6 address in square brackets (optionally ending in an IPv4 address).
// Scanning stops at the end of input or at the first character that does not
// fit the current state.
//
// On success, returns true, advances rBegin past the host and stores the host
// in rCanonic: names are copied verbatim, while the numbers of IPv4/IPv6
// addresses are re-rendered from their parsed values.  On failure, returns
// false and leaves rBegin and rCanonic untouched.
2372 bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2373 OUString & rCanonic)
2375 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2376 // IPv4 address directly follows the abbreviating "::". The ABNF in
2377 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2378 // mentions "::13:1.68.3". This algorithm accepts both variants:
2379 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2380 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2381 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2382 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2383 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2384 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2385 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2386 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
// aTheCanonic collects the canonic form of numeric addresses while parsing;
// nNumber/nDigits describe the number currently being read, nOctets counts
// the IPv4 octets started so far.
2387 OUStringBuffer aTheCanonic;
2388 sal_uInt32 nNumber = 0;
2389 int nDigits = 0;
2390 int nOctets = 0;
2391 State eState = STATE_INITIAL;
2392 sal_Unicode const * p = rBegin;
2393 for (; p != pEnd; ++p)
2394 switch (eState)
// First character decides the form: '[' starts IPv6, a letter or '_' starts
// a name, a digit starts a tentative IPv4 address.
2396 case STATE_INITIAL:
2397 if (*p == '[')
2399 aTheCanonic.append(sal_Unicode('['));
2400 eState = STATE_IP6;
2402 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2403 eState = STATE_TOPLABEL;
2404 else if (rtl::isAsciiDigit(*p))
2406 nNumber = INetMIME::getWeight(*p);
2407 nDigits = 1;
2408 nOctets = 1;
2409 eState = STATE_IP4;
2411 else
2412 goto done;
2413 break;
2415 case STATE_LABEL:
2416 if (*p == '.')
2417 eState = STATE_LABEL_DOT;
2418 else if (*p == '-')
2419 eState = STATE_LABEL_HYPHEN;
2420 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2421 goto done;
2422 break;
2424 case STATE_LABEL_HYPHEN:
2425 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2426 eState = STATE_LABEL;
2427 else if (*p != '-')
2428 goto done;
2429 break;
2431 case STATE_LABEL_DOT:
2432 if (rtl::isAsciiAlpha(*p) || *p == '_')
2433 eState = STATE_TOPLABEL;
2434 else if (rtl::isAsciiDigit(*p))
2435 eState = STATE_LABEL;
2436 else
2437 goto done;
2438 break;
2440 case STATE_TOPLABEL:
2441 if (*p == '.')
2442 eState = STATE_TOPLABEL_DOT;
2443 else if (*p == '-')
2444 eState = STATE_TOPLABEL_HYPHEN;
2445 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2446 goto done;
2447 break;
2449 case STATE_TOPLABEL_HYPHEN:
2450 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2451 eState = STATE_TOPLABEL;
2452 else if (*p != '-')
2453 goto done;
2454 break;
2456 case STATE_TOPLABEL_DOT:
2457 if (rtl::isAsciiAlpha(*p) || *p == '_')
2458 eState = STATE_TOPLABEL;
2459 else if (rtl::isAsciiDigit(*p))
2460 eState = STATE_LABEL;
2461 else
2462 goto done;
2463 break;
// Tentative IPv4 address: a fifth octet, a fourth digit, a letter, '_' or '-'
// turns the input into an ordinary name (label states) instead.
2465 case STATE_IP4:
2466 if (*p == '.')
2467 if (nOctets < 4)
2469 aTheCanonic.append(
2470 OUString::valueOf(sal_Int32(nNumber)));
2471 aTheCanonic.append(sal_Unicode('.'));
2472 ++nOctets;
2473 eState = STATE_IP4_DOT;
2475 else
2476 eState = STATE_LABEL_DOT;
2477 else if (*p == '-')
2478 eState = STATE_LABEL_HYPHEN;
2479 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2480 eState = STATE_LABEL;
2481 else if (rtl::isAsciiDigit(*p))
2482 if (nDigits < 3)
2484 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2485 ++nDigits;
2487 else
2488 eState = STATE_LABEL;
2489 else
2490 goto done;
2491 break;
2493 case STATE_IP4_DOT:
2494 if (rtl::isAsciiAlpha(*p) || *p == '_')
2495 eState = STATE_TOPLABEL;
2496 else if (rtl::isAsciiDigit(*p))
2498 nNumber = INetMIME::getWeight(*p);
2499 nDigits = 1;
2500 eState = STATE_IP4;
2502 else
2503 goto done;
2504 break;
// IPv6, directly after '[': either a leading "::" or the first hex group.
2506 case STATE_IP6:
2507 if (*p == ':')
2508 eState = STATE_IP6_COLON;
2509 else if (rtl::isAsciiHexDigit(*p))
2511 nNumber = INetMIME::getHexWeight(*p);
2512 nDigits = 1;
2513 eState = STATE_IP6_HEXSEQ1;
2515 else
2516 goto done;
2517 break;
2519 case STATE_IP6_COLON:
2520 if (*p == ':')
2522 aTheCanonic.append("::");
2523 eState = STATE_IP6_2COLON;
2525 else
2526 goto done;
2527 break;
// After "::": end of address, a third ':' (IPv4 must follow), or the first
// group of the second hex sequence.
2529 case STATE_IP6_2COLON:
2530 if (*p == ']')
2531 eState = STATE_IP6_DONE;
2532 else if (*p == ':')
2534 aTheCanonic.append(sal_Unicode(':'));
2535 eState = STATE_IP6_3COLON;
2537 else if (rtl::isAsciiDigit(*p))
2539 nNumber = INetMIME::getWeight(*p);
2540 nDigits = 1;
2541 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2543 else if (rtl::isAsciiHexDigit(*p))
2545 nNumber = INetMIME::getHexWeight(*p);
2546 nDigits = 1;
2547 eState = STATE_IP6_HEXSEQ2;
2549 else
2550 goto done;
2551 break;
2553 case STATE_IP6_3COLON:
2554 if (rtl::isAsciiDigit(*p))
2556 nNumber = INetMIME::getWeight(*p);
2557 nDigits = 1;
2558 nOctets = 1;
2559 eState = STATE_IP6_IP4;
2561 else
2562 goto done;
2563 break;
// Hex group of the sequence before "::" (at most four hex digits).
2565 case STATE_IP6_HEXSEQ1:
2566 if (*p == ']')
2568 aTheCanonic.append(
2569 OUString::valueOf(sal_Int32(nNumber), 16));
2570 eState = STATE_IP6_DONE;
2572 else if (*p == ':')
2574 aTheCanonic.append(
2575 OUString::valueOf(sal_Int32(nNumber), 16));
2576 aTheCanonic.append(sal_Unicode(':'));
2577 eState = STATE_IP6_HEXSEQ1_COLON;
2579 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2581 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2582 ++nDigits;
2584 else
2585 goto done;
2586 break;
2588 case STATE_IP6_HEXSEQ1_COLON:
2589 if (*p == ':')
2591 aTheCanonic.append(sal_Unicode(':'));
2592 eState = STATE_IP6_2COLON;
2594 else if (rtl::isAsciiDigit(*p))
2596 nNumber = INetMIME::getWeight(*p);
2597 nDigits = 1;
2598 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2600 else if (rtl::isAsciiHexDigit(*p))
2602 nNumber = INetMIME::getHexWeight(*p);
2603 nDigits = 1;
2604 eState = STATE_IP6_HEXSEQ1;
2606 else
2607 goto done;
2608 break;
// A group consisting of decimal digits only so far: it may be a hex group or
// the first octet of a trailing IPv4 address.  The digits are accumulated as
// hexadecimal; if a '.' follows, the nibbles are re-read as decimal digits.
2610 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2611 if (*p == ']')
2613 aTheCanonic.append(
2614 OUString::valueOf(sal_Int32(nNumber), 16));
2615 eState = STATE_IP6_DONE;
2617 else if (*p == ':')
2619 aTheCanonic.append(
2620 OUString::valueOf(sal_Int32(nNumber), 16));
2621 aTheCanonic.append(sal_Unicode(':'));
2622 eState = STATE_IP6_HEXSEQ1_COLON;
2624 else if (*p == '.')
2626 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2627 + (nNumber & 15);
2628 aTheCanonic.append(
2629 OUString::valueOf(sal_Int32(nNumber)));
2630 aTheCanonic.append(sal_Unicode('.'));
2631 nOctets = 2;
2632 eState = STATE_IP6_IP4_DOT;
2634 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2636 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2637 ++nDigits;
2639 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2641 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2642 ++nDigits;
2643 eState = STATE_IP6_HEXSEQ1;
2645 else
2646 goto done;
2647 break;
// Hex group of the sequence after "::"; unlike HEXSEQ1, a further "::" is
// not accepted from here (see STATE_IP6_HEXSEQ2_COLON).
2649 case STATE_IP6_HEXSEQ2:
2650 if (*p == ']')
2652 aTheCanonic.append(
2653 OUString::valueOf(sal_Int32(nNumber), 16));
2654 eState = STATE_IP6_DONE;
2656 else if (*p == ':')
2658 aTheCanonic.append(
2659 OUString::valueOf(sal_Int32(nNumber), 16));
2660 aTheCanonic.append(sal_Unicode(':'));
2661 eState = STATE_IP6_HEXSEQ2_COLON;
2663 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2665 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2666 ++nDigits;
2668 else
2669 goto done;
2670 break;
2672 case STATE_IP6_HEXSEQ2_COLON:
2673 if (rtl::isAsciiDigit(*p))
2675 nNumber = INetMIME::getWeight(*p);
2676 nDigits = 1;
2677 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2679 else if (rtl::isAsciiHexDigit(*p))
2681 nNumber = INetMIME::getHexWeight(*p);
2682 nDigits = 1;
2683 eState = STATE_IP6_HEXSEQ2;
2685 else
2686 goto done;
2687 break;
// Same hex-or-IPv4 ambiguity as STATE_IP6_HEXSEQ1_MAYBE_IP4, for the sequence
// after "::".
2689 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2690 if (*p == ']')
2692 aTheCanonic.append(
2693 OUString::valueOf(sal_Int32(nNumber), 16));
2694 eState = STATE_IP6_DONE;
2696 else if (*p == ':')
2698 aTheCanonic.append(
2699 OUString::valueOf(sal_Int32(nNumber), 16));
2700 aTheCanonic.append(sal_Unicode(':'));
2701 eState = STATE_IP6_HEXSEQ2_COLON;
2703 else if (*p == '.')
2705 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2706 + (nNumber & 15);
2707 aTheCanonic.append(
2708 OUString::valueOf(sal_Int32(nNumber)));
2709 aTheCanonic.append(sal_Unicode('.'));
2710 nOctets = 2;
2711 eState = STATE_IP6_IP4_DOT;
2713 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2715 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2716 ++nDigits;
2718 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2720 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2721 ++nDigits;
2722 eState = STATE_IP6_HEXSEQ2;
2724 else
2725 goto done;
2726 break;
// IPv4 tail inside the brackets: ']' is only accepted after the fourth octet.
2728 case STATE_IP6_IP4:
2729 if (*p == ']')
2730 if (nOctets == 4)
2732 aTheCanonic.append(
2733 OUString::valueOf(sal_Int32(nNumber)));
2734 eState = STATE_IP6_DONE;
2736 else
2737 goto done;
2738 else if (*p == '.')
2739 if (nOctets < 4)
2741 aTheCanonic.append(
2742 OUString::valueOf(sal_Int32(nNumber)));
2743 aTheCanonic.append(sal_Unicode('.'));
2744 ++nOctets;
2745 eState = STATE_IP6_IP4_DOT;
2747 else
2748 goto done;
2749 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2751 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2752 ++nDigits;
2754 else
2755 goto done;
2756 break;
2758 case STATE_IP6_IP4_DOT:
2759 if (rtl::isAsciiDigit(*p))
2761 nNumber = INetMIME::getWeight(*p);
2762 nDigits = 1;
2763 eState = STATE_IP6_IP4;
2765 else
2766 goto done;
2767 break;
// The closing ']' has been consumed; p now points behind it.
2769 case STATE_IP6_DONE:
2770 goto done;
// Scanning has stopped (end of input or unexpected character); the state
// reached decides whether what was read so far is a complete host.
2772 done:
2773 switch (eState)
// Names: discard anything collected during a tentative IPv4 parse and copy
// the scanned input verbatim.
2775 case STATE_LABEL:
2776 case STATE_TOPLABEL:
2777 case STATE_TOPLABEL_DOT:
2778 aTheCanonic.setLength(0);
2779 aTheCanonic.append(rBegin, p - rBegin);
2780 rBegin = p;
2781 rCanonic = aTheCanonic.makeStringAndClear();
2782 return true;
// IPv4: valid only with exactly four octets; the last one is still pending
// in nNumber.
2784 case STATE_IP4:
2785 if (nOctets == 4)
2787 aTheCanonic.append(
2788 OUString::valueOf(sal_Int32(nNumber)));
2789 rBegin = p;
2790 rCanonic = aTheCanonic.makeStringAndClear();
2791 return true;
2793 return false;
2795 case STATE_IP6_DONE:
2796 aTheCanonic.append(sal_Unicode(']'));
2797 rBegin = p;
2798 rCanonic = aTheCanonic.makeStringAndClear();
2799 return true;
// Every other state means the input ended in the middle of a construct.
2801 default:
2802 return false;
2806 // static
2807 bool INetURLObject::parseHostOrNetBiosName(
2808 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
2809 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2810 OUStringBuffer* pCanonic)
2812 OUString aTheCanonic;
2813 if (pBegin < pEnd)
2815 sal_Unicode const * p = pBegin;
2816 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2818 if (bNetBiosName)
2820 OUStringBuffer buf;
2821 while (pBegin < pEnd)
2823 EscapeType eEscapeType;
2824 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, '%',
2825 eMechanism, eCharset,
2826 eEscapeType);
2827 if (!INetMIME::isVisible(nUTF32))
2828 return false;
2829 if (!rtl::isAsciiAlphanumeric(nUTF32))
2830 switch (nUTF32)
2832 case '"':
2833 case '*':
2834 case '+':
2835 case ',':
2836 case '/':
2837 case ':':
2838 case ';':
2839 case '<':
2840 case '=':
2841 case '>':
2842 case '?':
2843 case '[':
2844 case '\\':
2845 case ']':
2846 case '`':
2847 case '|':
2848 return false;
2850 if (pCanonic != NULL) {
2851 appendUCS4(
2852 buf, nUTF32, eEscapeType, bOctets, PART_URIC, '%',
2853 eCharset, true);
2856 aTheCanonic = buf.makeStringAndClear();
2858 else
2859 return false;
2862 if (pCanonic != NULL) {
2863 *pCanonic = aTheCanonic;
2865 return true;
2868 bool INetURLObject::setHost(OUString const & rTheHost, bool bOctets,
2869 EncodeMechanism eMechanism,
2870 rtl_TextEncoding eCharset)
2872 if (!getSchemeInfo().m_bHost)
2873 return false;
2874 OUStringBuffer aSynHost(rTheHost);
2875 bool bNetBiosName = false;
2876 switch (m_eScheme)
2878 case INET_PROT_FILE:
2880 OUString sTemp(aSynHost.toString());
2881 if (sTemp.equalsIgnoreAsciiCase("localhost"))
2883 aSynHost.setLength(0);
2885 bNetBiosName = true;
2887 break;
2888 case INET_PROT_LDAP:
2889 if (aSynHost.getLength() == 0 && m_aPort.isPresent())
2890 return false;
2891 break;
2893 default:
2894 if (aSynHost.getLength() == 0)
2895 return false;
2896 break;
2898 if (!parseHostOrNetBiosName(
2899 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2900 bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost))
2901 return false;
2902 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2903 m_aPort += nDelta;
2904 m_aPath += nDelta;
2905 m_aQuery += nDelta;
2906 m_aFragment += nDelta;
2907 return true;
2910 // static
2911 bool INetURLObject::parsePath(INetProtocol eScheme,
2912 sal_Unicode const ** pBegin,
2913 sal_Unicode const * pEnd,
2914 bool bOctets,
2915 EncodeMechanism eMechanism,
2916 rtl_TextEncoding eCharset,
2917 bool bSkippedInitialSlash,
2918 sal_uInt32 nSegmentDelimiter,
2919 sal_uInt32 nAltSegmentDelimiter,
2920 sal_uInt32 nQueryDelimiter,
2921 sal_uInt32 nFragmentDelimiter,
2922 OUStringBuffer &rSynPath)
2924 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2926 sal_Unicode const * pPos = *pBegin;
2927 OUStringBuffer aTheSynPath;
2929 switch (eScheme)
2931 case INET_PROT_NOT_VALID:
2932 return false;
2934 case INET_PROT_FTP:
2935 case INET_PROT_IMAP:
2936 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2937 return false;
2938 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2940 EscapeType eEscapeType;
2941 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2942 '%', eMechanism,
2943 eCharset, eEscapeType);
2944 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2945 PART_HTTP_PATH, '%', eCharset, true);
2947 if (aTheSynPath.getLength() == 0)
2948 aTheSynPath.append(sal_Unicode('/'));
2949 break;
2951 case INET_PROT_HTTP:
2952 case INET_PROT_VND_SUN_STAR_WEBDAV:
2953 case INET_PROT_HTTPS:
2954 case INET_PROT_SMB:
2955 case INET_PROT_CMIS:
2956 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2957 return false;
2958 while (pPos < pEnd && *pPos != nQueryDelimiter
2959 && *pPos != nFragmentDelimiter)
2961 EscapeType eEscapeType;
2962 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2963 '%', eMechanism,
2964 eCharset, eEscapeType);
2965 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2966 PART_HTTP_PATH, '%', eCharset, true);
2968 if (aTheSynPath.getLength() == 0)
2969 aTheSynPath.append(sal_Unicode('/'));
2970 break;
2972 case INET_PROT_FILE:
2974 if (bSkippedInitialSlash)
2975 aTheSynPath.append(sal_Unicode('/'));
2976 else if (pPos < pEnd
2977 && *pPos != nSegmentDelimiter
2978 && *pPos != nAltSegmentDelimiter)
2979 return false;
2980 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2982 EscapeType eEscapeType;
2983 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2984 '%', eMechanism,
2985 eCharset, eEscapeType);
2986 if (eEscapeType == ESCAPE_NO)
2988 if (nUTF32 == nSegmentDelimiter
2989 || nUTF32 == nAltSegmentDelimiter)
2991 aTheSynPath.append(sal_Unicode('/'));
2992 continue;
2994 else if (nUTF32 == '|'
2995 && (pPos == pEnd
2996 || *pPos == nFragmentDelimiter
2997 || *pPos == nSegmentDelimiter
2998 || *pPos == nAltSegmentDelimiter)
2999 && aTheSynPath.getLength() == 2
3000 && rtl::isAsciiAlpha(aTheSynPath[1]))
3002 // A first segment of <ALPHA "|"> is translated to
3003 // <ALPHA ":">:
3004 aTheSynPath.append(sal_Unicode(':'));
3005 continue;
3008 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3009 PART_PCHAR, '%', eCharset, true);
3011 if (aTheSynPath.getLength() == 0)
3012 aTheSynPath.append(sal_Unicode('/'));
3013 break;
3016 case INET_PROT_MAILTO:
3017 while (pPos < pEnd && *pPos != nQueryDelimiter
3018 && *pPos != nFragmentDelimiter)
3020 EscapeType eEscapeType;
3021 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3022 '%', eMechanism,
3023 eCharset, eEscapeType);
3024 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3025 PART_MAILTO, '%', eCharset, true);
3027 break;
3029 case INET_PROT_NEWS:
3030 if (pPos == pEnd || *pPos == nQueryDelimiter
3031 || *pPos == nFragmentDelimiter)
3032 return false;
3034 // Match <"*">:
3035 if (*pPos == '*'
3036 && (pEnd - pPos == 1 || pPos[1] == nQueryDelimiter
3037 || pPos[1] == nFragmentDelimiter))
3039 ++pPos;
3040 aTheSynPath.append(sal_Unicode('*'));
3041 break;
3044 // Match <group>:
3045 if (rtl::isAsciiAlpha(*pPos))
3047 for (sal_Unicode const * p = pPos + 1;; ++p)
3049 if (p == pEnd || *p == nQueryDelimiter
3050 || *p == nFragmentDelimiter)
3052 aTheSynPath.setLength(0);
3053 aTheSynPath.append(pPos, p - pPos);
3054 pPos = p;
3055 goto done;
3057 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+'
3058 && *p != '-' && *p != '.' && *p != '_')
3060 break;
3065 // Match <article>:
3066 for (;;)
3068 if (pPos == pEnd || *pPos == nQueryDelimiter
3069 || *pPos == nFragmentDelimiter)
3070 return false;
3071 if (*pPos == '@')
3072 break;
3073 EscapeType eEscapeType;
3074 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, '%',
3075 eMechanism, eCharset, eEscapeType);
3076 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3077 PART_NEWS_ARTICLE_LOCALPART, '%', eCharset, true);
3079 if (aTheSynPath.getLength() == 0)
3080 return false;
3081 ++pPos;
3082 aTheSynPath.append(sal_Unicode('@'));
3084 sal_Unicode const * p = pPos;
3085 while (p < pEnd && *pPos != nQueryDelimiter
3086 && *pPos != nFragmentDelimiter)
3087 ++p;
3088 OUString aCanonic;
3089 if (!parseHost(pPos, p, aCanonic))
3090 return false;
3091 aTheSynPath.append(aCanonic);
3094 done:
3095 break;
3097 case INET_PROT_POP3:
3098 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3100 EscapeType eEscapeType;
3101 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3102 '%', eMechanism,
3103 eCharset, eEscapeType);
3104 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3105 PART_MESSAGE_ID_PATH, '%', eCharset,
3106 true);
3108 break;
3110 case INET_PROT_PRIV_SOFFICE:
3111 case INET_PROT_SLOT:
3112 case INET_PROT_HID:
3113 case INET_PROT_MACRO:
3114 case INET_PROT_UNO:
3115 case INET_PROT_COMPONENT:
3116 case INET_PROT_LDAP:
3117 while (pPos < pEnd && *pPos != nQueryDelimiter
3118 && *pPos != nFragmentDelimiter)
3120 EscapeType eEscapeType;
3121 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3122 '%', eMechanism,
3123 eCharset, eEscapeType);
3124 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3125 PART_PATH_BEFORE_QUERY, '%', eCharset,
3126 true);
3128 break;
3130 case INET_PROT_VND_SUN_STAR_HELP:
3131 if (pPos == pEnd
3132 || *pPos == nQueryDelimiter
3133 || *pPos == nFragmentDelimiter)
3134 aTheSynPath.append(sal_Unicode('/'));
3135 else
3137 if (*pPos != '/')
3138 return false;
3139 while (pPos < pEnd && *pPos != nQueryDelimiter
3140 && *pPos != nFragmentDelimiter)
3142 EscapeType eEscapeType;
3143 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3144 '%', eMechanism,
3145 eCharset, eEscapeType);
3146 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3147 PART_HTTP_PATH, '%', eCharset, true);
3150 break;
3152 case INET_PROT_JAVASCRIPT:
3153 case INET_PROT_DATA:
3154 case INET_PROT_CID:
3155 case INET_PROT_DB:
3156 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3158 EscapeType eEscapeType;
3159 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3160 '%', eMechanism,
3161 eCharset, eEscapeType);
3162 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3163 PART_URIC, '%', eCharset, true);
3165 break;
3167 case INET_PROT_OUT:
3168 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '~')
3169 return false;
3170 aTheSynPath.append("/~");
3171 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3173 EscapeType eEscapeType;
3174 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3175 '%', eMechanism,
3176 eCharset, eEscapeType);
3177 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3178 PART_URIC, '%', eCharset, true);
3180 break;
3182 case INET_PROT_VND_SUN_STAR_HIER:
3183 case INET_PROT_VND_SUN_STAR_PKG:
3184 if (pPos < pEnd && *pPos != '/'
3185 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3186 return false;
3187 while (pPos < pEnd && *pPos != nQueryDelimiter
3188 && *pPos != nFragmentDelimiter)
3190 EscapeType eEscapeType;
3191 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3192 '%', eMechanism,
3193 eCharset, eEscapeType);
3194 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3195 aTheSynPath.append(sal_Unicode('/'));
3196 else
3197 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3198 PART_PCHAR, '%', eCharset, false);
3200 if (aTheSynPath.getLength() == 0)
3201 aTheSynPath.append(sal_Unicode('/'));
3202 break;
3204 case INET_PROT_VIM:
3206 sal_Unicode const * pPathEnd = pPos;
3207 while (pPathEnd < pEnd && *pPathEnd != nFragmentDelimiter)
3208 ++pPathEnd;
3209 aTheSynPath.append(sal_Unicode('/'));
3210 if (pPos == pPathEnd)
3211 break;
3212 else if (*pPos++ != '/')
3213 return false;
3214 if (pPos == pPathEnd)
3215 break;
3216 while (pPos < pPathEnd && *pPos != '/')
3218 EscapeType eEscapeType;
3219 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3220 '=', eMechanism,
3221 eCharset, eEscapeType);
3222 appendUCS4(aTheSynPath,
3223 eEscapeType == ESCAPE_NO ?
3224 INetMIME::toLowerCase(nUTF32) : nUTF32,
3225 eEscapeType, bOctets, PART_VIM, '=',
3226 eCharset, false);
3228 bool bInbox;
3229 OUString sCompare(aTheSynPath.toString());
3230 if ( sCompare == "/inbox" )
3231 bInbox = true;
3232 else if ( sCompare == "/newsgroups" )
3233 bInbox = false;
3234 else
3235 return false;
3236 aTheSynPath.append(sal_Unicode('/'));
3237 if (pPos == pPathEnd)
3238 break;
3239 else if (*pPos++ != '/')
3240 return false;
3241 if (!bInbox)
3243 bool bEmpty = true;
3244 while (pPos < pPathEnd && *pPos != '/')
3246 EscapeType eEscapeType;
3247 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3248 '=', eMechanism,
3249 eCharset, eEscapeType);
3250 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3251 PART_VIM, '=', eCharset, false);
3252 bEmpty = false;
3254 if (bEmpty)
3255 return false;
3256 aTheSynPath.append(sal_Unicode('/'));
3257 if (pPos == pPathEnd)
3258 break;
3259 else if (*pPos++ != '/')
3260 return false;
3262 bool bEmpty = true;
3263 while (pPos < pPathEnd && *pPos != ':')
3265 EscapeType eEscapeType;
3266 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3267 '=', eMechanism,
3268 eCharset, eEscapeType);
3269 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3270 PART_VIM, '=', eCharset, false);
3271 bEmpty = false;
3273 if (bEmpty)
3274 return false;
3275 if (pPos == pPathEnd)
3276 break;
3277 else if (*pPos++ != ':')
3278 return false;
3279 aTheSynPath.append(sal_Unicode(':'));
3280 for (int i = 0; i < 3; ++i)
3282 if (i != 0)
3284 if (pPos == pPathEnd || *pPos++ != '.')
3285 return false;
3286 aTheSynPath.append(sal_Unicode('.'));
3288 bEmpty = true;
3289 while (pPos < pPathEnd && *pPos != '.')
3291 EscapeType eEscapeType;
3292 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3293 '=', eMechanism,
3294 eCharset, eEscapeType);
3295 if (!rtl::isAsciiDigit(nUTF32))
3296 return false;
3297 aTheSynPath.append(sal_Unicode(nUTF32));
3298 bEmpty = false;
3300 if (bEmpty)
3301 return false;
3303 if (pPos != pPathEnd)
3304 return false;
3305 break;
3308 case INET_PROT_VND_SUN_STAR_CMD:
3309 case INET_PROT_VND_SUN_STAR_EXPAND:
3311 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3312 return false;
3313 Part ePart = PART_URIC_NO_SLASH;
3314 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3316 EscapeType eEscapeType;
3317 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3318 '%', eMechanism,
3319 eCharset, eEscapeType);
3320 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart,
3321 '%', eCharset, true);
3322 ePart = PART_URIC;
3324 break;
3327 case INET_PROT_TELNET:
3328 if (pPos < pEnd)
3330 if (*pPos != '/' || pEnd - pPos > 1)
3331 return false;
3332 ++pPos;
3334 aTheSynPath.append(sal_Unicode('/'));
3335 break;
3337 case INET_PROT_VND_SUN_STAR_TDOC:
3338 if (pPos == pEnd || *pPos != '/')
3339 return false;
3340 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3342 EscapeType eEscapeType;
3343 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3344 '%', eMechanism,
3345 eCharset, eEscapeType);
3346 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3347 aTheSynPath.append(sal_Unicode('/'));
3348 else
3349 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3350 PART_PCHAR, '%', eCharset, false);
3352 break;
3354 case INET_PROT_GENERIC:
3355 case INET_PROT_SFTP:
3356 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3358 EscapeType eEscapeType;
3359 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3360 '%', eMechanism,
3361 eCharset, eEscapeType);
3362 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3363 PART_URIC, '%', eCharset, true);
3365 if (aTheSynPath.getLength() == 0)
3366 return false;
3367 break;
3368 default:
3369 OSL_ASSERT(false);
3370 break;
3373 *pBegin = pPos;
3374 rSynPath = aTheSynPath;
3375 return true;
3378 bool INetURLObject::setPath(OUString const & rThePath, bool bOctets,
3379 EncodeMechanism eMechanism,
3380 rtl_TextEncoding eCharset)
3382 OUStringBuffer aSynPath;
3383 sal_Unicode const * p = rThePath.getStr();
3384 sal_Unicode const * pEnd = p + rThePath.getLength();
3385 if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false,
3386 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3387 || p != pEnd)
3388 return false;
3389 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3390 m_aQuery += nDelta;
3391 m_aFragment += nDelta;
3392 return true;
3395 bool INetURLObject::checkHierarchical() const {
3396 if (m_eScheme == INET_PROT_VND_SUN_STAR_EXPAND) {
3397 OSL_FAIL(
3398 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3399 return true;
3400 } else {
3401 return getSchemeInfo().m_bHierarchical;
3405 bool INetURLObject::appendSegment(OUString const & rTheSegment,
3406 bool bOctets, EncodeMechanism eMechanism,
3407 rtl_TextEncoding eCharset)
3409 return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true,
3410 eMechanism, eCharset);
3413 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3414 bool bIgnoreFinalSlash)
3415 const
3417 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3418 "INetURLObject::getSegment(): Bad index");
3420 if (!checkHierarchical())
3421 return SubString();
3423 sal_Unicode const * pPathBegin
3424 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3425 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3426 sal_Unicode const * pSegBegin;
3427 sal_Unicode const * pSegEnd;
3428 if (nIndex == LAST_SEGMENT)
3430 pSegEnd = pPathEnd;
3431 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3432 --pSegEnd;
3433 if (pSegEnd <= pPathBegin)
3434 return SubString();
3435 pSegBegin = pSegEnd - 1;
3436 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3437 --pSegBegin;
3439 else
3441 pSegBegin = pPathBegin;
3442 while (nIndex-- > 0)
3445 ++pSegBegin;
3446 if (pSegBegin >= pPathEnd)
3447 return SubString();
3449 while (*pSegBegin != '/');
3450 pSegEnd = pSegBegin + 1;
3451 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3452 ++pSegEnd;
3455 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3456 pSegEnd - pSegBegin);
3459 bool INetURLObject::insertName(OUString const & rTheName, bool bOctets,
3460 bool bAppendFinalSlash, sal_Int32 nIndex,
3461 bool bIgnoreFinalSlash,
3462 EncodeMechanism eMechanism,
3463 rtl_TextEncoding eCharset)
3465 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3466 "INetURLObject::insertName(): Bad index");
3468 if (!checkHierarchical())
3469 return false;
3471 sal_Unicode const * pPathBegin
3472 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3473 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3474 sal_Unicode const * pPrefixEnd;
3475 bool bInsertSlash;
3476 sal_Unicode const * pSuffixBegin;
3477 if (nIndex == LAST_SEGMENT)
3479 pPrefixEnd = pPathEnd;
3480 if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin &&
3481 pPrefixEnd[-1] == '/')
3483 --pPrefixEnd;
3485 bInsertSlash = bAppendFinalSlash;
3486 pSuffixBegin = pPathEnd;
3488 else if (nIndex == 0)
3490 pPrefixEnd = pPathBegin;
3491 bInsertSlash =
3492 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3493 (pPathBegin == pPathEnd && bAppendFinalSlash);
3494 pSuffixBegin =
3495 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3496 !bAppendFinalSlash && bIgnoreFinalSlash)
3497 ? pPathEnd : pPathBegin;
3499 else
3501 pPrefixEnd = pPathBegin;
3502 sal_Unicode const * pEnd = pPathEnd;
3503 if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/')
3504 --pEnd;
3505 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3506 bInsertSlash = false;
3507 pSuffixBegin = pPathEnd;
3508 while (nIndex-- > 0)
3509 for (;;)
3511 if (bSkip)
3512 ++pPrefixEnd;
3513 bSkip = true;
3514 if (pPrefixEnd >= pEnd)
3516 if (nIndex == 0)
3518 bInsertSlash = bAppendFinalSlash;
3519 break;
3521 else
3522 return false;
3524 if (*pPrefixEnd == '/')
3526 pSuffixBegin = pPrefixEnd;
3527 break;
3532 OUStringBuffer aNewPath;
3533 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3534 aNewPath.append(sal_Unicode('/'));
3535 aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR, getEscapePrefix(),
3536 eMechanism, eCharset, true));
3537 if (bInsertSlash) {
3538 aNewPath.append(sal_Unicode('/'));
3540 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3542 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
3543 RTL_TEXTENCODING_UTF8);
3546 bool INetURLObject::clearQuery()
3548 if (HasError())
3549 return false;
3550 if (m_aQuery.isPresent())
3552 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3553 m_aQuery.getLength() + 1);
3554 m_aFragment += m_aQuery.clear() - 1;
3556 return false;
3559 bool INetURLObject::setQuery(OUString const & rTheQuery, bool bOctets,
3560 EncodeMechanism eMechanism,
3561 rtl_TextEncoding eCharset)
3563 if (!getSchemeInfo().m_bQuery)
3564 return false;
3565 OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC,
3566 getEscapePrefix(), eMechanism, eCharset,
3567 true));
3568 sal_Int32 nDelta;
3569 if (m_aQuery.isPresent())
3570 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3571 else
3573 m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?'));
3574 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3575 + 1;
3577 m_aFragment += nDelta;
3578 return true;
3581 bool INetURLObject::clearFragment()
3583 if (HasError())
3584 return false;
3585 if (m_aFragment.isPresent())
3587 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3588 m_aFragment.clear();
3590 return true;
3593 bool INetURLObject::setFragment(OUString const & rTheFragment,
3594 bool bOctets, EncodeMechanism eMechanism,
3595 rtl_TextEncoding eCharset)
3597 if (HasError())
3598 return false;
3599 OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC,
3600 getEscapePrefix(), eMechanism,
3601 eCharset, true));
3602 if (m_aFragment.isPresent())
3603 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3604 else
3606 m_aAbsURIRef.append(sal_Unicode('#'));
3607 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3609 return true;
3612 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3614 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3615 return (eStyle & FSYS_DOS) != 0
3616 && m_aPath.getLength() >= 3
3617 && p[0] == '/'
3618 && rtl::isAsciiAlpha(p[1])
3619 && p[2] == ':'
3620 && (m_aPath.getLength() == 3 || p[3] == '/');
3623 // static
3624 OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3625 sal_Unicode const * pEnd, bool bOctets,
3626 Part ePart, sal_Char cEscapePrefix,
3627 EncodeMechanism eMechanism,
3628 rtl_TextEncoding eCharset,
3629 bool bKeepVisibleEscapes)
3631 OUStringBuffer aResult;
3632 while (pBegin < pEnd)
3634 EscapeType eEscapeType;
3635 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix,
3636 eMechanism, eCharset, eEscapeType);
3637 appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart,
3638 cEscapePrefix, eCharset, bKeepVisibleEscapes);
3640 return aResult.makeStringAndClear();
3643 // static
3644 OUString INetURLObject::decode(sal_Unicode const * pBegin,
3645 sal_Unicode const * pEnd,
3646 sal_Char cEscapePrefix,
3647 DecodeMechanism eMechanism,
3648 rtl_TextEncoding eCharset)
3650 switch (eMechanism)
3652 case NO_DECODE:
3653 return OUString(pBegin, pEnd - pBegin);
3655 case DECODE_TO_IURI:
3656 eCharset = RTL_TEXTENCODING_UTF8;
3657 break;
3659 default:
3660 break;
3662 OUStringBuffer aResult;
3663 while (pBegin < pEnd)
3665 EscapeType eEscapeType;
3666 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix,
3667 WAS_ENCODED, eCharset, eEscapeType);
3668 switch (eEscapeType)
3670 case ESCAPE_NO:
3671 aResult.appendUtf32(nUTF32);
3672 break;
3674 case ESCAPE_OCTET:
3675 appendEscape(aResult, cEscapePrefix, nUTF32);
3676 break;
3678 case ESCAPE_UTF32:
3679 if (
3680 INetMIME::isUSASCII(nUTF32) &&
3682 eMechanism == DECODE_TO_IURI ||
3684 eMechanism == DECODE_UNAMBIGUOUS &&
3685 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3690 appendEscape(aResult, cEscapePrefix, nUTF32);
3692 else
3693 aResult.appendUtf32(nUTF32);
3694 break;
3697 return aResult.makeStringAndClear();
3700 OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3701 rtl_TextEncoding eCharset) const
3703 INetURLObject aTemp(*this);
3704 aTemp.clearPassword();
3705 return aTemp.GetMainURL(eMechanism, eCharset);
3708 OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3709 rtl_TextEncoding eCharset) const
3711 INetURLObject aTemp(*this);
3712 aTemp.clearFragment();
3713 return aTemp.GetMainURL(eMechanism, eCharset);
// Build a shortened, decoded rendering of this URL whose width, as measured
// by rStringWidth->queryStringWidth(), should not exceed nWidth.
//
// rStringWidth  width measurer; must be non-null (checked by OSL_ENSURE)
// nWidth        maximum width, in the units queryStringWidth() reports
// eMechanism    decode mechanism forwarded to decode()
// eCharset      charset forwarded to decode()
//
// Strategy as visible below: for hierarchical schemes, path segments are
// taken alternately from the end and from the start of the "core" (authority
// plus path, or just the path) as long as the candidate still fits, with
// "..." standing in for whatever is left out.  If no segment fits, the whole
// URL is emitted and then cut down character by character.
3716 OUString
3717 INetURLObject::getAbbreviated(
3718 star::uno::Reference< star::util::XStringWidth > const & rStringWidth,
3719 sal_Int32 nWidth,
3720 DecodeMechanism eMechanism,
3721 rtl_TextEncoding eCharset)
3722 const
3724 OSL_ENSURE(rStringWidth.is(), "specification violation");
3725 sal_Char cEscapePrefix = getEscapePrefix();
// aBuffer accumulates the part of the result that is already decided,
// starting with the scheme.
3726 OUStringBuffer aBuffer;
3727 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3728 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3729 if (m_eScheme != INET_PROT_GENERIC)
3731 aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3733 else
3735 if (m_aAbsURIRef.getLength() != 0)
3737 sal_Unicode const * pSchemeBegin
3738 = m_aAbsURIRef.getStr();
3739 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// Scan up to the first ':'; there is no bounds check, so this relies on the
// stored URL containing one.
3741 while (pSchemeEnd[0] != ':')
3743 ++pSchemeEnd;
3745 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3748 aBuffer.append(static_cast< sal_Unicode >(':'));
// The "core" is the range that gets abbreviated: it starts at the authority
// when the scheme has one, otherwise at the path, and ends with the path.
3749 bool bAuthority = getSchemeInfo().m_bAuthority;
3750 sal_Unicode const * pCoreBegin
3751 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3752 m_aPath.getBegin());
3753 sal_Unicode const * pCoreEnd
3754 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
// bSegment records whether at least one segment was accepted.
3755 bool bSegment = false;
3756 if (getSchemeInfo().m_bHierarchical)
// aRest is a placeholder for a present query ("?...") or, failing that, a
// present fragment ("#..."); it is only used in the width probes.
3758 OUString aRest;
3759 if (m_aQuery.isPresent())
3760 aRest = "?...";
3761 else if (m_aFragment.isPresent())
3762 aRest = "#...";
// aTrailer collects accepted segments from the end of the core; pBegin/pEnd
// delimit the still-undecided middle; pPrefixBegin/pSuffixEnd are the probe
// positions of the two directions; bPrefix/bSuffix say whether a direction
// is still being tried.
3763 OUStringBuffer aTrailer;
3764 sal_Unicode const * pBegin = pCoreBegin;
3765 sal_Unicode const * pEnd = pCoreEnd;
3766 sal_Unicode const * pPrefixBegin = pBegin;
3767 sal_Unicode const * pSuffixEnd = pEnd;
3768 bool bPrefix = true;
3769 bool bSuffix = true;
// --- try to take one more segment from the end ---
3772 if (bSuffix)
3774 sal_Unicode const * p = pSuffixEnd - 1;
3775 if (pSuffixEnd == pCoreEnd && *p == '/')
3776 --p;
3777 while (*p != '/')
3778 --p;
3779 if (bAuthority && p == pCoreBegin + 1)
3780 --p;
3781 OUString
3782 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3783 1 : 0),
3784 pSuffixEnd,
3785 cEscapePrefix,
3786 eMechanism,
3787 eCharset));
3788 pSuffixEnd = p;
// Probe: scheme/prefix so far + optional "..." + candidate + trailer + rest.
3789 OUStringBuffer aResult(aBuffer);
3790 if (pSuffixEnd != pBegin)
3791 aResult.append("...");
3792 aResult.append(aSegment);
3793 aResult.append(aTrailer.toString());
3794 aResult.append(aRest);
3795 if (rStringWidth->
3796 queryStringWidth(aResult.makeStringAndClear())
3797 <= nWidth)
// Fits: the candidate becomes part of the trailer.
3799 aTrailer.insert(0, aSegment);
3800 bSegment = true;
3801 pEnd = pSuffixEnd;
3803 else
// Does not fit: stop trying from the end.
3804 bSuffix = false;
3805 if (pPrefixBegin > pSuffixEnd)
3806 pPrefixBegin = pSuffixEnd;
3807 if (pBegin == pEnd)
3808 break;
// --- try to take one more segment from the start ---
3810 if (bPrefix)
3812 sal_Unicode const * p
3813 = pPrefixBegin
3814 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3816 OSL_ASSERT(p <= pEnd);
3817 while (p < pEnd && *p != '/')
3818 ++p;
3819 if (p == pCoreEnd - 1 && *p == '/')
3820 ++p;
3821 OUString
3822 aSegment(decode(pPrefixBegin
3823 + (pPrefixBegin == pCoreBegin ? 0 :
3825 p == pEnd ? p : p + 1,
3826 cEscapePrefix,
3827 eMechanism,
3828 eCharset));
3829 pPrefixBegin = p;
// Probe: prefix so far + candidate + optional "..." + trailer + rest.
3830 OUStringBuffer aResult(aBuffer);
3831 aResult.append(aSegment);
3832 if (pPrefixBegin != pEnd)
3833 aResult.append("...");
3834 aResult.append(aTrailer.toString());
3835 aResult.append(aRest);
3836 if (rStringWidth->
3837 queryStringWidth(aResult.makeStringAndClear())
3838 <= nWidth)
// Fits: the candidate is appended to the decided prefix.
3840 aBuffer.append(aSegment);
3841 bSegment = true;
3842 pBegin = pPrefixBegin;
3844 else
// Does not fit: stop trying from the start.
3845 bPrefix = false;
3846 if (pPrefixBegin > pSuffixEnd)
3847 pSuffixEnd = pPrefixBegin;
3848 if (pBegin == pEnd)
3849 break;
3852 while (bPrefix || bSuffix);
// At least one segment was accepted: join prefix and trailer, with "..."
// in between if something was left out.
3853 if (bSegment)
3855 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3856 aBuffer.append("...");
3857 aBuffer.append(aTrailer.toString());
// No segment was accepted (or the scheme is not hierarchical): emit the
// whole core, and a present query and fragment in full.
3860 if (!bSegment)
3861 aBuffer.append(decode(pCoreBegin,
3862 pCoreEnd,
3863 cEscapePrefix,
3864 eMechanism,
3865 eCharset));
3866 if (m_aQuery.isPresent())
3868 aBuffer.append(static_cast< sal_Unicode >('?'));
3869 aBuffer.append(decode(m_aQuery, cEscapePrefix, eMechanism, eCharset));
3871 if (m_aFragment.isPresent())
3873 aBuffer.append(static_cast< sal_Unicode >('#'));
3874 aBuffer.
3875 append(decode(m_aFragment, cEscapePrefix, eMechanism, eCharset));
// Last resort if the result is still too wide: keep i leading characters
// plus "...", decreasing i each round; once i has reached 0 the remaining
// text is shortened one character at a time until it fits or is empty.
3877 if (aBuffer.getLength() != 0)
3879 OUStringBuffer aResult(aBuffer);
3880 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3881 > nWidth)
3882 for (sal_Int32 i = aBuffer.getLength();;)
3884 if (i == 0)
3886 aBuffer.setLength(aBuffer.getLength() - 1);
3887 if (aBuffer.getLength() == 0)
3888 break;
3890 else
3892 aBuffer.setLength(--i);
3893 aBuffer.append("...");
3895 aResult = aBuffer;
3896 if (rStringWidth->
3897 queryStringWidth(aResult.makeStringAndClear())
3898 <= nWidth)
3899 break;
3902 return aBuffer.makeStringAndClear();
3905 bool INetURLObject::operator ==(INetURLObject const & rObject) const
3907 if (m_eScheme != rObject.m_eScheme)
3908 return false;
3909 if (m_eScheme == INET_PROT_NOT_VALID)
3910 return m_aAbsURIRef.toString() == rObject.m_aAbsURIRef.toString();
3911 if ((m_aScheme.compare(
3912 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3913 != 0)
3914 || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE)
3915 || GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE)
3916 || !GetHost(NO_DECODE).equalsIgnoreAsciiCase(
3917 rObject.GetHost(NO_DECODE))
3918 || GetPort() != rObject.GetPort()
3919 || HasParam() != rObject.HasParam()
3920 || GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE)
3921 || GetMsgId(NO_DECODE) != rObject.GetMsgId(NO_DECODE))
3922 return false;
3923 OUString aPath1(GetURLPath(NO_DECODE));
3924 OUString aPath2(rObject.GetURLPath(NO_DECODE));
3925 switch (m_eScheme)
3927 case INET_PROT_FILE:
3929 // If the URL paths of two file URLs only differ in that one has a
3930 // final '/' and the other has not, take the two paths as
3931 // equivalent (this could be useful for other schemes, too):
3932 sal_Int32 nLength = aPath1.getLength();
3933 switch (nLength - aPath2.getLength())
3935 case -1:
3936 if (aPath2.getStr()[nLength] != '/')
3937 return false;
3938 break;
3940 case 0:
3941 break;
3943 case 1:
3944 if (aPath1.getStr()[--nLength] != '/')
3945 return false;
3946 break;
3948 default:
3949 return false;
3951 return aPath1.compareTo(aPath2, nLength) == 0;
3954 default:
3955 return (aPath1 == aPath2) != false;
3959 bool INetURLObject::operator <(INetURLObject const & rObject) const
3961 sal_Int32 nCompare = m_aScheme.compare(
3962 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef);
3963 if (nCompare < 0) {
3964 return true;
3965 } else if (nCompare > 0) {
3966 return false;
3968 sal_uInt32 nPort1 = GetPort();
3969 sal_uInt32 nPort2 = rObject.GetPort();
3970 if (nPort1 < nPort2)
3971 return true;
3972 else if (nPort1 > nPort2)
3973 return false;
3974 nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE));
3975 if (nCompare < 0)
3976 return true;
3977 else if (nCompare > 0)
3978 return false;
3979 nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE));
3980 if (nCompare < 0)
3981 return true;
3982 else if (nCompare > 0)
3983 return false;
3984 nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE));
3985 if (nCompare < 0)
3986 return true;
3987 else if (nCompare > 0)
3988 return false;
3989 const OUString &rPath1(GetURLPath(NO_DECODE));
3990 const OUString &rPath2(rObject.GetURLPath(NO_DECODE));
3991 nCompare = rPath1.compareTo(rPath2);
3992 if (nCompare < 0)
3993 return true;
3994 else if (nCompare > 0)
3995 return false;
3996 nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE));
3997 if (nCompare < 0)
3998 return true;
3999 else if (nCompare > 0)
4000 return false;
4001 return GetMsgId(NO_DECODE).compareTo(rObject.GetMsgId(NO_DECODE)) < 0;
// Rebuild this URL object from individual components.
//
// eTheScheme    scheme to use; INET_PROT_GENERIC is rejected
// rTheUser      user name; must be empty if the scheme has no user part
// rThePassword  password; must be empty if the scheme has no password part
// rTheHost      host; must be empty if the scheme has no host part
// nThePort      port, 0 meaning "none"; must be 0 if the scheme has no port
// rThePath      path text, validated with parsePath()
// eMechanism    encode mechanism forwarded to the encoding/parsing helpers
// eCharset      charset forwarded to the encoding/parsing helpers
//
// Returns true on success.  On every failure path visible below the object
// is set invalid (setInvalid()) and false is returned.
4004 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
4005 OUString const & rTheUser,
4006 OUString const & rThePassword,
4007 OUString const & rTheHost,
4008 sal_uInt32 nThePort,
4009 OUString const & rThePath,
4010 EncodeMechanism eMechanism,
4011 rtl_TextEncoding eCharset)
// Start from an invalid object, then adopt the requested scheme.
4013 setInvalid();
4014 m_eScheme = eTheScheme;
4015 if (HasError() || m_eScheme == INET_PROT_GENERIC)
4016 return false;
// The URL text is built up in m_aAbsURIRef: "<scheme>:" first.
4017 m_aAbsURIRef.setLength(0);
4018 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
4019 m_aAbsURIRef.append(sal_Unicode(':'));
// Authority part ("//[user[:password]@]host[:port]"), only for schemes whose
// scheme info has m_bAuthority set.
4020 if (getSchemeInfo().m_bAuthority)
4022 m_aAbsURIRef.append("//");
// bUserInfo remembers whether user and/or password were written, so that an
// '@' is emitted before the host.
4023 bool bUserInfo = false;
4024 if (getSchemeInfo().m_bUser)
// IMAP URLs require a non-empty user.
4026 if (m_eScheme == INET_PROT_IMAP && rTheUser.isEmpty())
4028 setInvalid();
4029 return false;
4031 if (!rTheUser.isEmpty())
// The part used for encoding the user depends on the scheme.
4033 m_aUser.set(m_aAbsURIRef,
4034 encodeText(rTheUser, false,
4035 m_eScheme == INET_PROT_IMAP ?
4036 PART_IMAP_ACHAR :
4037 m_eScheme == INET_PROT_VIM ?
4038 PART_VIM :
4039 PART_USER_PASSWORD,
4040 getEscapePrefix(), eMechanism,
4041 eCharset, false),
4042 m_aAbsURIRef.getLength());
4043 bUserInfo = true;
// A user was given although the scheme has no user part.
4046 else if (!rTheUser.isEmpty())
4048 setInvalid();
4049 return false;
4051 if (!rThePassword.isEmpty())
4053 if (getSchemeInfo().m_bPassword)
4055 m_aAbsURIRef.append(sal_Unicode(':'));
4056 m_aAuth.set(m_aAbsURIRef,
4057 encodeText(rThePassword, false,
4058 m_eScheme == INET_PROT_VIM ?
4059 PART_VIM : PART_USER_PASSWORD,
4060 getEscapePrefix(), eMechanism,
4061 eCharset, false),
4062 m_aAbsURIRef.getLength());
4063 bUserInfo = true;
// A password was given although the scheme has no password part.
4065 else
4067 setInvalid();
4068 return false;
4071 if (bUserInfo && getSchemeInfo().m_bHost)
4072 m_aAbsURIRef.append(sal_Unicode('@'));
4073 if (getSchemeInfo().m_bHost)
4075 OUStringBuffer aSynHost(rTheHost);
// bNetBiosName is forwarded to parseHostOrNetBiosName() below; it is only
// set for file URLs.
4076 bool bNetBiosName = false;
4077 switch (m_eScheme)
4079 case INET_PROT_FILE:
// For file URLs a host of "localhost" (any ASCII case) is replaced by the
// empty host.
4081 OUString sTemp(aSynHost.toString());
4082 if (sTemp.equalsIgnoreAsciiCase( "localhost" ))
4084 aSynHost.setLength(0);
4086 bNetBiosName = true;
4088 break;
4090 case INET_PROT_LDAP:
// LDAP allows an empty host, but not an empty host together with a port.
4091 if (aSynHost.getLength() == 0 && nThePort != 0)
4093 setInvalid();
4094 return false;
4096 break;
4098 default:
// All other schemes with a host part require a non-empty host.
4099 if (aSynHost.getLength() == 0)
4101 setInvalid();
4102 return false;
4104 break;
// Validate the host; aSynHost is also passed as the output argument.
4106 if (!parseHostOrNetBiosName(
4107 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
4108 false, eMechanism, eCharset, bNetBiosName, &aSynHost))
4110 setInvalid();
4111 return false;
4113 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
4114 m_aAbsURIRef.getLength());
4115 if (nThePort != 0)
4117 if (getSchemeInfo().m_bPort)
4119 m_aAbsURIRef.append(sal_Unicode(':'));
4120 m_aPort.set(m_aAbsURIRef,
4121 OUString::valueOf(sal_Int64(nThePort)),
4122 m_aAbsURIRef.getLength());
// A port was given although the scheme has no port part.
4124 else
4126 setInvalid();
4127 return false;
// Host or port were given although the scheme has no host part.
4131 else if (!rTheHost.isEmpty() || nThePort != 0)
4133 setInvalid();
4134 return false;
// Path: must be accepted by parsePath() for this scheme and be consumed
// completely.
4137 OUStringBuffer aSynPath;
4138 sal_Unicode const * p = rThePath.getStr();
4139 sal_Unicode const * pEnd = p + rThePath.getLength();
4140 if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/',
4141 0x80000000, 0x80000000, 0x80000000, aSynPath)
4142 || p != pEnd)
4144 setInvalid();
4145 return false;
4147 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
4148 m_aAbsURIRef.getLength());
4149 return true;
4152 // static
4153 OUString INetURLObject::GetAbsURL(OUString const & rTheBaseURIRef,
4154 OUString const & rTheRelURIRef,
4155 bool bIgnoreFragment,
4156 EncodeMechanism eEncodeMechanism,
4157 DecodeMechanism eDecodeMechanism,
4158 rtl_TextEncoding eCharset,
4159 FSysStyle eStyle)
4161 // Backwards compatibility:
4162 if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
4163 return rTheRelURIRef;
4165 INetURLObject aTheAbsURIRef;
4166 bool bWasAbsolute;
4167 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
4168 convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef,
4169 bWasAbsolute, eEncodeMechanism,
4170 eCharset, bIgnoreFragment, false,
4171 false, eStyle)
4172 || eEncodeMechanism != WAS_ENCODED
4173 || eDecodeMechanism != DECODE_TO_IURI
4174 || eCharset != RTL_TEXTENCODING_UTF8 ?
4175 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
4176 rTheRelURIRef;
4179 OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
4180 rtl_TextEncoding eCharset) const
4182 OUString aTheExtURIRef;
4183 translateToExternal(
4184 m_aAbsURIRef.toString(), aTheExtURIRef, eMechanism, eCharset);
4185 return aTheExtURIRef;
4188 // static
4189 OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
4191 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
4194 // static
4195 OUString INetURLObject::GetSchemeName(INetProtocol eTheScheme)
4197 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pScheme);
4200 // static
4201 INetProtocol INetURLObject::CompareProtocolScheme(OUString const &
4202 rTheAbsURIRef)
4204 sal_Unicode const * p = rTheAbsURIRef.getStr();
4205 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
4206 return pPrefix ? pPrefix->m_eScheme : INET_PROT_NOT_VALID;
4209 OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
4210 rtl_TextEncoding eCharset)
4212 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
4213 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
4214 if (!getSchemeInfo().m_bHost)
4215 return OUString();
4216 OUStringBuffer aHostPort(decode(m_aHost, getEscapePrefix(),
4217 eMechanism, eCharset));
4218 if (m_aPort.isPresent())
4220 aHostPort.append(sal_Unicode(':'));
4221 aHostPort.append(decode(m_aPort, getEscapePrefix(),
4222 eMechanism, eCharset));
4224 return aHostPort.makeStringAndClear();
4227 sal_uInt32 INetURLObject::GetPort() const
4229 if (m_aPort.isPresent())
4231 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4232 sal_Unicode const * pEnd = p + m_aPort.getLength();
4233 sal_uInt32 nThePort;
4234 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4235 return nThePort;
4237 return 0;
4240 bool INetURLObject::SetPort(sal_uInt32 nThePort)
4242 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
4244 OUString aNewPort(OUString::valueOf(sal_Int64(nThePort)));
4245 sal_Int32 nDelta;
4246 if (m_aPort.isPresent())
4247 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
4248 else
4250 m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':'));
4251 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
4252 + 1;
4254 m_aPath += nDelta;
4255 m_aQuery += nDelta;
4256 m_aFragment += nDelta;
4257 return true;
4259 return false;
4262 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
4264 if (!checkHierarchical())
4265 return 0;
4267 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4268 sal_Unicode const * pEnd = p + m_aPath.getLength();
4269 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
4270 --pEnd;
4271 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
4272 while (p != pEnd)
4273 if (*p++ == '/')
4274 ++n;
4275 return n;
4278 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4280 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4281 if (!aSegment.isPresent())
4282 return false;
4284 OUStringBuffer aNewPath;
4285 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4286 aSegment.getBegin() - m_aPath.getBegin());
4287 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4288 aNewPath.append(sal_Unicode('/'));
4289 else
4290 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4291 m_aPath.getEnd() - aSegment.getEnd());
4292 if (aNewPath.getLength() == 0 && !aSegment.isEmpty() &&
4293 m_aAbsURIRef[aSegment.getBegin()] == '/')
4295 aNewPath.append(sal_Unicode('/'));
4298 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4299 RTL_TEXTENCODING_UTF8);
4302 OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4303 DecodeMechanism eMechanism,
4304 rtl_TextEncoding eCharset) const
4306 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4307 if (!aSegment.isPresent())
4308 return OUString();
4310 sal_Unicode const * pSegBegin
4311 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4312 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4314 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4315 ++pSegBegin;
4316 sal_Unicode const * p = pSegBegin;
4317 while (p != pSegEnd && *p != ';')
4318 ++p;
4320 return decode(pSegBegin, p, getEscapePrefix(), eMechanism, eCharset);
4323 bool INetURLObject::setName(OUString const & rTheName, sal_Int32 nIndex,
4324 bool bIgnoreFinalSlash,
4325 EncodeMechanism eMechanism,
4326 rtl_TextEncoding eCharset)
4328 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4329 if (!aSegment.isPresent())
4330 return false;
4332 sal_Unicode const * pPathBegin
4333 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4334 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4335 sal_Unicode const * pSegBegin
4336 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4337 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4339 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4340 ++pSegBegin;
4341 sal_Unicode const * p = pSegBegin;
4342 while (p != pSegEnd && *p != ';')
4343 ++p;
4345 OUStringBuffer aNewPath;
4346 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4347 aNewPath.append(encodeText(rTheName, false, PART_PCHAR, getEscapePrefix(),
4348 eMechanism, eCharset, true));
4349 aNewPath.append(p, pPathEnd - p);
4351 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4352 RTL_TEXTENCODING_UTF8);
4355 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4356 const
4358 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4359 if (!aSegment.isPresent())
4360 return false;
4362 sal_Unicode const * pSegBegin
4363 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4364 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4366 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4367 ++pSegBegin;
4368 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4369 if (*p == '.' && p != pSegBegin)
4370 return true;
4371 return false;
4374 OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4375 DecodeMechanism eMechanism,
4376 rtl_TextEncoding eCharset) const
4378 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4379 if (!aSegment.isPresent())
4380 return OUString();
4382 sal_Unicode const * pSegBegin
4383 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4384 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4386 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4387 ++pSegBegin;
4388 sal_Unicode const * pExtension = 0;
4389 sal_Unicode const * p = pSegBegin;
4390 for (; p != pSegEnd && *p != ';'; ++p)
4391 if (*p == '.' && p != pSegBegin)
4392 pExtension = p;
4393 if (!pExtension)
4394 pExtension = p;
4396 return decode(pSegBegin, pExtension, getEscapePrefix(), eMechanism,
4397 eCharset);
4400 bool INetURLObject::setBase(OUString const & rTheBase, sal_Int32 nIndex,
4401 bool bIgnoreFinalSlash,
4402 EncodeMechanism eMechanism,
4403 rtl_TextEncoding eCharset)
4405 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4406 if (!aSegment.isPresent())
4407 return false;
4409 sal_Unicode const * pPathBegin
4410 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4411 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4412 sal_Unicode const * pSegBegin
4413 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4414 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4416 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4417 ++pSegBegin;
4418 sal_Unicode const * pExtension = 0;
4419 sal_Unicode const * p = pSegBegin;
4420 for (; p != pSegEnd && *p != ';'; ++p)
4421 if (*p == '.' && p != pSegBegin)
4422 pExtension = p;
4423 if (!pExtension)
4424 pExtension = p;
4426 OUStringBuffer aNewPath;
4427 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4428 aNewPath.append(encodeText(rTheBase, false, PART_PCHAR, getEscapePrefix(),
4429 eMechanism, eCharset, true));
4430 aNewPath.append(pExtension, pPathEnd - pExtension);
4432 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4433 RTL_TEXTENCODING_UTF8);
4436 OUString INetURLObject::getExtension(sal_Int32 nIndex,
4437 bool bIgnoreFinalSlash,
4438 DecodeMechanism eMechanism,
4439 rtl_TextEncoding eCharset) const
4441 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4442 if (!aSegment.isPresent())
4443 return OUString();
4445 sal_Unicode const * pSegBegin
4446 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4447 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4449 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4450 ++pSegBegin;
4451 sal_Unicode const * pExtension = 0;
4452 sal_Unicode const * p = pSegBegin;
4453 for (; p != pSegEnd && *p != ';'; ++p)
4454 if (*p == '.' && p != pSegBegin)
4455 pExtension = p;
4457 if (!pExtension)
4458 return OUString();
4460 return decode(pExtension + 1, p, getEscapePrefix(), eMechanism, eCharset);
4463 bool INetURLObject::setExtension(OUString const & rTheExtension,
4464 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4465 EncodeMechanism eMechanism,
4466 rtl_TextEncoding eCharset)
4468 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4469 if (!aSegment.isPresent())
4470 return false;
4472 sal_Unicode const * pPathBegin
4473 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4474 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4475 sal_Unicode const * pSegBegin
4476 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4477 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4479 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4480 ++pSegBegin;
4481 sal_Unicode const * pExtension = 0;
4482 sal_Unicode const * p = pSegBegin;
4483 for (; p != pSegEnd && *p != ';'; ++p)
4484 if (*p == '.' && p != pSegBegin)
4485 pExtension = p;
4486 if (!pExtension)
4487 pExtension = p;
4489 OUStringBuffer aNewPath;
4490 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4491 aNewPath.append(sal_Unicode('.'));
4492 aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR,
4493 getEscapePrefix(), eMechanism, eCharset, true));
4494 aNewPath.append(p, pPathEnd - p);
4496 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4497 RTL_TEXTENCODING_UTF8);
4500 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4502 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4503 if (!aSegment.isPresent())
4504 return false;
4506 sal_Unicode const * pPathBegin
4507 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4508 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4509 sal_Unicode const * pSegBegin
4510 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4511 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4513 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4514 ++pSegBegin;
4515 sal_Unicode const * pExtension = 0;
4516 sal_Unicode const * p = pSegBegin;
4517 for (; p != pSegEnd && *p != ';'; ++p)
4518 if (*p == '.' && p != pSegBegin)
4519 pExtension = p;
4520 if (!pExtension)
4521 return true;
4523 OUStringBuffer aNewPath;
4524 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4525 aNewPath.append(p, pPathEnd - p);
4527 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4528 RTL_TEXTENCODING_UTF8);
4531 bool INetURLObject::hasFinalSlash() const
4533 if (!checkHierarchical())
4534 return false;
4536 sal_Unicode const * pPathBegin
4537 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4538 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4539 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4542 bool INetURLObject::setFinalSlash()
4544 if (!checkHierarchical())
4545 return false;
4547 sal_Unicode const * pPathBegin
4548 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4549 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4550 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4551 return true;
4553 OUStringBuffer aNewPath;
4554 aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4555 aNewPath.append(sal_Unicode('/'));
4557 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4558 RTL_TEXTENCODING_UTF8);
4561 bool INetURLObject::removeFinalSlash()
4563 if (!checkHierarchical())
4564 return false;
4566 sal_Unicode const * pPathBegin
4567 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4568 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4569 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4570 return true;
4572 --pPathEnd;
4573 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4574 return false;
4575 OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4577 return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8);
4580 bool INetURLObject::setFSysPath(OUString const & rFSysPath,
4581 FSysStyle eStyle)
4583 sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4584 sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
4586 switch ((eStyle & FSYS_VOS ? 1 : 0)
4587 + (eStyle & FSYS_UNX ? 1 : 0)
4588 + (eStyle & FSYS_DOS ? 1 : 0)
4589 + (eStyle & FSYS_MAC ? 1 : 0))
4591 case 0:
4592 return false;
4594 case 1:
4595 break;
4597 default:
4598 if (eStyle & FSYS_VOS
4599 && pFSysEnd - pFSysBegin >= 2
4600 && pFSysBegin[0] == '/'
4601 && pFSysBegin[1] == '/')
4603 if (pFSysEnd - pFSysBegin >= 3
4604 && pFSysBegin[2] == '.'
4605 && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4607 eStyle = FSYS_VOS; // Production T1
4608 break;
4611 sal_Unicode const * p = pFSysBegin + 2;
4612 OUString aHost;
4613 if (parseHost(p, pFSysEnd, aHost)
4614 && (p == pFSysEnd || *p == '/'))
4616 eStyle = FSYS_VOS; // Production T2
4617 break;
4621 if (eStyle & FSYS_DOS
4622 && pFSysEnd - pFSysBegin >= 2
4623 && pFSysBegin[0] == '\\'
4624 && pFSysBegin[1] == '\\')
4626 sal_Unicode const * p = pFSysBegin + 2;
4627 OUString aHost;
4628 if (parseHost(p, pFSysEnd, aHost)
4629 && (p == pFSysEnd || *p == '\\'))
4631 eStyle = FSYS_DOS; // Production T3
4632 break;
4636 if (eStyle & FSYS_DOS
4637 && pFSysEnd - pFSysBegin >= 2
4638 && rtl::isAsciiAlpha(pFSysBegin[0])
4639 && pFSysBegin[1] == ':'
4640 && (pFSysEnd - pFSysBegin == 2
4641 || pFSysBegin[2] == '/'
4642 || pFSysBegin[2] == '\\'))
4644 eStyle = FSYS_DOS; // Productions T4, T5
4645 break;
4648 if (!(eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC)))
4649 return false;
4651 eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4652 // Production T6
4653 break;
4656 OUStringBuffer aSynAbsURIRef(OUString("file://"));
4658 switch (eStyle)
4660 case FSYS_VOS:
4662 sal_Unicode const * p = pFSysBegin;
4663 if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4664 return false;
4665 if (p != pFSysEnd && *p == '.'
4666 && (pFSysEnd - p == 1 || p[1] == '/'))
4667 ++p;
4668 for (; p != pFSysEnd; ++p)
4669 switch (*p)
4671 case '#':
4672 case '%':
4673 appendEscape(aSynAbsURIRef, '%', *p);
4674 break;
4676 default:
4677 aSynAbsURIRef.append(*p);
4678 break;
4680 break;
4683 case FSYS_UNX:
4685 sal_Unicode const * p = pFSysBegin;
4686 if (p != pFSysEnd && *p != '/')
4687 return false;
4688 for (; p != pFSysEnd; ++p)
4689 switch (*p)
4691 case '|':
4692 case '#':
4693 case '%':
4694 appendEscape(aSynAbsURIRef, '%', *p);
4695 break;
4697 default:
4698 aSynAbsURIRef.append(*p);
4699 break;
4701 break;
4704 case FSYS_DOS:
4706 sal_uInt32 nAltDelimiter = 0x80000000;
4707 sal_Unicode const * p = pFSysBegin;
4708 if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4709 p += 2;
4710 else
4712 aSynAbsURIRef.append(sal_Unicode('/'));
4713 if (pFSysEnd - p >= 2
4714 && rtl::isAsciiAlpha(p[0])
4715 && p[1] == ':'
4716 && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4717 nAltDelimiter = '/';
4719 for (; p != pFSysEnd; ++p)
4720 if (*p == '\\' || *p == nAltDelimiter)
4721 aSynAbsURIRef.append(sal_Unicode('/'));
4722 else
4723 switch (*p)
4725 case '/':
4726 case '#':
4727 case '%':
4728 appendEscape(aSynAbsURIRef, '%', *p);
4729 break;
4731 default:
4732 aSynAbsURIRef.append(*p);
4733 break;
4735 break;
4738 case FSYS_MAC:
4739 aSynAbsURIRef.append(sal_Unicode('/'));
4740 for (sal_Unicode const * p = pFSysBegin; p != pFSysEnd; ++p)
4742 switch (*p)
4744 case ':':
4745 aSynAbsURIRef.append(sal_Unicode('/'));
4746 break;
4748 case '/':
4749 case '|':
4750 case '#':
4751 case '%':
4752 appendEscape(aSynAbsURIRef, '%', *p);
4753 break;
4755 default:
4756 aSynAbsURIRef.append(*p);
4757 break;
4760 break;
4762 default:
4763 OSL_ASSERT(false);
4764 break;
4767 INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), WAS_ENCODED,
4768 RTL_TEXTENCODING_UTF8);
4769 if (aTemp.HasError())
4770 return false;
4772 *this = aTemp;
4773 return true;
4776 OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4777 sal_Unicode * pDelimiter) const
4779 if (m_eScheme != INET_PROT_FILE)
4780 return OUString();
4782 if ((eStyle & FSYS_VOS ? 1 : 0)
4783 + (eStyle & FSYS_UNX ? 1 : 0)
4784 + (eStyle & FSYS_DOS ? 1 : 0)
4785 + (eStyle & FSYS_MAC ? 1 : 0)
4786 > 1)
4788 eStyle = eStyle & FSYS_VOS
4789 && m_aHost.isPresent()
4790 && m_aHost.getLength() > 0 ?
4791 FSYS_VOS :
4792 hasDosVolume(eStyle)
4793 || ((eStyle & FSYS_DOS) != 0
4794 && m_aHost.isPresent()
4795 && m_aHost.getLength() > 0) ?
4796 FSYS_DOS :
4797 eStyle & FSYS_UNX
4798 && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ?
4799 FSYS_UNX :
4800 FSysStyle(0);
4803 switch (eStyle)
4805 case FSYS_VOS:
4807 if (pDelimiter)
4808 *pDelimiter = '/';
4810 OUStringBuffer aSynFSysPath;
4811 aSynFSysPath.append("//");
4812 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4813 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
4814 RTL_TEXTENCODING_UTF8));
4815 else
4816 aSynFSysPath.append(sal_Unicode('.'));
4817 aSynFSysPath.append(decode(m_aPath, '%', DECODE_WITH_CHARSET,
4818 RTL_TEXTENCODING_UTF8));
4819 return aSynFSysPath.makeStringAndClear();
4822 case FSYS_UNX:
4824 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4825 return OUString();
4827 if (pDelimiter)
4828 *pDelimiter = '/';
4830 return decode(m_aPath, '%', DECODE_WITH_CHARSET,
4831 RTL_TEXTENCODING_UTF8);
4834 case FSYS_DOS:
4836 if (pDelimiter)
4837 *pDelimiter = '\\';
4839 OUStringBuffer aSynFSysPath;
4840 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4842 aSynFSysPath.append("\\\\");
4843 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
4844 RTL_TEXTENCODING_UTF8));
4845 aSynFSysPath.append(sal_Unicode('\\'));
4847 sal_Unicode const * p
4848 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4849 sal_Unicode const * pEnd = p + m_aPath.getLength();
4850 DBG_ASSERT(p < pEnd && *p == '/',
4851 "INetURLObject::getFSysPath(): Bad path");
4852 ++p;
4853 while (p < pEnd)
4855 EscapeType eEscapeType;
4856 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
4857 RTL_TEXTENCODING_UTF8,
4858 eEscapeType);
4859 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
4860 aSynFSysPath.append(sal_Unicode('\\'));
4861 else
4862 aSynFSysPath.appendUtf32(nUTF32);
4864 return aSynFSysPath.makeStringAndClear();
4867 case FSYS_MAC:
4869 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4870 return OUString();
4872 if (pDelimiter)
4873 *pDelimiter = ':';
4875 OUStringBuffer aSynFSysPath;
4876 sal_Unicode const * p
4877 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4878 sal_Unicode const * pEnd = p + m_aPath.getLength();
4879 DBG_ASSERT(p < pEnd && *p == '/',
4880 "INetURLObject::getFSysPath(): Bad path");
4881 ++p;
4882 while (p < pEnd)
4884 EscapeType eEscapeType;
4885 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
4886 RTL_TEXTENCODING_UTF8,
4887 eEscapeType);
4888 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
4889 aSynFSysPath.append(sal_Unicode(':'));
4890 else
4891 aSynFSysPath.appendUtf32(nUTF32);
4893 return aSynFSysPath.makeStringAndClear();
4896 default:
4897 return OUString();
4901 OUString INetURLObject::GetMsgId(DecodeMechanism eMechanism,
4902 rtl_TextEncoding eCharset) const
4904 if (m_eScheme != INET_PROT_POP3)
4905 return OUString();
4906 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4907 sal_Unicode const * pEnd = p + m_aPath.getLength();
4908 for (; p < pEnd; ++p)
4909 if (*p == '<')
4910 return decode(p, pEnd, getEscapePrefix(), eMechanism, eCharset);
4911 return OUString();
4914 // static
4915 void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4916 sal_Char cEscapePrefix, sal_uInt32 nUCS4)
4918 DBG_ASSERT(nUCS4 < 0x80000000,
4919 "INetURLObject::appendUCS4Escape(): Bad char");
4920 if (nUCS4 < 0x80)
4921 appendEscape(rTheText, cEscapePrefix, nUCS4);
4922 else if (nUCS4 < 0x800)
4924 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0);
4925 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
4927 else if (nUCS4 < 0x10000)
4929 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0);
4930 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
4931 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
4933 else if (nUCS4 < 0x200000)
4935 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0);
4936 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
4937 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
4938 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
4940 else if (nUCS4 < 0x4000000)
4942 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8);
4943 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
4944 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
4945 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
4946 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
4948 else
4950 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC);
4951 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 24 & 0x3F) | 0x80);
4952 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
4953 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
4954 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
4955 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
4959 // static
4960 void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4961 EscapeType eEscapeType, bool bOctets,
4962 Part ePart, sal_Char cEscapePrefix,
4963 rtl_TextEncoding eCharset,
4964 bool bKeepVisibleEscapes)
4966 bool bEscape;
4967 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4968 switch (eEscapeType)
4970 case ESCAPE_NO:
4971 if (mustEncode(nUCS4, ePart))
4973 bEscape = true;
4974 eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 :
4975 RTL_TEXTENCODING_UTF8;
4977 else
4978 bEscape = false;
4979 break;
4981 case ESCAPE_OCTET:
4982 bEscape = true;
4983 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4984 break;
4986 case ESCAPE_UTF32:
4987 if (mustEncode(nUCS4, ePart))
4989 bEscape = true;
4990 eTargetCharset = eCharset;
4992 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4994 bEscape = true;
4995 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4997 else
4998 bEscape = false;
4999 break;
5000 default:
5001 bEscape = false;
5004 if (bEscape)
5006 switch (eTargetCharset)
5008 default:
5009 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
5010 case RTL_TEXTENCODING_ASCII_US:
5011 case RTL_TEXTENCODING_ISO_8859_1:
5012 appendEscape(rTheText, cEscapePrefix, nUCS4);
5013 break;
5015 case RTL_TEXTENCODING_UTF8:
5016 appendUCS4Escape(rTheText, cEscapePrefix, nUCS4);
5017 break;
5020 else
5021 rTheText.append(sal_Unicode(nUCS4));
5024 // static
5025 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
5026 sal_Unicode const * pEnd, bool bOctets,
5027 sal_Char cEscapePrefix,
5028 EncodeMechanism eMechanism,
5029 rtl_TextEncoding eCharset,
5030 EscapeType & rEscapeType)
5032 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
5033 sal_uInt32 nUTF32 = bOctets ? *rBegin++ :
5034 INetMIME::getUTF32Character(rBegin, pEnd);
5035 switch (eMechanism)
5037 case ENCODE_ALL:
5038 rEscapeType = ESCAPE_NO;
5039 break;
5041 case WAS_ENCODED:
5043 int nWeight1;
5044 int nWeight2;
5045 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5046 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
5047 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
5049 rBegin += 2;
5050 nUTF32 = nWeight1 << 4 | nWeight2;
5051 switch (eCharset)
5053 default:
5054 OSL_FAIL(
5055 "INetURLObject::getUTF32(): Unsupported charset");
5056 case RTL_TEXTENCODING_ASCII_US:
5057 rEscapeType = INetMIME::isUSASCII(nUTF32) ?
5058 ESCAPE_UTF32 : ESCAPE_OCTET;
5059 break;
5061 case RTL_TEXTENCODING_ISO_8859_1:
5062 rEscapeType = ESCAPE_UTF32;
5063 break;
5065 case RTL_TEXTENCODING_UTF8:
5066 if (INetMIME::isUSASCII(nUTF32))
5067 rEscapeType = ESCAPE_UTF32;
5068 else
5070 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
5072 sal_uInt32 nEncoded;
5073 int nShift;
5074 sal_uInt32 nMin;
5075 if (nUTF32 <= 0xDF)
5077 nEncoded = (nUTF32 & 0x1F) << 6;
5078 nShift = 0;
5079 nMin = 0x80;
5081 else if (nUTF32 <= 0xEF)
5083 nEncoded = (nUTF32 & 0x0F) << 12;
5084 nShift = 6;
5085 nMin = 0x800;
5087 else
5089 nEncoded = (nUTF32 & 0x07) << 18;
5090 nShift = 12;
5091 nMin = 0x10000;
5093 sal_Unicode const * p = rBegin;
5094 bool bUTF8 = true;
5095 for (;;)
5097 if (pEnd - p < 3
5098 || p[0] != cEscapePrefix
5099 || (nWeight1
5100 = INetMIME::getHexWeight(p[1]))
5102 || nWeight1 > 11
5103 || (nWeight2
5104 = INetMIME::getHexWeight(p[2]))
5105 < 0)
5107 bUTF8 = false;
5108 break;
5110 p += 3;
5111 nEncoded
5112 |= ((nWeight1 & 3) << 4 | nWeight2)
5113 << nShift;
5114 if (nShift == 0)
5115 break;
5116 nShift -= 6;
5118 if (bUTF8 && nEncoded >= nMin
5119 && !INetMIME::isHighSurrogate(nEncoded)
5120 && !INetMIME::isLowSurrogate(nEncoded)
5121 && nEncoded <= 0x10FFFF)
5123 rBegin = p;
5124 nUTF32 = nEncoded;
5125 rEscapeType = ESCAPE_UTF32;
5126 break;
5129 rEscapeType = ESCAPE_OCTET;
5131 break;
5134 else
5135 rEscapeType = ESCAPE_NO;
5136 break;
5139 case NOT_CANONIC:
5141 int nWeight1;
5142 int nWeight2;
5143 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5144 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
5145 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
5147 rBegin += 2;
5148 nUTF32 = nWeight1 << 4 | nWeight2;
5149 rEscapeType = ESCAPE_OCTET;
5151 else
5152 rEscapeType = ESCAPE_NO;
5153 break;
5156 return nUTF32;
5159 // static
5160 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
5161 sal_Unicode const * pEnd,
5162 bool bEager)
5164 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
5165 State eState = STATE_DOT;
5166 sal_Int32 nLabels = 0;
5167 sal_Unicode const * pLastAlphanumeric = 0;
5168 for (sal_Unicode const * p = rBegin;; ++p)
5169 switch (eState)
5171 case STATE_DOT:
5172 if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
5174 ++nLabels;
5175 eState = STATE_LABEL;
5176 break;
5178 if (bEager || nLabels == 0)
5179 return 0;
5180 rBegin = p - 1;
5181 return nLabels;
5183 case STATE_LABEL:
5184 if (p != pEnd)
5186 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
5187 break;
5188 else if (*p == '.')
5190 eState = STATE_DOT;
5191 break;
5193 else if (*p == '-')
5195 pLastAlphanumeric = p;
5196 eState = STATE_HYPHEN;
5197 break;
5200 rBegin = p;
5201 return nLabels;
5203 case STATE_HYPHEN:
5204 if (p != pEnd)
5206 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
5208 eState = STATE_LABEL;
5209 break;
5211 else if (*p == '-')
5212 break;
5214 if (bEager)
5215 return 0;
5216 rBegin = pLastAlphanumeric;
5217 return nLabels;
5221 // static
5222 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
5223 sal_Unicode const * pEnd)
5225 if (rBegin != pEnd && *rBegin == '[') {
5226 sal_Unicode const * p = rBegin + 1;
5227 //TODO: check for valid IPv6address (RFC 2373):
5228 while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
5230 ++p;
5232 if (p != pEnd && *p == ']') {
5233 rBegin = p + 1;
5234 return true;
5237 return false;
5240 OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism,
5241 rtl_TextEncoding eCharset)
5242 const
5244 if (!checkHierarchical())
5245 return OUString();
5246 INetURLObject aTemp(*this);
5247 aTemp.clearFragment();
5248 aTemp.clearQuery();
5249 aTemp.removeSegment(LAST_SEGMENT, false);
5250 aTemp.setFinalSlash();
5251 return aTemp.GetMainURL(eMechanism, eCharset);
5254 OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
5255 rtl_TextEncoding eCharset) const
5257 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
5260 OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism,
5261 rtl_TextEncoding eCharset) const
5263 return getExtension(LAST_SEGMENT, false, eMechanism, eCharset);
5266 bool INetURLObject::CutLastName()
5268 INetURLObject aTemp(*this);
5269 aTemp.clearFragment();
5270 aTemp.clearQuery();
5271 if (!aTemp.removeSegment(LAST_SEGMENT, false))
5272 return false;
5273 *this = aTemp;
5274 return true;
5277 OUString INetURLObject::PathToFileName() const
5279 if (m_eScheme != INET_PROT_FILE)
5280 return OUString();
5281 OUString aSystemPath;
5282 if (osl::FileBase::getSystemPathFromFileURL(
5283 decode(m_aAbsURIRef.getStr(),
5284 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
5285 getEscapePrefix(), NO_DECODE, RTL_TEXTENCODING_UTF8),
5286 aSystemPath)
5287 != osl::FileBase::E_None)
5288 return OUString();
5289 return aSystemPath;
5292 OUString INetURLObject::GetFull() const
5294 INetURLObject aTemp(*this);
5295 aTemp.removeFinalSlash();
5296 return aTemp.PathToFileName();
5299 OUString INetURLObject::GetPath() const
5301 INetURLObject aTemp(*this);
5302 aTemp.removeSegment(LAST_SEGMENT, true);
5303 aTemp.removeFinalSlash();
5304 return aTemp.PathToFileName();
5307 void INetURLObject::SetBase(OUString const & rTheBase)
5309 setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL);
5312 OUString INetURLObject::GetBase() const
5314 return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET);
5317 void INetURLObject::SetName(OUString const & rTheName,
5318 EncodeMechanism eMechanism,
5319 rtl_TextEncoding eCharset)
5321 INetURLObject aTemp(*this);
5322 if (aTemp.removeSegment(LAST_SEGMENT, true)
5323 && aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism,
5324 eCharset))
5325 *this = aTemp;
5328 void INetURLObject::SetExtension(OUString const & rTheExtension,
5329 EncodeMechanism eMechanism,
5330 rtl_TextEncoding eCharset)
5332 setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset);
5335 OUString INetURLObject::CutExtension(DecodeMechanism eMechanism,
5336 rtl_TextEncoding eCharset)
5338 OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism,
5339 eCharset));
5340 return removeExtension(LAST_SEGMENT, false)
5341 ? aTheExtension : OUString();
5344 bool INetURLObject::IsCaseSensitive() const
5346 return true;
5349 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */