merge the formfield patch from ooo-build
[ooovba.git] / tools / source / fsys / urlobj.cxx
blob5c891b23ccd6b887ed745d44f319b9748a230a8c
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: urlobj.cxx,v $
10 * $Revision: 1.63.36.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_tools.hxx"
33 #include <tools/urlobj.hxx>
34 #include <tools/debug.hxx>
35 #include <tools/inetmime.hxx>
36 #include "com/sun/star/uno/Reference.hxx"
37 #include "com/sun/star/util/XStringWidth.hpp"
38 #include "osl/diagnose.h"
39 #include "osl/file.hxx"
40 #include "rtl/string.h"
41 #include "rtl/textenc.h"
42 #include "rtl/ustring.hxx"
43 #include "sal/types.h"
45 #ifndef INCLUDED_ALGORITHM
46 #include <algorithm>
47 #define INCLUDED_ALGORITHM
48 #endif
49 #ifndef INCLUDED_LIMITS
50 #include <limits>
51 #define INCLUDED_LIMITS
52 #endif
54 #include <string.h>
56 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
57 // unnamed namespaces don't work well yet...
59 using namespace com::sun;
61 //============================================================================
63 // INetURLObject
65 //============================================================================
67 /* The URI grammar (using RFC 2234 conventions).
69 Constructs of the form
70 {reference <rule1> using rule2}
71 stand for a rule matching the given rule1 specified in the given reference,
72 encoded to URI syntax using rule2 (as specified in this URI grammar).
75 ; RFC 1738, RFC 2396, RFC 2732, private
76 login = [user [":" password] "@"] hostport
77 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
78 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
79 hostport = host [":" port]
80 host = incomplete-hostname / hostname / IPv4address / IPv6reference
81 incomplete-hostname = *(domainlabel ".") domainlabel
82 hostname = *(domainlabel ".") toplabel ["."]
83 domainlabel = alphanum [*(alphanum / "-") alphanum]
84 toplabel = ALPHA [*(alphanum / "-") alphanum]
85 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
86 IPv6reference = "[" hexpart [":" IPv4address] "]"
87 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
88 hexseq = hex4 *(":" hex4)
89 hex4 = 1*4HEXDIG
90 port = *DIGIT
91 escaped = "%" HEXDIG HEXDIG
92 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
93 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
94 alphanum = ALPHA / DIGIT
95 unreserved = alphanum / mark
96 uric = escaped / reserved / unreserved
97 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
100 ; RFC 1738, RFC 2396
101 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
102 segment = *pchar
105 ; RFC 1738, RFC 2396
106 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
107 segment = *(pchar / ";")
110 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
111 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
112 segment = *pchar
113 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
116 ; RFC 2368, RFC 2396
117 mailto-url = "MAILTO:" [to] [headers]
118 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
119 headers = "?" header *("&" header)
120 header = hname "=" hvalue
121 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
122 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
125 ; private (see RFC 1738, RFC 2396)
126 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
127 segment = *(pchar / ";")
130 ; RFC 1738, RFC 2396, RFC 2732
131 news-url = "NEWS:" grouppart
132 grouppart = "*" / group / article
133 group = alpha *(alphanum / "+" / "-" / "." / "_")
134 article = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "?" / "_" / "~") "@" host
137 ; private
138 private-url = "PRIVATE:" path ["?" *uric]
139 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
142 ; private
143 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
144 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
145 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
148 ; private
149 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
150 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
153 ; private
154 slot-url = "SLOT:" path ["?" *uric]
155 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
158 ; private
159 macro-url = "MACRO:" path ["?" *uric]
160 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
163 ; private
164 javascript-url = "JAVASCRIPT:" *uric
167 ; private (see RFC 2192)
168 imap-url = "IMAP://" user [";AUTH=" auth] "@" hostport "/" segment *("/" segment) ["/;UID=" nz_number]
169 user = 1*{RFC 2060 <CHAR8> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "=" / "_" / "~")}
170 auth = {RFC 2060 <atom> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "+" / "," / "-" / "." / "=" / "_" / "~")}
171 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / "=" / "@" / "_" / "~")
172 nz_number = {RFC 2060 <nz_number> using *DIGIT}
175 ; private
176 pop3-url = "POP3://" login ["/" ["<" *uric ">"]]
179 ; RFC 2397
180 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
181 mediatype = [type "/" subtype] *(";" attribute "=" value)
182 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
183 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
184 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
185 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
188 ; RFC 2392, RFC 2396
189 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
192 ; private
193 out-url = "OUT:///~" name ["/" *uric]
194 name = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "?" / "@" / "_" / "~")
197 ; private (see RFC 1738, RFC 2396)
198 vnd-sun-star-wfs-url = "VND.SUN.STAR.WFS://" [host / "LOCALHOST"] ["/" segment *("/" segment)]
199 segment = *pchar
202 ; private
203 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
204 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
206 ; private
207 vim-url = "VIM://" +vimc [":" *vimc] ["/" [("INBOX" message) / ("NEWSGROUPS" ["/" [+vimc message]])]]
208 message = ["/" [+vimc [":" +DIGIT "." +DIGIT "." +DIGIT]]]
209 vimc = ("=" HEXDIG HEXDIG) / alphanum
212 ; private
213 uno-url = ".UNO:" path ["?" *uric]
214 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
217 ; private
218 component-url = ".COMPONENT:" path ["?" *uric]
219 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
222 ; private
223 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
224 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
227 ; RFC 2255
228 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
229 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
230 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
231 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
232 extension = ["!"] ["X-"] extoken ["=" exvalue]
233 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
234 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
237 ; private
238 db-url = "DB:" *uric
241 ; private
242 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
243 opaque_part = uric_no_slash *uric
244 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
247 ; private
248 vnd-sun-star-odma-url = "VND.SUN.STAR.ODMA:" ["/" *uric_no_slash]
249 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
252 ; RFC 1738
253 telnet-url = "TELNET://" login ["/"]
256 ; private
257 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
258 opaque_part = uric_no_slash *uric
259 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
262 ; private
263 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
264 segment = *pchar
267 ; private
268 unknown-url = scheme ":" 1*uric
269 scheme = ALPHA *(alphanum / "+" / "-" / ".")
272 ; private (http://ubiqx.org/cifs/Appendix-D.html):
273 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
274 segment = *(pchar / ";")
277 //============================================================================
278 inline sal_Int32 INetURLObject::SubString::clear()
280 sal_Int32 nDelta = -m_nLength;
281 m_nBegin = -1;
282 m_nLength = 0;
283 return nDelta;
286 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
287 rtl::OUString const & rSubString)
289 rtl::OUString sTemp(rString.makeStringAndClear());
290 sal_Int32 nDelta = set(sTemp, rSubString);
291 rString.append(sTemp);
292 return nDelta;
295 inline sal_Int32 INetURLObject::SubString::set(rtl::OUString & rString,
296 rtl::OUString const & rSubString)
298 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
300 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
302 m_nLength = rSubString.getLength();
303 return nDelta;
306 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
307 rtl::OUString const & rSubString,
308 sal_Int32 nTheBegin)
310 m_nBegin = nTheBegin;
311 return set(rString, rSubString);
314 //============================================================================
315 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
317 if (isPresent())
318 m_nBegin = m_nBegin + nDelta;
321 //============================================================================
322 int INetURLObject::SubString::compare(SubString const & rOther,
323 rtl::OUStringBuffer const & rThisString,
324 rtl::OUStringBuffer const & rOtherString) const
326 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
327 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
328 sal_Unicode const * end = p1 + len;
329 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
330 while (p1 != end) {
331 if (*p1 < *p2) {
332 return -1;
333 } else if (*p1 > *p2) {
334 return 1;
336 ++p1;
337 ++p2;
339 return m_nLength < rOther.m_nLength ? -1
340 : m_nLength > rOther.m_nLength ? 1
341 : 0;
344 //============================================================================
// Static description of one URL scheme.  Instances are initialised
// positionally in aSchemeInfoMap, so the member order must not be changed
// without updating that table.
struct INetURLObject::SchemeInfo
{
    sal_Char const * m_pScheme; // scheme name, e.g. "ftp"
    sal_Char const * m_pPrefix; // scheme name plus delimiter, e.g. "ftp://"
    sal_uInt16 m_nDefaultPort; // e.g. 21 for "ftp"; 0 in many entries
    // NOTE(review): the flags below presumably tell which URL components the
    // scheme's syntax may contain -- inferred from the member names only,
    // confirm against the parsing code.
    bool m_bAuthority;
    bool m_bUser;
    bool m_bAuth;
    bool m_bPassword;
    bool m_bHost;
    bool m_bPort;
    bool m_bHierarchical;
    bool m_bQuery;
};
360 //============================================================================
// Describes one recognised URL prefix and the scheme it maps to.
struct INetURLObject::PrefixInfo
{
    // The enumerator order matters because kinds are compared relationally:
    // setAbsURIRef() picks m_pTranslatedPrefix instead of m_pPrefix when
    // m_eKind >= EXTERNAL.
    enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!

    sal_Char const * m_pPrefix; // the prefix text itself
    sal_Char const * m_pTranslatedPrefix; // used for kinds >= EXTERNAL
    INetProtocol m_eScheme; // scheme assigned when this prefix matches
    Kind m_eKind;
};
371 //============================================================================
// Per-scheme static data, indexed by INetProtocol value (see
// getSchemeInfo()).  Each entry lists the SchemeInfo members in declaration
// order:
//   scheme, prefix, default port,
//   authority, user, auth, password, host, port, hierarchical, query
// NOTE(review): which INetProtocol values the entries with empty names
// (the first one and the last two) belong to cannot be seen from here --
// check the INetProtocol enumeration.
static INetURLObject::SchemeInfo const aSchemeInfoMap[INET_PROT_END]
    = { { "", "", 0, false, false, false, false, false, false, false,
          false },
        { "ftp", "ftp://", 21, true, true, false, true, true, true, true,
          false },
        { "http", "http://", 80, true, true, false, true, true, true,
          true, true },
        { "file", "file://", 0, true, false, false, false, true, false,
          true, false },
        { "mailto", "mailto:", 0, false, false, false, false, false,
          false, false, true },
        { "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, true,
          false, true, true, true, true, true },
        { "news", "news:", 0, false, false, false, false, false, false, false,
          false },
        { "private", "private:", 0, false, false, false, false, false,
          false, false, true },
        { "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false,
          false, false, false, true, true },
        { "https", "https://", 443, true, true, false, true, true, true,
          true, true },
        { "slot", "slot:", 0, false, false, false, false, false, false,
          false, true },
        { "macro", "macro:", 0, false, false, false, false, false, false,
          false, true },
        { "javascript", "javascript:", 0, false, false, false, false,
          false, false, false, false },
        { "imap", "imap://", 143, true, true, true, false, true, true,
          true, false },
        { "pop3", "pop3://", 110, true, true, false, true, true, true,
          false, false },
        { "data", "data:", 0, false, false, false, false, false, false,
          false, false },
        { "cid", "cid:", 0, false, false, false, false, false, false,
          false, false },
        { "out", "out://", 0, true, false, false, false, false, false,
          false, false },
        { "vnd.sun.star.wfs", "vnd.sun.star.wfs://", 0, true, false, false,
          false, true, true, true, false },
        { "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, false,
          false, false, false, true, false },
        { "vim", "vim://", 0, true, true, false, true, false, false, true,
          false },
        { ".uno", ".uno:", 0, false, false, false, false, false, false,
          false, true },
        { ".component", ".component:", 0, false, false, false, false,
          false, false, false, true },
        { "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false,
          false, false, false, true, true },
        { "ldap", "ldap://", 389, true, false, false, false, true, true,
          false, true },
        { "db", "db:", 0, false, false, false, false, false, false, false,
          false },
        { "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false,
          false, false, false, false, false },
        { "vnd.sun.star.odma", "vnd.sun.star.odma:", 0, false, false, false,
          false, false, false, true, false },
        { "telnet", "telnet://", 23, true, true, false, true, true, true, true,
          false },
        { "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false, false,
          false, false, false, false, false },
        { "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false,
          false, false, false, true, false },
        { "smb", "smb://", 139, true, true, false, true, true, true, true,
          true },
        { "dav", "dav://", 80, true, true, false, true, true, true, true,
          true },
        { "davs", "davs://", 443, true, true, false, true, true, true,
          true, true },
        { "webdav", "webdav://", 80, true, true, false, true, true, true, true,
          true },
        { "webdavs", "webdavs://", 443, true, true, false, true, true, true,
          true, true },
        { "", "", 0, false, false, false, false, false, false, false, false },
        { "", "", 0, false, false, false, false, false, false, true, false }
      };
449 // static
450 inline INetURLObject::SchemeInfo const &
451 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
453 return aSchemeInfoMap[eTheScheme];
456 //============================================================================
457 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
459 return getSchemeInfo(m_eScheme);
462 //============================================================================
463 // static
464 inline void INetURLObject::appendEscape(rtl::OUStringBuffer & rTheText,
465 sal_Char cEscapePrefix,
466 sal_uInt32 nOctet)
468 rTheText.append(sal_Unicode(cEscapePrefix));
469 rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet >> 4))));
470 rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet & 15))));
473 //============================================================================
474 namespace unnamed_tools_urlobj {
// Two-letter aliases for the INetURLObject::Part values, so that the rows
// of aMustEncodeMap can be written as compact sums of these constants.
enum
{
    PA = INetURLObject::PART_OBSOLETE_NORMAL,
    PB = INetURLObject::PART_OBSOLETE_FILE,
    PC = INetURLObject::PART_OBSOLETE_PARAM,
    PD = INetURLObject::PART_USER_PASSWORD,
    PE = INetURLObject::PART_IMAP_ACHAR,
    PF = INetURLObject::PART_VIM,
    PG = INetURLObject::PART_HOST_EXTRA,
    PH = INetURLObject::PART_FPATH,
    PI = INetURLObject::PART_AUTHORITY,
    PJ = INetURLObject::PART_PATH_SEGMENTS_EXTRA,
    PK = INetURLObject::PART_REL_SEGMENT_EXTRA,
    PL = INetURLObject::PART_URIC,
    PM = INetURLObject::PART_HTTP_PATH,
    PN = INetURLObject::PART_FILE_SEGMENT_EXTRA,
    PO = INetURLObject::PART_MESSAGE_ID,
    PP = INetURLObject::PART_MESSAGE_ID_PATH,
    PQ = INetURLObject::PART_MAILTO,
    PR = INetURLObject::PART_PATH_BEFORE_QUERY,
    PS = INetURLObject::PART_PCHAR,
    PT = INetURLObject::PART_FRAGMENT,
    PU = INetURLObject::PART_VISIBLE,
    PV = INetURLObject::PART_VISIBLE_NONSPECIAL,
    PW = INetURLObject::PART_CREATEFRAGMENT,
    PX = INetURLObject::PART_UNO_PARAM_VALUE,
    PY = INetURLObject::PART_UNAMBIGUOUS,
    PZ = INetURLObject::PART_URIC_NO_SLASH,
    P1 = INetURLObject::PART_HTTP_QUERY,
    P2 = INetURLObject::PART_NEWS_ARTICLE_LOCALPART
};
508 static sal_uInt32 const aMustEncodeMap[128]
509 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 /* */ PY,
512 /* ! */ PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
513 /* " */ PU+PV +PY,
514 /* # */ PU,
515 /* $ */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
516 /* % */ PU,
517 /* & */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP +PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
518 /* ' */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
519 /* ( */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
520 /* ) */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
521 /* * */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
522 /* + */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
523 /* , */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW +PZ+P1+P2,
524 /* - */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
525 /* . */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
526 /* / */ PA+PB+PC +PH +PJ +PL+PM +PP+PQ+PR +PT+PU+PV +PX +P2,
527 /* 0 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
528 /* 1 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
529 /* 2 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
530 /* 3 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
531 /* 4 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
532 /* 5 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
533 /* 6 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
534 /* 7 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
535 /* 8 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
536 /* 9 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
537 /* : */ PB+PC +PH+PI+PJ +PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
538 /* ; */ PC+PD +PI+PJ+PK+PL+PM +PO+PP+PQ+PR +PT+PU +PW +PZ+P1+P2,
539 /* < */ PC +PO+PP +PU+PV +PY,
540 /* = */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN +PR+PS+PT+PU+PV+PW +PZ+P1+P2,
541 /* > */ PC +PO+PP +PU+PV +PY,
542 /* ? */ PC +PL +PT+PU +PW+PX +PZ +P2,
543 /* @ */ PC +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1,
544 /* A */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
545 /* B */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
546 /* C */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
547 /* D */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
548 /* E */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
549 /* F */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
550 /* G */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
551 /* H */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
552 /* I */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
553 /* J */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
554 /* K */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
555 /* L */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
556 /* M */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
557 /* N */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
558 /* O */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
559 /* P */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
560 /* Q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
561 /* R */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
562 /* S */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
563 /* T */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
564 /* U */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
565 /* V */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
566 /* W */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
567 /* X */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
568 /* Y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
569 /* Z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
570 /* [ */ PL +PU+PV +PX,
571 /* \ */ PB +PU+PV +PY,
572 /* ] */ PL +PU+PV +PX,
573 /* ^ */ PU+PV +PY,
574 /* _ */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
575 /* ` */ PU+PV +PY,
576 /* a */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
577 /* b */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
578 /* c */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
579 /* d */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
580 /* e */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
581 /* f */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
582 /* g */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
583 /* h */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
584 /* i */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
585 /* j */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
586 /* k */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
587 /* l */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
588 /* m */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
589 /* n */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
590 /* o */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
591 /* p */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
592 /* q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
593 /* r */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
594 /* s */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
595 /* t */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
596 /* u */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
597 /* v */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
598 /* w */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
599 /* x */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
600 /* y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
601 /* z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
602 /* { */ PU+PV +PY,
603 /* | */ PB+PC +PN +PT+PU+PV +PY,
604 /* } */ PU+PV +PY,
605 /* ~ */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ +P2,
606 0 };
608 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
610 return !INetMIME::isUSASCII(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
615 //============================================================================
616 void INetURLObject::setInvalid()
618 m_aAbsURIRef.setLength(0);
619 m_eScheme = INET_PROT_NOT_VALID;
620 m_aScheme.clear();
621 m_aUser.clear();
622 m_aAuth.clear();
623 m_aHost.clear();
624 m_aPort.clear();
625 m_aPath.clear();
626 m_aQuery.clear();
627 m_aFragment.clear();
630 //============================================================================
632 namespace unnamed_tools_urlobj {
634 INetURLObject::FSysStyle
635 guessFSysStyleByCounting(sal_Unicode const * pBegin,
636 sal_Unicode const * pEnd,
637 INetURLObject::FSysStyle eStyle)
639 DBG_ASSERT(eStyle
640 & (INetURLObject::FSYS_UNX
641 | INetURLObject::FSYS_DOS
642 | INetURLObject::FSYS_MAC),
643 "guessFSysStyleByCounting(): Bad style");
644 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
645 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
646 "guessFSysStyleByCounting(): Too big");
647 sal_Int32 nSlashCount
648 = eStyle & INetURLObject::FSYS_UNX ?
649 0 : std::numeric_limits< sal_Int32 >::min();
650 sal_Int32 nBackslashCount
651 = eStyle & INetURLObject::FSYS_DOS ?
652 0 : std::numeric_limits< sal_Int32 >::min();
653 sal_Int32 nColonCount
654 = eStyle & INetURLObject::FSYS_MAC ?
655 0 : std::numeric_limits< sal_Int32 >::min();
656 while (pBegin != pEnd)
657 switch (*pBegin++)
659 case '/':
660 ++nSlashCount;
661 break;
663 case '\\':
664 ++nBackslashCount;
665 break;
667 case ':':
668 ++nColonCount;
669 break;
671 return nSlashCount >= nBackslashCount ?
672 nSlashCount >= nColonCount ?
673 INetURLObject::FSYS_UNX : INetURLObject::FSYS_MAC :
674 nBackslashCount >= nColonCount ?
675 INetURLObject::FSYS_DOS : INetURLObject::FSYS_MAC;
678 rtl::OUString parseScheme(
679 sal_Unicode const ** begin, sal_Unicode const * end,
680 sal_uInt32 fragmentDelimiter)
682 sal_Unicode const * p = *begin;
683 if (p != end && INetMIME::isAlpha(*p)) {
684 do {
685 ++p;
686 } while (p != end
687 && (INetMIME::isAlphanumeric(*p) || *p == '+' || *p == '-'
688 || *p == '.'));
689 // #i34835# To avoid problems with Windows file paths like "C:\foo",
690 // do not accept generic schemes that are only one character long:
691 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
692 && p - *begin >= 2)
694 rtl::OUString scheme(
695 rtl::OUString(*begin, p - *begin).toAsciiLowerCase());
696 *begin = p + 1;
697 return scheme;
700 return rtl::OUString();
// Parses rTheAbsURIRef as an absolute URI reference.  The re-encoded result
// is built up in aSynAbsURIRef part by part (scheme, authority, path, query,
// fragment) and only stored in m_aAbsURIRef once the whole input has been
// consumed successfully.
//
// bOctets, eMechanism and eCharset are handed on to the helpers that read and
// re-encode characters (getUTF32, appendUCS4, parsePath,
// parseHostOrNetBiosName).  bSmart enables the heuristic productions
// described below for input without an explicit scheme; eStyle holds the
// file system style bits (FSYS_UNX, FSYS_DOS, FSYS_MAC) those productions may
// use.
//
// Returns true on success.  On failure setInvalid() is called and false is
// returned.
// NOTE(review): the m_aXxx.set(aSynAbsURIRef, text, pos) calls presumably
// append the text to the buffer and remember where the part lives -- confirm
// in SubString::set.
705 bool INetURLObject::setAbsURIRef(rtl::OUString const & rTheAbsURIRef,
706 bool bOctets,
707 EncodeMechanism eMechanism,
708 rtl_TextEncoding eCharset,
709 bool bSmart,
710 FSysStyle eStyle)
712 sal_Unicode const * pPos = rTheAbsURIRef.getStr();
713 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
// Start from the invalid state before anything is parsed:
715 setInvalid();
// Replaced by 0x80000000 below whenever fragments must not be recognized.
// NOTE(review): presumably chosen because no input character can compare
// equal to it -- confirm.
717 sal_uInt32 nFragmentDelimiter = '#';
719 rtl::OUStringBuffer aSynAbsURIRef;
721 // Parse <scheme>:
722 sal_Unicode const * p = pPos;
723 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
724 if (pPrefix)
726 pPos = p;
727 m_eScheme = pPrefix->m_eScheme;
729 rtl::OUString sTemp(rtl::OUString::createFromAscii(pPrefix->m_eKind
730 >= PrefixInfo::EXTERNAL ?
731 pPrefix->m_pTranslatedPrefix :
732 pPrefix->m_pPrefix));
733 aSynAbsURIRef.append(sTemp);
734 m_aScheme = SubString( 0, sTemp.indexOf(static_cast< sal_Unicode >(':')) );
736 else
738 if (bSmart)
740 // For scheme detection, the first (if any) of the following
741 // productions that matches the input string (and for which the
742 // appropriate style bit is set in eStyle, if applicable)
743 // determines the scheme. The productions use the auxiliary rules
745 // domain = label *("." label)
746 // label = alphanum [*(alphanum / "-") alphanum]
747 // alphanum = ALPHA / DIGIT
748 // IPv6reference = "[" IPv6address "]"
749 // IPv6address = hexpart [":" IPv4address]
750 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
751 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
752 // hexseq = hex4 *(":" hex4)
753 // hex4 = 1*4HEXDIG
754 // UCS4 = <any UCS4 character>
756 // 1st Production (known scheme):
757 // <one of the known schemes, ignoring case> ":" *UCS4
759 // 2nd Production (mailto):
760 // domain "@" domain
762 // 3rd Production (ftp):
763 // "FTP" 2*("." label) ["/" *UCS4]
765 // 4th Production (http):
766 // label 2*("." label) ["/" *UCS4]
768 // 5th Production (file):
769 // "//" (domain / IPv6reference) ["/" *UCS4]
771 // 6th Production (Unix file):
772 // "/" *UCS4
774 // 7th Production (UNC file; FSYS_DOS only):
775 // "\\" domain ["\" *UCS4]
777 // 8th Production (Unix-like DOS file; FSYS_DOS only):
778 // ALPHA ":" ["/" *UCS4]
780 // 9th Production (DOS file; FSYS_DOS only):
781 // ALPHA ":" ["\" *UCS4]
783 // For the 'non URL' file productions 6--9, the interpretation of
784 // the input as a (degenerate) URI is turned off, i.e., escape
785 // sequences and fragments are never detected as such, but are
786 // taken as literal characters.
788 sal_Unicode const * p1 = pPos;
789 if (eStyle & FSYS_DOS
790 && pEnd - p1 >= 2
791 && INetMIME::isAlpha(p1[0])
792 && p1[1] == ':'
793 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
795 m_eScheme = INET_PROT_FILE; // 8th, 9th
796 eMechanism = ENCODE_ALL;
797 nFragmentDelimiter = 0x80000000;
799 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
801 p1 += 2;
802 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
803 && (p1 == pEnd || *p1 == '/'))
804 m_eScheme = INET_PROT_FILE; // 5th
806 else if (p1 != pEnd && *p1 == '/')
808 m_eScheme = INET_PROT_FILE; // 6th
809 eMechanism = ENCODE_ALL;
810 nFragmentDelimiter = 0x80000000;
812 else if (eStyle & FSYS_DOS
813 && pEnd - p1 >= 2
814 && p1[0] == '\\'
815 && p1[1] == '\\')
817 p1 += 2;
818 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
819 p1, pEnd - p1, '\\');
820 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
821 if (
822 parseHostOrNetBiosName(
823 p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
824 true, NULL) ||
825 (scanDomain(p1, pe) > 0 && p1 == pe)
828 m_eScheme = INET_PROT_FILE; // 7th
829 eMechanism = ENCODE_ALL;
830 nFragmentDelimiter = 0x80000000;
833 else
835 sal_Unicode const * pDomainEnd = p1;
836 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
837 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
839 ++pDomainEnd;
840 if (scanDomain(pDomainEnd, pEnd) > 0
841 && pDomainEnd == pEnd)
842 m_eScheme = INET_PROT_MAILTO; // 2nd
844 else if (nLabels >= 3
845 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
846 m_eScheme
847 = pDomainEnd - p1 >= 4
848 && (p1[0] == 'f' || p1[0] == 'F')
849 && (p1[1] == 't' || p1[1] == 'T')
850 && (p1[2] == 'p' || p1[2] == 'P')
851 && p1[3] == '.' ?
852 INET_PROT_FTP : INET_PROT_HTTP; // 3rd, 4th
// No known scheme and no smart production matched: try an unknown
// ("generic") scheme, which is hierarchical if a "/" follows the ":".
856 rtl::OUString aSynScheme;
857 if (m_eScheme == INET_PROT_NOT_VALID) {
858 sal_Unicode const * p1 = pPos;
859 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
860 if (aSynScheme.getLength() > 0)
862 if (p1 != pEnd && *p1 == '/')
863 m_eScheme = INET_PROT_GENERIC_HIERARCHICAL;
864 else
865 m_eScheme = INET_PROT_GENERIC;
866 pPos = p1;
// Last resort in smart mode: fall back to m_eSmartScheme for non-empty input
// that does not start with the fragment delimiter.
870 if (bSmart && m_eScheme == INET_PROT_NOT_VALID && pPos != pEnd
871 && *pPos != nFragmentDelimiter)
873 m_eScheme = m_eSmartScheme;
876 if (m_eScheme == INET_PROT_NOT_VALID)
878 setInvalid();
879 return false;
// Schemes with a non-empty name in the scheme info use that name; otherwise
// the name parsed by parseScheme() above is kept.
882 const char *pSchemeName = getSchemeInfo().m_pScheme;
883 if (pSchemeName[0] != '\0') {
884 aSynScheme = rtl::OUString::createFromAscii(pSchemeName);
886 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
887 aSynAbsURIRef.append(sal_Unicode(':'));
890 sal_Char cEscapePrefix = getEscapePrefix();
891 sal_uInt32 nSegmentDelimiter = '/';
892 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
893 bool bSkippedInitialSlash = false;
895 // Parse //<user>;AUTH=<auth>@<host>:<port> or
896 // //<user>:<password>@<host>:<port> or
897 // //<reg_name>
898 if (getSchemeInfo().m_bAuthority)
// The switch below only locates the user info and host/port ranges in the
// input; they are re-encoded and appended after the switch.
900 sal_Unicode const * pUserInfoBegin = 0;
901 sal_Unicode const * pUserInfoEnd = 0;
902 sal_Unicode const * pHostPortBegin = 0;
903 sal_Unicode const * pHostPortEnd = 0;
905 switch (m_eScheme)
907 case INET_PROT_VND_SUN_STAR_HELP:
909 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
911 setInvalid();
912 return false;
914 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
915 rtl::OUStringBuffer aSynAuthority;
916 while (pPos < pEnd
917 && *pPos != '/' && *pPos != '?'
918 && *pPos != nFragmentDelimiter)
920 EscapeType eEscapeType;
921 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
922 cEscapePrefix, eMechanism,
923 eCharset, eEscapeType);
924 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
925 PART_AUTHORITY, cEscapePrefix, eCharset,
926 false);
928 m_aHost.set(aSynAbsURIRef,
929 aSynAuthority.makeStringAndClear(),
930 aSynAbsURIRef.getLength());
931 // misusing m_aHost to store the authority
932 break;
935 case INET_PROT_VND_SUN_STAR_HIER:
937 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
939 pPos += 2;
940 aSynAbsURIRef.
941 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
942 rtl::OUStringBuffer aSynAuthority;
943 while (pPos < pEnd
944 && *pPos != '/' && *pPos != '?'
945 && *pPos != nFragmentDelimiter)
947 EscapeType eEscapeType;
948 sal_uInt32 nUTF32 = getUTF32(pPos,
949 pEnd,
950 bOctets,
951 cEscapePrefix,
952 eMechanism,
953 eCharset,
954 eEscapeType);
955 appendUCS4(aSynAuthority,
956 nUTF32,
957 eEscapeType,
958 bOctets,
959 PART_AUTHORITY,
960 cEscapePrefix,
961 eCharset,
962 false);
964 if (aSynAuthority.getLength() == 0)
966 setInvalid();
967 return false;
969 m_aHost.set(aSynAbsURIRef,
970 aSynAuthority.makeStringAndClear(),
971 aSynAbsURIRef.getLength());
972 // misusing m_aHost to store the authority
974 break;
977 case INET_PROT_VND_SUN_STAR_PKG:
979 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
981 setInvalid();
982 return false;
984 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
985 rtl::OUStringBuffer aSynAuthority;
986 while (pPos < pEnd
987 && *pPos != '/' && *pPos != '?'
988 && *pPos != nFragmentDelimiter)
990 EscapeType eEscapeType;
991 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
992 cEscapePrefix, eMechanism,
993 eCharset, eEscapeType);
994 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
995 PART_AUTHORITY, cEscapePrefix, eCharset,
996 false);
998 if (aSynAuthority.getLength() == 0)
1000 setInvalid();
1001 return false;
1003 m_aHost.set(aSynAbsURIRef,
1004 aSynAuthority.makeStringAndClear(),
1005 aSynAbsURIRef.getLength());
1006 // misusing m_aHost to store the authority
1007 break;
1010 case INET_PROT_FILE:
1011 if (bSmart)
1013 // The first of the following nine productions that
1014 // matches the rest of the input string (and for which the
1015 // appropriate style bit is set in eStyle, if applicable)
1016 // determines the used notation. The productions use the
1017 // auxiliary rules
1019 // domain = label *("." label)
1020 // label = alphanum [*(alphanum / "-") alphanum]
1021 // alphanum = ALPHA / DIGIT
1022 // IPv6reference = "[" IPv6address "]"
1023 // IPv6address = hexpart [":" IPv4address]
1024 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1025 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1026 // hexseq = hex4 *(":" hex4)
1027 // hex4 = 1*4HEXDIG
1028 // path = <any UCS4 character except "#">
1029 // UCS4 = <any UCS4 character>
1031 // 1st Production (URL):
1032 // "//" [domain / IPv6reference] ["/" *path]
1033 // ["#" *UCS4]
1034 // becomes
1035 // "file://" domain "/" *path ["#" *UCS4]
1036 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1038 sal_Unicode const * p1 = pPos + 2;
1039 if (
1040 p1 == pEnd || *p1 == nFragmentDelimiter || *p1 == '/' ||
1043 scanDomain(p1, pEnd) > 0 ||
1044 scanIPv6reference(p1, pEnd)
1045 ) &&
1046 (p1 == pEnd || *p1 == nFragmentDelimiter || *p1 == '/')
1050 aSynAbsURIRef.
1051 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1052 pHostPortBegin = pPos + 2;
1053 pHostPortEnd = p1;
1054 pPos = p1;
1055 break;
1059 // 2nd Production (MS IE generated 1; FSYS_DOS only):
1060 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1061 // becomes
1062 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1063 // replacing "\" by "/" within <*path>
1065 // 3rd Production (MS IE generated 2; FSYS_DOS only):
1066 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1067 // becomes
1068 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1069 // replacing "\" by "/" within <*path>
1071 // 4th Production (miscounted slashes):
1072 // "//" *path ["#" *UCS4]
1073 // becomes
1074 // "file:///" *path ["#" *UCS4]
1075 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1077 aSynAbsURIRef.
1078 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1079 pPos += 2;
1080 bSkippedInitialSlash = true;
1081 if ((eStyle & FSYS_DOS) != 0
1082 && pEnd - pPos >= 2
1083 && INetMIME::isAlpha(pPos[0])
1084 && pPos[1] == ':'
1085 && (pEnd - pPos == 2
1086 || pPos[2] == '/' || pPos[2] == '\\'))
1087 nAltSegmentDelimiter = '\\';
1088 break;
1091 // 5th Production (Unix):
1092 // "/" *path ["#" *UCS4]
1093 // becomes
1094 // "file:///" *path ["#" *UCS4]
1095 if (pPos < pEnd && *pPos == '/')
1097 aSynAbsURIRef.
1098 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1099 break;
1102 // 6th Production (UNC; FSYS_DOS only):
1103 // "\\" domain ["\" *path] ["#" *UCS4]
1104 // becomes
1105 // "file://" domain "/" *path ["#" *UCS4]
1106 // replacing "\" by "/" within <*path>
1107 if (eStyle & FSYS_DOS
1108 && pEnd - pPos >= 2
1109 && pPos[0] == '\\'
1110 && pPos[1] == '\\')
1112 sal_Unicode const * p1 = pPos + 2;
1113 sal_Unicode const * pe = p1;
1114 while (pe < pEnd && *pe != '\\' &&
1115 *pe != nFragmentDelimiter)
1117 ++pe;
1119 if (
1120 parseHostOrNetBiosName(
1121 p1, pe, bOctets, ENCODE_ALL,
1122 RTL_TEXTENCODING_DONTKNOW, true, NULL) ||
1123 (scanDomain(p1, pe) > 0 && p1 == pe)
1126 aSynAbsURIRef.
1127 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1128 pHostPortBegin = pPos + 2;
1129 pHostPortEnd = pe;
1130 pPos = pe;
1131 nSegmentDelimiter = '\\';
1132 break;
1136 // 7th Production (Unix-like DOS; FSYS_DOS only):
1137 // ALPHA ":" ["/" *path] ["#" *UCS4]
1138 // becomes
1139 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1140 // replacing "\" by "/" within <*path>
1142 // 8th Production (DOS; FSYS_DOS only):
1143 // ALPHA ":" ["\" *path] ["#" *UCS4]
1144 // becomes
1145 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1146 // replacing "\" by "/" within <*path>
1147 if (eStyle & FSYS_DOS
1148 && pEnd - pPos >= 2
1149 && INetMIME::isAlpha(pPos[0])
1150 && pPos[1] == ':'
1151 && (pEnd - pPos == 2
1152 || pPos[2] == '/'
1153 || pPos[2] == '\\'))
1155 aSynAbsURIRef.
1156 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1157 nAltSegmentDelimiter = '\\';
1158 bSkippedInitialSlash = true;
1159 break;
1162 // 9th Production (any):
1163 // *path ["#" *UCS4]
1164 // becomes
1165 // "file:///" *path ["#" *UCS4]
1166 // replacing the delimiter by "/" within <*path>. The
1167 // delimiter is that character from the set { "/", "\",
1168 // ":" } which appears most often in <*path> (if FSYS_UNX
1169 // is not among the style bits, "/" is removed from the
1170 // set; if FSYS_DOS is not among the style bits, "\" is
1171 // removed from the set; if FSYS_MAC is not among the
1172 // style bits, ":" is removed from the set). If two or
1173 // more characters appear the same number of times, the
1174 // character mentioned first in that set is chosen. If
1175 // the first character of <*path> is the delimiter, that
1176 // character is not copied.
1177 if (eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC))
1179 aSynAbsURIRef.
1180 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1181 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1183 case FSYS_UNX:
1184 nSegmentDelimiter = '/';
1185 break;
1187 case FSYS_DOS:
1188 nSegmentDelimiter = '\\';
1189 break;
1191 case FSYS_MAC:
1192 nSegmentDelimiter = ':';
1193 break;
1195 default:
1196 DBG_ERROR(
1197 "INetURLObject::setAbsURIRef():"
1198 " Bad guessFSysStyleByCounting");
1199 break;
1201 bSkippedInitialSlash
1202 = pPos != pEnd && *pPos != nSegmentDelimiter;
1203 break;
// NOTE(review): the INET_PROT_FILE case appears to continue into this default
// branch when none of the productions above breaks out (the check against
// INET_PROT_FILE below depends on that) -- confirm against the unmangled
// source.
1206 default:
1208 // For INET_PROT_FILE, allow an empty authority ("//") to be
1209 // missing if the following path starts with an explicit "/"
1210 // (Java is notorious in generating such file URLs, so be
1211 // liberal here):
1212 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1213 pPos += 2;
1214 else if (!bSmart
1215 && !(m_eScheme == INET_PROT_FILE
1216 && pPos != pEnd && *pPos == '/'))
1218 setInvalid();
1219 return false;
1221 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
// The authority ends at the first "/", at "?" (only for schemes that have a
// query), at the fragment delimiter, or at the end of the input:
1223 sal_Unicode const * pAuthority = pPos;
1224 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1225 while (pPos < pEnd && *pPos != '/' && *pPos != c
1226 && *pPos != nFragmentDelimiter)
1227 ++pPos;
1228 if (getSchemeInfo().m_bUser)
1229 if (getSchemeInfo().m_bHost)
// Split the authority at the first '@' into user info and host/port:
1231 sal_Unicode const * p1 = pAuthority;
1232 while (p1 < pPos && *p1 != '@')
1233 ++p1;
1234 if (p1 == pPos)
1236 pHostPortBegin = pAuthority;
1237 pHostPortEnd = pPos;
1239 else
1241 pUserInfoBegin = pAuthority;
1242 pUserInfoEnd = p1;
1243 pHostPortBegin = p1 + 1;
1244 pHostPortEnd = pPos;
1247 else
1249 pUserInfoBegin = pAuthority;
1250 pUserInfoEnd = pPos;
1252 else if (getSchemeInfo().m_bHost)
1254 pHostPortBegin = pAuthority;
1255 pHostPortEnd = pPos;
1257 else if (pPos != pAuthority)
1259 setInvalid();
1260 return false;
1262 break;
1266 if (pUserInfoBegin)
1268 Part ePart = m_eScheme == INET_PROT_IMAP ?
1269 PART_IMAP_ACHAR :
1270 m_eScheme == INET_PROT_VIM ?
1271 PART_VIM :
1272 PART_USER_PASSWORD;
1273 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1274 bool bSupportsAuth
1275 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1276 bool bHasAuth = false;
1277 rtl::OUStringBuffer aSynUser;
1278 sal_Unicode const * p1 = pUserInfoBegin;
// Copy the user name up to an unescaped ":" (password follows) or an
// unescaped ";auth=" (authentication mechanism follows), whichever the
// scheme supports:
1279 while (p1 < pUserInfoEnd)
1281 EscapeType eEscapeType;
1282 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1283 cEscapePrefix, eMechanism,
1284 eCharset, eEscapeType);
1285 if (eEscapeType == ESCAPE_NO)
1287 if (nUTF32 == ':' && bSupportsPassword)
1289 bHasAuth = true;
1290 break;
1292 else if (nUTF32 == ';' && bSupportsAuth
1293 && pUserInfoEnd - p1
1294 > RTL_CONSTASCII_LENGTH("auth=")
1295 && INetMIME::equalIgnoreCase(
1297 p1 + RTL_CONSTASCII_LENGTH("auth="),
1298 "auth="))
1300 p1 += RTL_CONSTASCII_LENGTH("auth=");
1301 bHasAuth = true;
1302 break;
1305 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart,
1306 cEscapePrefix, eCharset, false);
1308 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1309 aSynAbsURIRef.getLength());
1310 if (bHasAuth)
1312 if (bSupportsPassword)
1314 aSynAbsURIRef.append(sal_Unicode(':'));
1315 rtl::OUStringBuffer aSynAuth;
1316 while (p1 < pUserInfoEnd)
1318 EscapeType eEscapeType;
1319 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1320 cEscapePrefix,
1321 eMechanism, eCharset,
1322 eEscapeType);
1323 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1324 ePart, cEscapePrefix, eCharset, false);
1326 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1327 aSynAbsURIRef.getLength());
1329 else
1331 aSynAbsURIRef.
1332 appendAscii(RTL_CONSTASCII_STRINGPARAM(";AUTH="));
1333 rtl::OUStringBuffer aSynAuth;
1334 while (p1 < pUserInfoEnd)
1336 EscapeType eEscapeType;
1337 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1338 cEscapePrefix,
1339 eMechanism, eCharset,
1340 eEscapeType);
1341 if (!INetMIME::isIMAPAtomChar(nUTF32))
1343 setInvalid();
1344 return false;
1346 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1347 ePart, cEscapePrefix, eCharset, false);
1349 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1350 aSynAbsURIRef.getLength());
1353 if (pHostPortBegin)
1354 aSynAbsURIRef.append(sal_Unicode('@'));
1357 if (pHostPortBegin)
// For schemes with a port, scan back over trailing digits; a ':' in front of
// them marks the start of the port.  Otherwise pPort stays at pHostPortEnd,
// meaning "no port".
1359 sal_Unicode const * pPort = pHostPortEnd;
1360 if (getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd)
1362 sal_Unicode const * p1 = pHostPortEnd - 1;
1363 while (p1 > pHostPortBegin && INetMIME::isDigit(*p1))
1364 --p1;
1365 if (*p1 == ':')
1366 pPort = p1;
1368 bool bNetBiosName = false;
1369 switch (m_eScheme)
1371 case INET_PROT_FILE:
1372 case INET_PROT_VND_SUN_STAR_WFS:
1373 // If the host equals "LOCALHOST" (unencoded and ignoring
1374 // case), turn it into an empty host:
1375 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1376 "localhost"))
1377 pHostPortBegin = pPort;
1378 bNetBiosName = true;
1379 break;
// LDAP and SMB accept an empty host, but not an empty host with a port:
1381 case INET_PROT_LDAP:
1382 case INET_PROT_SMB:
1383 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1385 setInvalid();
1386 return false;
1388 break;
// All other schemes require a non-empty host:
1390 default:
1391 if (pHostPortBegin == pPort)
1393 setInvalid();
1394 return false;
1396 break;
1398 rtl::OUStringBuffer aSynHost;
1399 if (!parseHostOrNetBiosName(
1400 pHostPortBegin, pPort, bOctets, eMechanism, eCharset,
1401 bNetBiosName, &aSynHost))
1403 setInvalid();
1404 return false;
1406 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1407 aSynAbsURIRef.getLength());
1408 if (pPort != pHostPortEnd)
1410 aSynAbsURIRef.append(sal_Unicode(':'));
1411 m_aPort.set(aSynAbsURIRef,
1412 rtl::OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1413 aSynAbsURIRef.getLength());
1418 // Parse <path>
1419 rtl::OUStringBuffer aSynPath;
1420 if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset,
1421 bSkippedInitialSlash, nSegmentDelimiter,
1422 nAltSegmentDelimiter,
1423 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1424 nFragmentDelimiter, aSynPath))
1426 setInvalid();
1427 return false;
1429 m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1430 aSynAbsURIRef.getLength());
1432 // Parse ?<query>
1433 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1435 aSynAbsURIRef.append(sal_Unicode('?'));
1436 rtl::OUStringBuffer aSynQuery;
1437 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1439 EscapeType eEscapeType;
1440 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1441 eMechanism, eCharset, eEscapeType);
1442 appendUCS4(aSynQuery, nUTF32, eEscapeType, bOctets,
1443 PART_URIC, cEscapePrefix, eCharset, true);
1445 m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1446 aSynAbsURIRef.getLength());
1449 // Parse #<fragment>
1450 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1452 aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1453 rtl::OUStringBuffer aSynFragment;
1454 for (++pPos; pPos < pEnd;)
1456 EscapeType eEscapeType;
1457 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1458 eMechanism, eCharset, eEscapeType);
1459 appendUCS4(aSynFragment, nUTF32, eEscapeType, bOctets, PART_URIC,
1460 cEscapePrefix, eCharset, true);
1462 m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1463 aSynAbsURIRef.getLength());
// Anything left over at this point could not be parsed:
1466 if (pPos != pEnd)
1468 setInvalid();
1469 return false;
1472 m_aAbsURIRef = aSynAbsURIRef;
1474 // At this point references of type "\\server\path" have
1475 // been converted to "file://server/path".
1476 #ifdef LINUX
1477 if (m_eScheme==INET_PROT_FILE && !m_aHost.isEmpty()) {
1478 // Change "file://server/path" URIs to "smb://server/path" on
1479 // Linux
1480 // Leave file URIs without a host unchanged.
1481 changeScheme(INET_PROT_SMB);
1483 #endif
1485 #ifdef WIN
1486 if (m_eScheme==INET_PROT_SMB) {
1487 // Change "smb://server/path" URIs to "file://server/path"
1488 // URIs on Windows, since Windows doesn't understand the
1489 // SMB scheme.
1490 changeScheme(INET_PROT_FILE);
1492 #endif
1494 return true;
1497 //============================================================================
1498 void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
1499 ::rtl::OUString aTmpStr=m_aAbsURIRef.makeStringAndClear();
1500 int oldSchemeLen=strlen(getSchemeInfo().m_pScheme);
1501 m_eScheme=eTargetScheme;
1502 int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
1503 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1504 m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
1505 int delta=newSchemeLen-oldSchemeLen;
1506 m_aUser+=delta;
1507 m_aAuth+=delta;
1508 m_aHost+=delta;
1509 m_aPort+=delta;
1510 m_aPath+=delta;
1511 m_aQuery+=delta;
1512 m_aFragment+=delta;
1515 //============================================================================
// Resolves rTheRelURIRef against this object, which serves as the base URI,
// and stores the resulting absolute URI in rTheAbsURIRef.
//
// rWasAbsolute is set to true when the input was already absolute (it
// carried its own scheme, or in smart mode was recognized as an absolute DOS
// or UNC file system path) and to false when resolution fails.
// bIgnoreFragment drops any fragment from the result.  bSmart enables the
// file system path heuristics described below, which may use the notations
// selected by eStyle.  bRelativeNonURIs turns off URI interpretation of the
// input: everything is encoded (ENCODE_ALL) and query and fragment
// delimiters are not recognized.  bOctets, eMechanism and eCharset are
// handed on to getUTF32/appendUCS4.
//
// Returns true if a valid absolute URI was produced, false otherwise.
1516 bool INetURLObject::convertRelToAbs(rtl::OUString const & rTheRelURIRef,
1517 bool bOctets,
1518 INetURLObject & rTheAbsURIRef,
1519 bool & rWasAbsolute,
1520 EncodeMechanism eMechanism,
1521 rtl_TextEncoding eCharset,
1522 bool bIgnoreFragment, bool bSmart,
1523 bool bRelativeNonURIs, FSysStyle eStyle)
1524 const
1526 sal_Unicode const * p = rTheRelURIRef.getStr();
1527 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
// The input has a scheme if it starts with a known prefix or, failing that,
// with something parseScheme() accepts as a generic scheme:
1529 sal_Unicode const * pPrefixBegin = p;
1530 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1531 bool hasScheme = pPrefix != 0;
1532 if (!hasScheme) {
1533 pPrefixBegin = p;
1534 hasScheme = parseScheme(&pPrefixBegin, pEnd, '#').getLength() > 0;
// 0x80000000 is used as the value of a delimiter that is switched off:
1537 sal_uInt32 nSegmentDelimiter = '/';
1538 sal_uInt32 nQueryDelimiter
1539 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1540 sal_uInt32 nFragmentDelimiter = '#';
1541 Part ePart = PART_VISIBLE;
1543 if (!hasScheme && bSmart)
1545 // If the input matches any of the following productions (for which
1546 // the appropriate style bit is set in eStyle), it is assumed to be an
1547 // absolute file system path, rather than a relative URI reference.
1548 // (This is only a subset of the productions used for scheme detection
1549 // in INetURLObject::setAbsURIRef(), because most of those productions
1550 // interfere with the syntax of relative URI references.) The
1551 // productions use the auxiliary rules
1553 // domain = label *("." label)
1554 // label = alphanum [*(alphanum / "-") alphanum]
1555 // alphanum = ALPHA / DIGIT
1556 // UCS4 = <any UCS4 character>
1558 // 1st Production (UNC file; FSYS_DOS only):
1559 // "\\" domain ["\" *UCS4]
1561 // 2nd Production (Unix-like DOS file; FSYS_DOS only):
1562 // ALPHA ":" ["/" *UCS4]
1564 // 3rd Production (DOS file; FSYS_DOS only):
1565 // ALPHA ":" ["\" *UCS4]
1566 if (eStyle & FSYS_DOS)
1568 bool bFSys = false;
1569 sal_Unicode const * q = p;
1570 if (pEnd - q >= 2
1571 && INetMIME::isAlpha(q[0])
1572 && q[1] == ':'
1573 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1574 bFSys = true; // 2nd, 3rd
1575 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1577 q += 2;
1578 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1579 q, pEnd - q, '\\');
1580 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1581 if (parseHostOrNetBiosName(
1582 q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
1583 true, NULL))
1585 bFSys = true; // 1st
// An absolute file system path is parsed on its own (in smart mode) instead
// of being resolved against the base; if that fails, processing continues
// below.
1588 if (bFSys)
1590 INetURLObject aNewURI;
1591 aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
1592 eCharset, true, eStyle);
1593 if (!aNewURI.HasError())
1595 rTheAbsURIRef = aNewURI;
1596 rWasAbsolute = true;
1597 return true;
1602 // When the base URL is a file URL, accept relative file system paths
1603 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1604 // and "#"), as well as relative URIs using "/" as delimiter:
1605 if (m_eScheme == INET_PROT_FILE)
1606 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1608 case FSYS_UNX:
1609 nSegmentDelimiter = '/';
1610 break;
1612 case FSYS_DOS:
1613 nSegmentDelimiter = '\\';
1614 bRelativeNonURIs = true;
1615 break;
1617 case FSYS_MAC:
1618 nSegmentDelimiter = ':';
1619 bRelativeNonURIs = true;
1620 break;
1622 default:
1623 DBG_ERROR("INetURLObject::convertRelToAbs():"
1624 " Bad guessFSysStyleByCounting");
1625 break;
1628 if (bRelativeNonURIs)
1630 eMechanism = ENCODE_ALL;
1631 nQueryDelimiter = 0x80000000;
1632 nFragmentDelimiter = 0x80000000;
1633 ePart = PART_VISIBLE_NONSPECIAL;
1637 // If the relative URI has the same scheme as the base URI, and that
1638 // scheme is hierarchical, then ignore its presence in the relative
1639 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1640 // step 3):
1641 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1642 && getSchemeInfo().m_bHierarchical)
1644 hasScheme = false;
// Skip the scheme name and the ':' that ends it:
1645 while (p != pEnd && *p++ != ':') ;
1647 rWasAbsolute = hasScheme;
1649 // Fast solution for non-relative URIs:
1650 if (hasScheme)
1652 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1653 if (aNewURI.HasError())
1655 rWasAbsolute = false;
1656 return false;
1659 if (bIgnoreFragment)
1660 aNewURI.clearFragment();
1661 rTheAbsURIRef = aNewURI;
1662 return true;
// From here on the input is relative.  The result is assembled in
// aSynAbsURIRef, starting with the base's scheme, while eState tracks which
// part of the input is processed next.
1665 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1666 STATE_DONE };
1668 rtl::OUStringBuffer aSynAbsURIRef;
1669 aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1670 aSynAbsURIRef.append(sal_Unicode(':'));
1672 sal_Char cEscapePrefix = getEscapePrefix();
1674 State eState = STATE_AUTH;
// bSameDoc stays true only if the input has neither authority nor path:
1675 bool bSameDoc = true;
1677 if (getSchemeInfo().m_bAuthority)
// Either the input brings its own "//authority", which is copied up to the
// next unescaped segment or fragment delimiter, or the base's authority is
// reused:
1679 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1681 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1682 p += 2;
1683 eState = STATE_ABS_PATH;
1684 bSameDoc = false;
1685 while (p != pEnd)
1687 EscapeType eEscapeType;
1688 sal_uInt32 nUTF32
1689 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1690 eCharset, eEscapeType);
1691 if (eEscapeType == ESCAPE_NO)
1693 if (nUTF32 == nSegmentDelimiter)
1694 break;
1695 else if (nUTF32 == nFragmentDelimiter)
1697 eState = STATE_FRAGMENT;
1698 break;
1701 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1702 PART_VISIBLE, cEscapePrefix, eCharset, true);
1705 else
1707 SubString aAuthority(getAuthority());
1708 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1709 + aAuthority.getBegin(),
1710 aAuthority.getLength());
// No authority was taken from the input: classify what follows.
1714 if (eState == STATE_AUTH)
1716 if (p == pEnd)
1717 eState = STATE_DONE;
1718 else if (*p == nFragmentDelimiter)
1720 ++p;
1721 eState = STATE_FRAGMENT;
1723 else if (*p == nSegmentDelimiter)
1725 ++p;
1726 eState = STATE_ABS_PATH;
1727 bSameDoc = false;
1729 else
1731 eState = STATE_REL_PATH;
1732 bSameDoc = false;
// Absolute path: copy the rest of the input up to an unescaped fragment
// delimiter, writing every unescaped segment delimiter as '/':
1736 if (eState == STATE_ABS_PATH)
1738 aSynAbsURIRef.append(sal_Unicode('/'));
1739 eState = STATE_DONE;
1740 while (p != pEnd)
1742 EscapeType eEscapeType;
1743 sal_uInt32 nUTF32
1744 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1745 eCharset, eEscapeType);
1746 if (eEscapeType == ESCAPE_NO)
1748 if (nUTF32 == nFragmentDelimiter)
1750 eState = STATE_FRAGMENT;
1751 break;
1753 else if (nUTF32 == nSegmentDelimiter)
1754 nUTF32 = '/';
1756 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1757 cEscapePrefix, eCharset, true);
1760 else if (eState == STATE_REL_PATH)
1762 if (!getSchemeInfo().m_bHierarchical)
1764 // Detect cases where a relative input could not be made absolute
1765 // because the given base URL is broken (most probably because it is
1766 // empty):
1767 OSL_ASSERT(!HasError());
1768 rWasAbsolute = false;
1769 return false;
// Take the base path up to and including its last '/' as the starting point:
1772 sal_Unicode const * pBasePathBegin
1773 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1774 sal_Unicode const * pBasePathEnd
1775 = pBasePathBegin + m_aPath.getLength();
1776 while (pBasePathEnd != pBasePathBegin)
1777 if (*(--pBasePathEnd) == '/')
1779 ++pBasePathEnd;
1780 break;
1783 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1784 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1785 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1786 && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1) == '/',
1787 "INetURLObject::convertRelToAbs(): Bad base path");
// Process the relative path segment by segment:
1789 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1791 if (*p == '.')
// A "." segment is skipped:
1793 if (pEnd - p == 1
1794 || p[1] == nSegmentDelimiter
1795 || p[1] == nQueryDelimiter
1796 || p[1] == nFragmentDelimiter)
1798 ++p;
1799 if (p != pEnd && *p == nSegmentDelimiter)
1800 ++p;
1801 continue;
// A ".." segment removes the last segment appended so far, provided more
// than the leading '/' of the path is present:
1803 else if (pEnd - p >= 2
1804 && p[1] == '.'
1805 && (pEnd - p == 2
1806 || p[2] == nSegmentDelimiter
1807 || p[2] == nQueryDelimiter
1808 || p[2] == nFragmentDelimiter)
1809 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1811 p += 2;
1812 if (p != pEnd && *p == nSegmentDelimiter)
1813 ++p;
1815 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1816 while (i > nPathBegin && aSynAbsURIRef.charAt(i) != '/')
1817 --i;
1818 aSynAbsURIRef.setLength(i + 1);
1819 DBG_ASSERT(
1820 aSynAbsURIRef.getLength() > nPathBegin
1821 && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1)
1822 == '/',
1823 "INetURLObject::convertRelToAbs(): Bad base path");
1824 continue;
// Any other segment is copied, followed by '/' if a segment delimiter ends
// it:
1828 while (p != pEnd
1829 && *p != nSegmentDelimiter
1830 && *p != nQueryDelimiter
1831 && *p != nFragmentDelimiter)
1833 EscapeType eEscapeType;
1834 sal_uInt32 nUTF32
1835 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1836 eCharset, eEscapeType);
1837 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1838 cEscapePrefix, eCharset, true);
1840 if (p != pEnd && *p == nSegmentDelimiter)
1842 aSynAbsURIRef.append(sal_Unicode('/'));
1843 ++p;
// Copy whatever remains before the fragment (i.e., the query, if any):
1847 while (p != pEnd && *p != nFragmentDelimiter)
1849 EscapeType eEscapeType;
1850 sal_uInt32 nUTF32
1851 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1852 eCharset, eEscapeType);
1853 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1854 cEscapePrefix, eCharset, true);
1857 if (p == pEnd)
1858 eState = STATE_DONE;
1859 else
1861 ++p;
1862 eState = STATE_FRAGMENT;
// The input had neither authority nor path: reuse the base's path and, if
// present, its query (including the delimiter in front of it):
1865 else if (bSameDoc)
1867 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1868 m_aPath.getLength());
1869 if (m_aQuery.isPresent())
1870 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1871 + m_aQuery.getBegin() - 1,
1872 m_aQuery.getLength() + 1);
1875 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1877 aSynAbsURIRef.append(sal_Unicode('#'));
1878 while (p != pEnd)
1880 EscapeType eEscapeType;
1881 sal_uInt32 nUTF32
1882 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1883 eCharset, eEscapeType);
1884 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1885 PART_VISIBLE, cEscapePrefix, eCharset, true);
// Validate the assembled string by parsing it as a URI of its own:
1889 INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1890 if (aNewURI.HasError())
1892 // Detect cases where a relative input could not be made absolute
1893 // because the given base URL is broken (most probably because it is
1894 // empty):
1895 OSL_ASSERT(!HasError());
1896 rWasAbsolute = false;
1897 return false;
1900 rTheAbsURIRef = aNewURI;
1901 return true;
1904 //============================================================================
1905 bool INetURLObject::convertAbsToRel(rtl::OUString const & rTheAbsURIRef,
1906 bool bOctets, rtl::OUString & rTheRelURIRef,
1907 EncodeMechanism eEncodeMechanism,
1908 DecodeMechanism eDecodeMechanism,
1909 rtl_TextEncoding eCharset,
1910 FSysStyle eStyle) const
1912 // Check for hierarchical base URL:
1913 if (!getSchemeInfo().m_bHierarchical)
1915 rTheRelURIRef
1916 = decode(rTheAbsURIRef,
1917 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1918 eDecodeMechanism, eCharset);
1919 return false;
1922 // Convert the input (absolute or relative URI ref) to an absolute URI
1923 // ref:
1924 INetURLObject aSubject;
1925 bool bWasAbsolute;
1926 if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute,
1927 eEncodeMechanism, eCharset, false, false, false,
1928 eStyle))
1930 rTheRelURIRef
1931 = decode(rTheAbsURIRef,
1932 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1933 eDecodeMechanism, eCharset);
1934 return false;
1937 // Check for differing scheme or authority parts:
1938 if ((m_aScheme.compare(
1939 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1940 != 0)
1941 || (m_aUser.compare(
1942 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1943 != 0)
1944 || (m_aAuth.compare(
1945 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1946 != 0)
1947 || (m_aHost.compare(
1948 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1949 != 0)
1950 || (m_aPort.compare(
1951 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1952 != 0))
1954 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1955 return false;
1958 sal_Unicode const * pBasePathBegin
1959 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1960 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1961 sal_Unicode const * pSubjectPathBegin
1962 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1963 sal_Unicode const * pSubjectPathEnd
1964 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1966 // Make nMatch point past the last matching slash, or past the end of the
1967 // paths, in case they are equal:
1968 sal_Unicode const * pSlash = 0;
1969 sal_Unicode const * p1 = pBasePathBegin;
1970 sal_Unicode const * p2 = pSubjectPathBegin;
1971 for (;;)
1973 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1975 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1976 pSlash = p1;
1977 break;
1980 sal_Unicode c = *p1++;
1981 if (c != *p2++)
1982 break;
1983 if (c == '/')
1984 pSlash = p1;
1986 if (!pSlash)
1988 // One of the paths does not start with '/':
1989 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1990 return false;
1992 sal_Int32 nMatch = pSlash - pBasePathBegin;
1994 // If the two URLs are DOS file URLs starting with different volumes
1995 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1996 // relative (it could be, but some people do not like that):
1997 if (m_eScheme == INET_PROT_FILE
1998 && nMatch <= 1
1999 && hasDosVolume(eStyle)
2000 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
2002 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
2003 return false;
2006 // For every slash in the base path after nMatch, a prefix of "../" is
2007 // added to the new relative URL (if the common prefix of the two paths is
2008 // only "/"---but see handling of file URLs above---, the complete subject
2009 // path could go into the new relative URL instead, but some people don't
2010 // like that):
2011 rtl::OUStringBuffer aSynRelURIRef;
2012 // if (nMatch <= 1) nMatch = 0; else // see comment above
2013 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
2014 ++p)
2016 if (*p == '/')
2017 aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("../"));
2020 // If the new relative URL would start with "//" (i.e., it would be
2021 // mistaken for a relative URL starting with an authority part), or if the
2022 // new relative URL would neither be empty nor start with <"/"> nor start
2023 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
2024 // with a scheme part), then the new relative URL is prefixed with "./":
2025 if (aSynRelURIRef.getLength() == 0)
2027 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
2028 && pSubjectPathBegin[nMatch] == '/'
2029 && pSubjectPathBegin[nMatch + 1] == '/')
2031 aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
2033 else
2035 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
2036 p != pSubjectPathEnd && *p != '/'; ++p)
2038 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2040 aSynRelURIRef.
2041 appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
2042 break;
2048 // The remainder of the subject path, starting at nMatch, is appended to
2049 // the new relative URL:
2050 sal_Char cEscapePrefix = getEscapePrefix();
2051 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2052 cEscapePrefix, eDecodeMechanism, eCharset));
2054 // If the subject has defined query or fragment parts, they are appended
2055 // to the new relative URL:
2056 if (aSubject.m_aQuery.isPresent())
2058 aSynRelURIRef.append(sal_Unicode('?'));
2059 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, cEscapePrefix,
2060 eDecodeMechanism, eCharset));
2062 if (aSubject.m_aFragment.isPresent())
2064 aSynRelURIRef.append(sal_Unicode('#'));
2065 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2066 cEscapePrefix, eDecodeMechanism, eCharset));
2069 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2070 return true;
2073 //============================================================================
2074 // static
2075 bool INetURLObject::convertIntToExt(rtl::OUString const & rTheIntURIRef,
2076 bool bOctets, rtl::OUString & rTheExtURIRef,
2077 DecodeMechanism eDecodeMechanism,
2078 rtl_TextEncoding eCharset)
2080 sal_Char cEscapePrefix
2081 = getEscapePrefix(CompareProtocolScheme(rTheIntURIRef));
2082 rtl::OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE,
2083 cEscapePrefix, NOT_CANONIC, eCharset,
2084 true));
2085 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2086 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2087 sal_Unicode const * p = pBegin;
2088 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2089 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2090 if (bConvert)
2092 aSynExtURIRef =
2093 aSynExtURIRef.replaceAt(0, p - pBegin,
2094 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2096 rTheExtURIRef = decode(aSynExtURIRef, cEscapePrefix, eDecodeMechanism,
2097 eCharset);
2098 return bConvert;
2101 //============================================================================
2102 // static
2103 bool INetURLObject::convertExtToInt(rtl::OUString const & rTheExtURIRef,
2104 bool bOctets, rtl::OUString & rTheIntURIRef,
2105 DecodeMechanism eDecodeMechanism,
2106 rtl_TextEncoding eCharset)
2108 sal_Char cEscapePrefix
2109 = getEscapePrefix(CompareProtocolScheme(rTheExtURIRef));
2110 rtl::OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE,
2111 cEscapePrefix, NOT_CANONIC, eCharset,
2112 true));
2113 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2114 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2115 sal_Unicode const * p = pBegin;
2116 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2117 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2118 if (bConvert)
2120 aSynIntURIRef =
2121 aSynIntURIRef.replaceAt(0, p - pBegin,
2122 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2124 rTheIntURIRef = decode(aSynIntURIRef, cEscapePrefix, eDecodeMechanism,
2125 eCharset);
2126 return bConvert;
2129 //============================================================================
2130 // static
// Find the longest entry of the prefix table that the text [rBegin, pEnd)
// starts with.  Input characters are lower-cased before being compared, the
// table entries are compared as they are.
//
// On a match, rBegin is advanced behind the matched prefix and a pointer to
// the table entry is returned.  Without a match, rBegin is left unchanged and
// null is returned.
//
// The search narrows the candidate range [pFirst, pLast] one character
// position at a time, which only works as long as aMap stays sorted in
// ascending order of m_pPrefix (byte-wise comparison).
2131 INetURLObject::PrefixInfo const *
2132 INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2133 sal_Unicode const * pEnd)
2135 static PrefixInfo const aMap[]
2136 = { // dummy entry at front needed, because pLast may point here:
2137 { 0, 0, INET_PROT_NOT_VALID, PrefixInfo::INTERNAL },
2138 { ".component:", "staroffice.component:", INET_PROT_COMPONENT,
2139 PrefixInfo::INTERNAL },
2140 { ".uno:", "staroffice.uno:", INET_PROT_UNO,
2141 PrefixInfo::INTERNAL },
2142 { "cid:", 0, INET_PROT_CID, PrefixInfo::OFFICIAL },
2143 { "data:", 0, INET_PROT_DATA, PrefixInfo::OFFICIAL },
2144 { "dav:", 0, INET_PROT_DAV, PrefixInfo::OFFICIAL },
2145 { "davs:", 0, INET_PROT_DAVS, PrefixInfo::OFFICIAL },
2146 { "db:", "staroffice.db:", INET_PROT_DB, PrefixInfo::INTERNAL },
2147 { "file:", 0, INET_PROT_FILE, PrefixInfo::OFFICIAL },
2148 { "ftp:", 0, INET_PROT_FTP, PrefixInfo::OFFICIAL },
2149 { "http:", 0, INET_PROT_HTTP, PrefixInfo::OFFICIAL },
2150 { "https:", 0, INET_PROT_HTTPS, PrefixInfo::OFFICIAL },
2151 { "imap:", 0, INET_PROT_IMAP, PrefixInfo::OFFICIAL },
2152 { "javascript:", 0, INET_PROT_JAVASCRIPT, PrefixInfo::OFFICIAL },
2153 { "ldap:", 0, INET_PROT_LDAP, PrefixInfo::OFFICIAL },
2154 { "macro:", "staroffice.macro:", INET_PROT_MACRO,
2155 PrefixInfo::INTERNAL },
2156 { "mailto:", 0, INET_PROT_MAILTO, PrefixInfo::OFFICIAL },
2157 { "news:", 0, INET_PROT_NEWS, PrefixInfo::OFFICIAL },
2158 { "out:", "staroffice.out:", INET_PROT_OUT,
2159 PrefixInfo::INTERNAL },
2160 { "pop3:", "staroffice.pop3:", INET_PROT_POP3,
2161 PrefixInfo::INTERNAL },
2162 { "private:", "staroffice.private:", INET_PROT_PRIV_SOFFICE,
2163 PrefixInfo::INTERNAL },
2164 { "private:factory/", "staroffice.factory:",
2165 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2166 { "private:helpid/", "staroffice.helpid:", INET_PROT_PRIV_SOFFICE,
2167 PrefixInfo::INTERNAL },
2168 { "private:java/", "staroffice.java:", INET_PROT_PRIV_SOFFICE,
2169 PrefixInfo::INTERNAL },
2170 { "private:searchfolder:", "staroffice.searchfolder:",
2171 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2172 { "private:trashcan:", "staroffice.trashcan:",
2173 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2174 { "slot:", "staroffice.slot:", INET_PROT_SLOT,
2175 PrefixInfo::INTERNAL },
2176 { "smb:", 0, INET_PROT_SMB, PrefixInfo::OFFICIAL },
2177 { "staroffice.component:", ".component:", INET_PROT_COMPONENT,
2178 PrefixInfo::EXTERNAL },
2179 { "staroffice.db:", "db:", INET_PROT_DB, PrefixInfo::EXTERNAL },
2180 { "staroffice.factory:", "private:factory/",
2181 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2182 { "staroffice.helpid:", "private:helpid/", INET_PROT_PRIV_SOFFICE,
2183 PrefixInfo::EXTERNAL },
2184 { "staroffice.java:", "private:java/", INET_PROT_PRIV_SOFFICE,
2185 PrefixInfo::EXTERNAL },
2186 { "staroffice.macro:", "macro:", INET_PROT_MACRO,
2187 PrefixInfo::EXTERNAL },
2188 { "staroffice.out:", "out:", INET_PROT_OUT,
2189 PrefixInfo::EXTERNAL },
2190 { "staroffice.pop3:", "pop3:", INET_PROT_POP3,
2191 PrefixInfo::EXTERNAL },
2192 { "staroffice.private:", "private:", INET_PROT_PRIV_SOFFICE,
2193 PrefixInfo::EXTERNAL },
2194 { "staroffice.searchfolder:", "private:searchfolder:",
2195 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2196 { "staroffice.slot:", "slot:", INET_PROT_SLOT,
2197 PrefixInfo::EXTERNAL },
2198 { "staroffice.trashcan:", "private:trashcan:",
2199 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2200 { "staroffice.uno:", ".uno:", INET_PROT_UNO,
2201 PrefixInfo::EXTERNAL },
2202 { "staroffice.vim:", "vim:", INET_PROT_VIM,
2203 PrefixInfo::EXTERNAL },
2204 { "staroffice:", "private:", INET_PROT_PRIV_SOFFICE,
2205 PrefixInfo::EXTERNAL },
2206 { "telnet:", 0, INET_PROT_TELNET, PrefixInfo::OFFICIAL },
2207 { "vim:", "staroffice.vim:", INET_PROT_VIM,
2208 PrefixInfo::INTERNAL },
2209 { "vnd.sun.star.cmd:", 0, INET_PROT_VND_SUN_STAR_CMD,
2210 PrefixInfo::OFFICIAL },
2211 { "vnd.sun.star.expand:", 0, INET_PROT_VND_SUN_STAR_EXPAND,
2212 PrefixInfo::OFFICIAL },
2213 { "vnd.sun.star.help:", 0, INET_PROT_VND_SUN_STAR_HELP,
2214 PrefixInfo::OFFICIAL },
2215 { "vnd.sun.star.hier:", 0, INET_PROT_VND_SUN_STAR_HIER,
2216 PrefixInfo::OFFICIAL },
2217 { "vnd.sun.star.odma:", 0, INET_PROT_VND_SUN_STAR_ODMA,
2218 PrefixInfo::OFFICIAL },
2219 { "vnd.sun.star.pkg:", 0, INET_PROT_VND_SUN_STAR_PKG,
2220 PrefixInfo::OFFICIAL },
2221 { "vnd.sun.star.tdoc:", 0, INET_PROT_VND_SUN_STAR_TDOC,
2222 PrefixInfo::OFFICIAL },
2223 { "vnd.sun.star.webdav:", 0, INET_PROT_VND_SUN_STAR_WEBDAV,
2224 PrefixInfo::OFFICIAL },
2225 { "vnd.sun.star.wfs:", 0, INET_PROT_VND_SUN_STAR_WFS,
2226 PrefixInfo::OFFICIAL },
2227 { "webdav:", 0, INET_PROT_WEBDAV, PrefixInfo::OFFICIAL },
2228 { "webdavs:", 0, INET_PROT_WEBDAVS, PrefixInfo::OFFICIAL },
2229 { "wfs:", "vnd.sun.star.wfs:", INET_PROT_VND_SUN_STAR_WFS,
2230 PrefixInfo::ALIAS } };
// pFirst skips the dummy entry; pLast addresses the last real entry.
2231 PrefixInfo const * pFirst = aMap + 1;
2232 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
// pMatch/pMatched remember the longest complete prefix seen so far and the
// input position right behind it.
2233 PrefixInfo const * pMatch = 0;
2234 sal_Unicode const * pMatched = rBegin;
2235 sal_Unicode const * p = rBegin;
// i is the character index currently compared in all remaining candidates.
2236 sal_Int32 i = 0;
2237 for (; pFirst < pLast; ++i)
// The first candidate ends exactly here, so it is a complete match; record
// it and keep looking for a longer one among the remaining candidates.
2239 if (pFirst->m_pPrefix[i] == '\0')
2241 pMatch = pFirst++;
2242 pMatched = p;
2244 if (p >= pEnd)
2245 break;
2246 sal_uInt32 nChar = INetMIME::toLowerCase(*p++);
// Drop candidates whose i-th character is smaller/larger than the input's.
2247 while (pFirst <= pLast && sal_uChar(pFirst->m_pPrefix[i]) < nChar)
2248 ++pFirst;
2249 while (pFirst <= pLast && sal_uChar(pLast->m_pPrefix[i]) > nChar)
2250 --pLast;
// Exactly one candidate is left: compare the rest of it directly.
2252 if (pFirst == pLast)
2254 sal_Char const * q = pFirst->m_pPrefix + i;
2255 while (p < pEnd && *q != '\0'
2256 && INetMIME::toLowerCase(*p) == sal_uChar(*q))
2258 ++p;
2259 ++q;
2261 if (*q == '\0')
2263 rBegin = p;
2264 return pFirst;
// Fall back to the longest complete prefix recorded during narrowing (if any).
2267 rBegin = pMatched;
2268 return pMatch;
2271 //============================================================================
2272 sal_Int32 INetURLObject::getAuthorityBegin() const
2274 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2275 "INetURLObject::getAuthority(): Bad scheme");
2276 sal_Int32 nBegin;
2277 if (m_aUser.isPresent())
2278 nBegin = m_aUser.getBegin();
2279 else if (m_aHost.isPresent())
2280 nBegin = m_aHost.getBegin();
2281 else
2282 nBegin = m_aPath.getBegin();
2283 nBegin -= RTL_CONSTASCII_LENGTH("//");
2284 DBG_ASSERT(m_aAbsURIRef.charAt(nBegin) == '/'
2285 && m_aAbsURIRef.charAt(nBegin + 1) == '/',
2286 "INetURLObject::getAuthority(): Bad authority");
2287 return nBegin;
2290 //============================================================================
2291 INetURLObject::SubString INetURLObject::getAuthority() const
2293 sal_Int32 nBegin = getAuthorityBegin();
2294 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2295 m_aHost.isPresent() ? m_aHost.getEnd() :
2296 m_aAuth.isPresent() ? m_aAuth.getEnd() :
2297 m_aUser.isPresent() ? m_aUser.getEnd() :
2298 nBegin + RTL_CONSTASCII_LENGTH("//");
2299 return SubString(nBegin, nEnd - nBegin);
2302 //============================================================================
2303 bool INetURLObject::setUser(rtl::OUString const & rTheUser,
2304 bool bOctets, EncodeMechanism eMechanism,
2305 rtl_TextEncoding eCharset)
2307 if (
2308 !getSchemeInfo().m_bUser ||
2309 (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
2312 return false;
2315 rtl::OUString aNewUser(encodeText(rTheUser, bOctets,
2316 m_eScheme == INET_PROT_IMAP ?
2317 PART_IMAP_ACHAR :
2318 m_eScheme == INET_PROT_VIM ?
2319 PART_VIM :
2320 PART_USER_PASSWORD,
2321 getEscapePrefix(), eMechanism, eCharset,
2322 false));
2323 sal_Int32 nDelta;
2324 if (m_aUser.isPresent())
2325 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2326 else if (m_aHost.isPresent())
2328 m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@'));
2329 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2331 else if (getSchemeInfo().m_bHost)
2332 return false;
2333 else
2334 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2335 m_aAuth += nDelta;
2336 m_aHost += nDelta;
2337 m_aPort += nDelta;
2338 m_aPath += nDelta;
2339 m_aQuery += nDelta;
2340 m_aFragment += nDelta;
2341 return true;
2344 namespace
2346 void lcl_Erase(rtl::OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2348 rtl::OUString sTemp(rBuf.makeStringAndClear());
2349 rBuf.append(sTemp.replaceAt(index, count, rtl::OUString()));
2353 //============================================================================
2354 bool INetURLObject::clearPassword()
2356 if (!getSchemeInfo().m_bPassword)
2357 return false;
2358 if (m_aAuth.isPresent())
2360 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2361 m_aAuth.getLength() + 1);
2362 sal_Int32 nDelta = m_aAuth.clear() - 1;
2363 m_aHost += nDelta;
2364 m_aPort += nDelta;
2365 m_aPath += nDelta;
2366 m_aQuery += nDelta;
2367 m_aFragment += nDelta;
2369 return true;
2372 //============================================================================
2373 bool INetURLObject::setPassword(rtl::OUString const & rThePassword,
2374 bool bOctets, EncodeMechanism eMechanism,
2375 rtl_TextEncoding eCharset)
2377 if (!getSchemeInfo().m_bPassword)
2378 return false;
2379 rtl::OUString aNewAuth(encodeText(rThePassword, bOctets,
2380 m_eScheme == INET_PROT_VIM ?
2381 PART_VIM : PART_USER_PASSWORD,
2382 getEscapePrefix(), eMechanism, eCharset,
2383 false));
2384 sal_Int32 nDelta;
2385 if (m_aAuth.isPresent())
2386 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2387 else if (m_aUser.isPresent())
2389 m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':'));
2390 nDelta
2391 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2393 else if (m_aHost.isPresent())
2395 m_aAbsURIRef.insert(m_aHost.getBegin(),
2396 rtl::OUString::createFromAscii(":@"));
2397 m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aHost.getBegin());
2398 nDelta
2399 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2401 else if (getSchemeInfo().m_bHost)
2402 return false;
2403 else
2405 m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':'));
2406 m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aPath.getBegin());
2407 nDelta
2408 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2410 m_aHost += nDelta;
2411 m_aPort += nDelta;
2412 m_aPath += nDelta;
2413 m_aQuery += nDelta;
2414 m_aFragment += nDelta;
2415 return true;
2418 //============================================================================
2419 // static
// Parse a host (domain name, IPv4 address, or bracketed IPv6 address) from
// the text [rBegin, pEnd) with a character-driven state machine.
//
// Scanning stops at the first character that does not fit the current state
// (or at pEnd); whether the text consumed so far is a valid host is decided
// by the state reached at that point (see the switch after "done:").
//
// On success, returns true, advances rBegin to the position where scanning
// stopped, and stores the canonic form in rCanonic:
//  - domain names are copied verbatim from the input;
//  - IPv4 octets are re-emitted as decimal numbers;
//  - IPv6 addresses keep their brackets, with hex groups re-emitted via
//    OUString::valueOf(..., 16) and embedded IPv4 octets as decimal numbers.
// On failure, returns false and leaves rBegin and rCanonic untouched.
//
// nNumber/nDigits accumulate the numeric group currently being read; nOctets
// counts the dotted-decimal octets seen so far.
2420 bool INetURLObject::parseHost(
2421 sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2422 rtl::OUString & rCanonic)
2424 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2425 // IPv4 address directly follows the abbreviating "::". The ABNF in
2426 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2427 // mentions "::13:1.68.3". This algorithm accepts both variants:
2428 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2429 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2430 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2431 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2432 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2433 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2434 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2435 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
2436 rtl::OUStringBuffer aTheCanonic;
2437 sal_uInt32 nNumber = 0;
2438 int nDigits = 0;
2439 int nOctets = 0;
2440 State eState = STATE_INITIAL;
2441 sal_Unicode const * p = rBegin;
2442 for (; p != pEnd; ++p)
2443 switch (eState)
// The first character selects the family: '[' starts IPv6, a letter starts
// a domain name, a digit tentatively starts an IPv4 address.
2445 case STATE_INITIAL:
2446 if (*p == '[')
2448 aTheCanonic.append(sal_Unicode('['));
2449 eState = STATE_IP6;
2451 else if (INetMIME::isAlpha(*p))
2452 eState = STATE_TOPLABEL;
2453 else if (INetMIME::isDigit(*p))
2455 nNumber = INetMIME::getWeight(*p);
2456 nDigits = 1;
2457 nOctets = 1;
2458 eState = STATE_IP4;
2460 else
2461 goto done;
2462 break;
// LABEL states: inside a label that began with a digit.  TOPLABEL states:
// inside a label that began with a letter.
2464 case STATE_LABEL:
2465 if (*p == '.')
2466 eState = STATE_LABEL_DOT;
2467 else if (*p == '-')
2468 eState = STATE_LABEL_HYPHEN;
2469 else if (!INetMIME::isAlphanumeric(*p))
2470 goto done;
2471 break;
2473 case STATE_LABEL_HYPHEN:
2474 if (INetMIME::isAlphanumeric(*p))
2475 eState = STATE_LABEL;
2476 else if (*p != '-')
2477 goto done;
2478 break;
2480 case STATE_LABEL_DOT:
2481 if (INetMIME::isAlpha(*p))
2482 eState = STATE_TOPLABEL;
2483 else if (INetMIME::isDigit(*p))
2484 eState = STATE_LABEL;
2485 else
2486 goto done;
2487 break;
2489 case STATE_TOPLABEL:
2490 if (*p == '.')
2491 eState = STATE_TOPLABEL_DOT;
2492 else if (*p == '-')
2493 eState = STATE_TOPLABEL_HYPHEN;
2494 else if (!INetMIME::isAlphanumeric(*p))
2495 goto done;
2496 break;
2498 case STATE_TOPLABEL_HYPHEN:
2499 if (INetMIME::isAlphanumeric(*p))
2500 eState = STATE_TOPLABEL;
2501 else if (*p != '-')
2502 goto done;
2503 break;
2505 case STATE_TOPLABEL_DOT:
2506 if (INetMIME::isAlpha(*p))
2507 eState = STATE_TOPLABEL;
2508 else if (INetMIME::isDigit(*p))
2509 eState = STATE_LABEL;
2510 else
2511 goto done;
2512 break;
// Tentative IPv4 address.  Anything that cannot be dotted-decimal (a fifth
// octet, a fourth digit, a letter, a hyphen) switches over to the domain
// name states instead of failing.
2514 case STATE_IP4:
2515 if (*p == '.')
2516 if (nOctets < 4)
2518 aTheCanonic.append(
2519 rtl::OUString::valueOf(sal_Int32(nNumber)));
2520 aTheCanonic.append(sal_Unicode('.'));
2521 ++nOctets;
2522 eState = STATE_IP4_DOT;
2524 else
2525 eState = STATE_LABEL_DOT;
2526 else if (*p == '-')
2527 eState = STATE_LABEL_HYPHEN;
2528 else if (INetMIME::isAlpha(*p))
2529 eState = STATE_LABEL;
2530 else if (INetMIME::isDigit(*p))
2531 if (nDigits < 3)
2533 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2534 ++nDigits;
2536 else
2537 eState = STATE_LABEL;
2538 else
2539 goto done;
2540 break;
2542 case STATE_IP4_DOT:
2543 if (INetMIME::isAlpha(*p))
2544 eState = STATE_TOPLABEL;
2545 else if (INetMIME::isDigit(*p))
2547 nNumber = INetMIME::getWeight(*p);
2548 nDigits = 1;
2549 eState = STATE_IP4;
2551 else
2552 goto done;
2553 break;
// IPv6: right behind the opening '['.
2555 case STATE_IP6:
2556 if (*p == ':')
2557 eState = STATE_IP6_COLON;
2558 else if (INetMIME::isHexDigit(*p))
2560 nNumber = INetMIME::getHexWeight(*p);
2561 nDigits = 1;
2562 eState = STATE_IP6_HEXSEQ1;
2564 else
2565 goto done;
2566 break;
2568 case STATE_IP6_COLON:
2569 if (*p == ':')
2571 aTheCanonic.appendAscii(RTL_CONSTASCII_STRINGPARAM("::"));
2572 eState = STATE_IP6_2COLON;
2574 else
2575 goto done;
2576 break;
// Behind the abbreviating "::".  The groups that follow are handled by the
// HEXSEQ2 states.
2578 case STATE_IP6_2COLON:
2579 if (*p == ']')
2580 eState = STATE_IP6_DONE;
2581 else if (*p == ':')
2583 aTheCanonic.append(sal_Unicode(':'));
2584 eState = STATE_IP6_3COLON;
2586 else if (INetMIME::isDigit(*p))
2588 nNumber = INetMIME::getWeight(*p);
2589 nDigits = 1;
2590 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2592 else if (INetMIME::isHexDigit(*p))
2594 nNumber = INetMIME::getHexWeight(*p);
2595 nDigits = 1;
2596 eState = STATE_IP6_HEXSEQ2;
2598 else
2599 goto done;
2600 break;
// Behind ":::" only an IPv4 address may follow (see the RFC 2373 note above).
2602 case STATE_IP6_3COLON:
2603 if (INetMIME::isDigit(*p))
2605 nNumber = INetMIME::getWeight(*p);
2606 nDigits = 1;
2607 nOctets = 1;
2608 eState = STATE_IP6_IP4;
2610 else
2611 goto done;
2612 break;
// HEXSEQ1 states: hex groups in front of a possible "::".
2614 case STATE_IP6_HEXSEQ1:
2615 if (*p == ']')
2617 aTheCanonic.append(
2618 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2619 eState = STATE_IP6_DONE;
2621 else if (*p == ':')
2623 aTheCanonic.append(
2624 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2625 aTheCanonic.append(sal_Unicode(':'));
2626 eState = STATE_IP6_HEXSEQ1_COLON;
2628 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2630 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2631 ++nDigits;
2633 else
2634 goto done;
2635 break;
2637 case STATE_IP6_HEXSEQ1_COLON:
2638 if (*p == ':')
2640 aTheCanonic.append(sal_Unicode(':'));
2641 eState = STATE_IP6_2COLON;
2643 else if (INetMIME::isDigit(*p))
2645 nNumber = INetMIME::getWeight(*p);
2646 nDigits = 1;
2647 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2649 else if (INetMIME::isHexDigit(*p))
2651 nNumber = INetMIME::getHexWeight(*p);
2652 nDigits = 1;
2653 eState = STATE_IP6_HEXSEQ1;
2655 else
2656 goto done;
2657 break;
// A group made of decimal digits only so far: it may be a hex group or the
// first octet of an embedded IPv4 address.  The digits are accumulated as
// hex; if a '.' follows, the nibbles are reinterpreted as decimal digits.
2659 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2660 if (*p == ']')
2662 aTheCanonic.append(
2663 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2664 eState = STATE_IP6_DONE;
2666 else if (*p == ':')
2668 aTheCanonic.append(
2669 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2670 aTheCanonic.append(sal_Unicode(':'));
2671 eState = STATE_IP6_HEXSEQ1_COLON;
2673 else if (*p == '.')
2675 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2676 + (nNumber & 15);
2677 aTheCanonic.append(
2678 rtl::OUString::valueOf(sal_Int32(nNumber)));
2679 aTheCanonic.append(sal_Unicode('.'));
2680 nOctets = 2;
2681 eState = STATE_IP6_IP4_DOT;
2683 else if (INetMIME::isDigit(*p) && nDigits < 3)
2685 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2686 ++nDigits;
2688 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2690 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2691 ++nDigits;
2692 eState = STATE_IP6_HEXSEQ1;
2694 else
2695 goto done;
2696 break;
// HEXSEQ2 states: hex groups behind the "::"; unlike HEXSEQ1, a second "::"
// is not accepted here.
2698 case STATE_IP6_HEXSEQ2:
2699 if (*p == ']')
2701 aTheCanonic.append(
2702 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2703 eState = STATE_IP6_DONE;
2705 else if (*p == ':')
2707 aTheCanonic.append(
2708 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2709 aTheCanonic.append(sal_Unicode(':'));
2710 eState = STATE_IP6_HEXSEQ2_COLON;
2712 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2714 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2715 ++nDigits;
2717 else
2718 goto done;
2719 break;
2721 case STATE_IP6_HEXSEQ2_COLON:
2722 if (INetMIME::isDigit(*p))
2724 nNumber = INetMIME::getWeight(*p);
2725 nDigits = 1;
2726 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2728 else if (INetMIME::isHexDigit(*p))
2730 nNumber = INetMIME::getHexWeight(*p);
2731 nDigits = 1;
2732 eState = STATE_IP6_HEXSEQ2;
2734 else
2735 goto done;
2736 break;
// Same hex-or-IPv4 ambiguity as STATE_IP6_HEXSEQ1_MAYBE_IP4, behind "::".
2738 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2739 if (*p == ']')
2741 aTheCanonic.append(
2742 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2743 eState = STATE_IP6_DONE;
2745 else if (*p == ':')
2747 aTheCanonic.append(
2748 rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2749 aTheCanonic.append(sal_Unicode(':'));
2750 eState = STATE_IP6_HEXSEQ2_COLON;
2752 else if (*p == '.')
2754 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2755 + (nNumber & 15);
2756 aTheCanonic.append(
2757 rtl::OUString::valueOf(sal_Int32(nNumber)));
2758 aTheCanonic.append(sal_Unicode('.'));
2759 nOctets = 2;
2760 eState = STATE_IP6_IP4_DOT;
2762 else if (INetMIME::isDigit(*p) && nDigits < 3)
2764 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2765 ++nDigits;
2767 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2769 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2770 ++nDigits;
2771 eState = STATE_IP6_HEXSEQ2;
2773 else
2774 goto done;
2775 break;
// IPv4 address embedded at the end of an IPv6 address: exactly four octets
// are required before the closing ']'.
2777 case STATE_IP6_IP4:
2778 if (*p == ']')
2779 if (nOctets == 4)
2781 aTheCanonic.append(
2782 rtl::OUString::valueOf(sal_Int32(nNumber)));
2783 eState = STATE_IP6_DONE;
2785 else
2786 goto done;
2787 else if (*p == '.')
2788 if (nOctets < 4)
2790 aTheCanonic.append(
2791 rtl::OUString::valueOf(sal_Int32(nNumber)));
2792 aTheCanonic.append(sal_Unicode('.'));
2793 ++nOctets;
2794 eState = STATE_IP6_IP4_DOT;
2796 else
2797 goto done;
2798 else if (INetMIME::isDigit(*p) && nDigits < 3)
2800 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2801 ++nDigits;
2803 else
2804 goto done;
2805 break;
2807 case STATE_IP6_IP4_DOT:
2808 if (INetMIME::isDigit(*p))
2810 nNumber = INetMIME::getWeight(*p);
2811 nDigits = 1;
2812 eState = STATE_IP6_IP4;
2814 else
2815 goto done;
2816 break;
// The closing ']' has been consumed; whatever follows is not part of the host.
2818 case STATE_IP6_DONE:
2819 goto done;
// Decide from the final state whether the text consumed so far is a host.
2821 done:
2822 switch (eState)
// Domain name: the canonic form is the consumed input itself, so anything
// collected while the text still looked like an IPv4 address is dropped.
2824 case STATE_LABEL:
2825 case STATE_TOPLABEL:
2826 case STATE_TOPLABEL_DOT:
2827 aTheCanonic.setLength(0);
2828 aTheCanonic.append(rBegin, p - rBegin);
2829 rBegin = p;
2830 rCanonic = aTheCanonic.makeStringAndClear();
2831 return true;
// IPv4: valid only with exactly four octets; the last octet is still pending
// in nNumber and is appended here.
2833 case STATE_IP4:
2834 if (nOctets == 4)
2836 aTheCanonic.append(
2837 rtl::OUString::valueOf(sal_Int32(nNumber)));
2838 rBegin = p;
2839 rCanonic = aTheCanonic.makeStringAndClear();
2840 return true;
2842 return false;
2844 case STATE_IP6_DONE:
2845 aTheCanonic.append(sal_Unicode(']'));
2846 rBegin = p;
2847 rCanonic = aTheCanonic.makeStringAndClear();
2848 return true;
// Every other state means the text ended in the middle of a construct.
2850 default:
2851 return false;
2855 //============================================================================
2856 // static
2857 bool INetURLObject::parseHostOrNetBiosName(
2858 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
2859 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2860 rtl::OUStringBuffer* pCanonic)
2862 rtl::OUString aTheCanonic;
2863 if (pBegin < pEnd)
2865 sal_Unicode const * p = pBegin;
2866 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2868 if (bNetBiosName)
2870 rtl::OUStringBuffer buf;
2871 while (pBegin < pEnd)
2873 EscapeType eEscapeType;
2874 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, '%',
2875 eMechanism, eCharset,
2876 eEscapeType);
2877 if (!INetMIME::isVisible(nUTF32))
2878 return false;
2879 if (!INetMIME::isAlphanumeric(nUTF32))
2880 switch (nUTF32)
2882 case '"':
2883 case '*':
2884 case '+':
2885 case ',':
2886 case '/':
2887 case ':':
2888 case ';':
2889 case '<':
2890 case '=':
2891 case '>':
2892 case '?':
2893 case '[':
2894 case '\\':
2895 case ']':
2896 case '`':
2897 case '|':
2898 return false;;
2900 if (pCanonic != NULL) {
2901 appendUCS4(
2902 buf, nUTF32, eEscapeType, bOctets, PART_URIC, '%',
2903 eCharset, true);
2906 aTheCanonic = buf.makeStringAndClear();
2908 else
2909 return false;
2912 if (pCanonic != NULL) {
2913 *pCanonic = aTheCanonic;
2915 return true;
2918 //============================================================================
2919 // static
2920 rtl::OUString INetURLObject::encodeHostPort(rtl::OUString const & rTheHostPort,
2921 bool bOctets,
2922 EncodeMechanism eMechanism,
2923 rtl_TextEncoding eCharset)
2925 sal_Int32 nPort = rTheHostPort.getLength();
2926 if (nPort != 0)
2928 sal_Int32 i = nPort - 1;
2929 while (i != 0 && INetMIME::isDigit(rTheHostPort.getStr()[i]))
2930 --i;
2931 if (rTheHostPort.getStr()[i] == ':')
2932 nPort = i;
2934 rtl::OUString aResult(encodeText(rTheHostPort.copy(0, nPort), bOctets,
2935 PART_HOST_EXTRA, '%', eMechanism, eCharset,
2936 true));
2937 aResult += rTheHostPort.copy(nPort);
2938 return aResult;
2941 //============================================================================
2942 bool INetURLObject::setHost(rtl::OUString const & rTheHost, bool bOctets,
2943 EncodeMechanism eMechanism,
2944 rtl_TextEncoding eCharset)
2946 if (!getSchemeInfo().m_bHost)
2947 return false;
2948 rtl::OUStringBuffer aSynHost(rTheHost);
2949 bool bNetBiosName = false;
2950 switch (m_eScheme)
2952 case INET_PROT_FILE:
2953 case INET_PROT_VND_SUN_STAR_WFS:
2955 rtl::OUString sTemp(aSynHost);
2956 if (sTemp.equalsIgnoreAsciiCaseAsciiL(
2957 RTL_CONSTASCII_STRINGPARAM("localhost")))
2959 aSynHost.setLength(0);
2961 bNetBiosName = true;
2963 break;
2964 case INET_PROT_LDAP:
2965 if (aSynHost.getLength() == 0 && m_aPort.isPresent())
2966 return false;
2967 break;
2969 default:
2970 if (aSynHost.getLength() == 0)
2971 return false;
2972 break;
2974 if (!parseHostOrNetBiosName(
2975 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2976 bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost))
2977 return false;
2978 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2979 m_aPort += nDelta;
2980 m_aPath += nDelta;
2981 m_aQuery += nDelta;
2982 m_aFragment += nDelta;
2983 return true;
2986 //============================================================================
2987 // static
2988 bool INetURLObject::parsePath(INetProtocol eScheme,
2989 sal_Unicode const ** pBegin,
2990 sal_Unicode const * pEnd,
2991 bool bOctets,
2992 EncodeMechanism eMechanism,
2993 rtl_TextEncoding eCharset,
2994 bool bSkippedInitialSlash,
2995 sal_uInt32 nSegmentDelimiter,
2996 sal_uInt32 nAltSegmentDelimiter,
2997 sal_uInt32 nQueryDelimiter,
2998 sal_uInt32 nFragmentDelimiter,
2999 rtl::OUStringBuffer &rSynPath)
3001 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
3003 sal_Unicode const * pPos = *pBegin;
3004 rtl::OUStringBuffer aTheSynPath;
3006 switch (eScheme)
3008 case INET_PROT_NOT_VALID:
3009 return false;
3011 case INET_PROT_FTP:
3012 case INET_PROT_IMAP:
3013 if (pPos < pEnd && *pPos != '/')
3014 return false;
3015 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3017 EscapeType eEscapeType;
3018 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3019 '%', eMechanism,
3020 eCharset, eEscapeType);
3021 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3022 PART_HTTP_PATH, '%', eCharset, true);
3024 if (aTheSynPath.getLength() == 0)
3025 aTheSynPath.append(sal_Unicode('/'));
3026 break;
3028 case INET_PROT_HTTP:
3029 case INET_PROT_VND_SUN_STAR_WEBDAV:
3030 case INET_PROT_HTTPS:
3031 case INET_PROT_SMB:
3032 case INET_PROT_DAV:
3033 case INET_PROT_DAVS:
3034 case INET_PROT_WEBDAV:
3035 case INET_PROT_WEBDAVS:
3036 if (pPos < pEnd && *pPos != '/')
3037 return false;
3038 while (pPos < pEnd && *pPos != nQueryDelimiter
3039 && *pPos != nFragmentDelimiter)
3041 EscapeType eEscapeType;
3042 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3043 '%', eMechanism,
3044 eCharset, eEscapeType);
3045 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3046 PART_HTTP_PATH, '%', eCharset, true);
3048 if (aTheSynPath.getLength() == 0)
3049 aTheSynPath.append(sal_Unicode('/'));
3050 break;
3052 case INET_PROT_FILE:
3053 case INET_PROT_VND_SUN_STAR_WFS:
3055 if (bSkippedInitialSlash)
3056 aTheSynPath.append(sal_Unicode('/'));
3057 else if (pPos < pEnd
3058 && *pPos != nSegmentDelimiter
3059 && *pPos != nAltSegmentDelimiter)
3060 return false;
3061 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3063 EscapeType eEscapeType;
3064 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3065 '%', eMechanism,
3066 eCharset, eEscapeType);
3067 if (eEscapeType == ESCAPE_NO)
3069 if (nUTF32 == nSegmentDelimiter
3070 || nUTF32 == nAltSegmentDelimiter)
3072 aTheSynPath.append(sal_Unicode('/'));
3073 continue;
3075 else if (nUTF32 == '|'
3076 && (pPos == pEnd
3077 || *pPos == nFragmentDelimiter
3078 || *pPos == nSegmentDelimiter
3079 || *pPos == nAltSegmentDelimiter)
3080 && aTheSynPath.getLength() == 2
3081 && INetMIME::isAlpha(aTheSynPath.charAt(1)))
3083 // A first segment of <ALPHA "|"> is translated to
3084 // <ALPHA ":">:
3085 aTheSynPath.append(sal_Unicode(':'));
3086 continue;
3089 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3090 PART_PCHAR, '%', eCharset, true);
3092 if (aTheSynPath.getLength() == 0)
3093 aTheSynPath.append(sal_Unicode('/'));
3094 break;
3097 case INET_PROT_MAILTO:
3098 while (pPos < pEnd && *pPos != nQueryDelimiter
3099 && *pPos != nFragmentDelimiter)
3101 EscapeType eEscapeType;
3102 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3103 '%', eMechanism,
3104 eCharset, eEscapeType);
3105 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3106 PART_MAILTO, '%', eCharset, true);
3108 break;
3110 case INET_PROT_NEWS:
3111 if (pPos == pEnd || *pPos == nQueryDelimiter
3112 || *pPos == nFragmentDelimiter)
3113 return false;
3115 // Match <"*">:
3116 if (*pPos == '*'
3117 && (pEnd - pPos == 1 || pPos[1] == nQueryDelimiter
3118 || pPos[1] == nFragmentDelimiter))
3120 ++pPos;
3121 aTheSynPath.append(sal_Unicode('*'));
3122 break;
3125 // Match <group>:
3126 if (INetMIME::isAlpha(*pPos))
3127 for (sal_Unicode const * p = pPos + 1;; ++p)
3128 if (p == pEnd || *p == nQueryDelimiter
3129 || *p == nFragmentDelimiter)
3131 aTheSynPath.setLength(0);
3132 aTheSynPath.append(pPos, p - pPos);
3133 pPos = p;
3134 goto done;
3136 else if (!INetMIME::isAlphanumeric(*p) && *p != '+'
3137 && *p != '-' && *p != '.' && *p != '_')
3138 break;
3140 // Match <article>:
3141 for (;;)
3143 if (pPos == pEnd || *pPos == nQueryDelimiter
3144 || *pPos == nFragmentDelimiter)
3145 return false;
3146 if (*pPos == '@')
3147 break;
3148 EscapeType eEscapeType;
3149 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, '%',
3150 eMechanism, eCharset, eEscapeType);
3151 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3152 PART_NEWS_ARTICLE_LOCALPART, '%', eCharset, true);
3154 if (aTheSynPath.getLength() == 0)
3155 return false;
3156 ++pPos;
3157 aTheSynPath.append(sal_Unicode('@'));
3159 sal_Unicode const * p = pPos;
3160 while (p < pEnd && *pPos != nQueryDelimiter
3161 && *pPos != nFragmentDelimiter)
3162 ++p;
3163 rtl::OUString aCanonic;
3164 if (!parseHost(pPos, p, aCanonic))
3165 return false;
3166 aTheSynPath.append(aCanonic);
3169 done:
3170 break;
3172 case INET_PROT_POP3:
3173 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3175 EscapeType eEscapeType;
3176 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3177 '%', eMechanism,
3178 eCharset, eEscapeType);
3179 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3180 PART_MESSAGE_ID_PATH, '%', eCharset,
3181 true);
3183 break;
3185 case INET_PROT_PRIV_SOFFICE:
3186 case INET_PROT_SLOT:
3187 case INET_PROT_MACRO:
3188 case INET_PROT_UNO:
3189 case INET_PROT_COMPONENT:
3190 case INET_PROT_LDAP:
3191 while (pPos < pEnd && *pPos != nQueryDelimiter
3192 && *pPos != nFragmentDelimiter)
3194 EscapeType eEscapeType;
3195 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3196 '%', eMechanism,
3197 eCharset, eEscapeType);
3198 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3199 PART_PATH_BEFORE_QUERY, '%', eCharset,
3200 true);
3202 break;
3204 case INET_PROT_VND_SUN_STAR_HELP:
3205 if (pPos == pEnd
3206 || *pPos == nQueryDelimiter
3207 || *pPos == nFragmentDelimiter)
3208 aTheSynPath.append(sal_Unicode('/'));
3209 else
3211 if (*pPos != '/')
3212 return false;
3213 while (pPos < pEnd && *pPos != nQueryDelimiter
3214 && *pPos != nFragmentDelimiter)
3216 EscapeType eEscapeType;
3217 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3218 '%', eMechanism,
3219 eCharset, eEscapeType);
3220 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3221 PART_HTTP_PATH, '%', eCharset, true);
3224 break;
3226 case INET_PROT_JAVASCRIPT:
3227 case INET_PROT_DATA:
3228 case INET_PROT_CID:
3229 case INET_PROT_DB:
3230 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3232 EscapeType eEscapeType;
3233 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3234 '%', eMechanism,
3235 eCharset, eEscapeType);
3236 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3237 PART_URIC, '%', eCharset, true);
3239 break;
3241 case INET_PROT_OUT:
3242 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '~')
3243 return false;
3244 aTheSynPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("/~"));
3245 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3247 EscapeType eEscapeType;
3248 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3249 '%', eMechanism,
3250 eCharset, eEscapeType);
3251 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3252 PART_URIC, '%', eCharset, true);
3254 break;
3256 case INET_PROT_VND_SUN_STAR_HIER:
3257 case INET_PROT_VND_SUN_STAR_PKG:
3258 if (pPos < pEnd && *pPos != '/'
3259 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3260 return false;
3261 while (pPos < pEnd && *pPos != nQueryDelimiter
3262 && *pPos != nFragmentDelimiter)
3264 EscapeType eEscapeType;
3265 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3266 '%', eMechanism,
3267 eCharset, eEscapeType);
3268 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3269 aTheSynPath.append(sal_Unicode('/'));
3270 else
3271 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3272 PART_PCHAR, '%', eCharset, false);
3274 if (aTheSynPath.getLength() == 0)
3275 aTheSynPath.append(sal_Unicode('/'));
3276 break;
3278 case INET_PROT_VIM:
3280 /* test had to be taken out to make parsePath static; ok since INET_PROT_VIM is
3281 obsolete, anyway
3282 if (m_aUser.isEmpty())
3283 return false;
3285 sal_Unicode const * pPathEnd = pPos;
3286 while (pPathEnd < pEnd && *pPathEnd != nFragmentDelimiter)
3287 ++pPathEnd;
3288 aTheSynPath.append(sal_Unicode('/'));
3289 if (pPos == pPathEnd)
3290 break;
3291 else if (*pPos++ != '/')
3292 return false;
3293 if (pPos == pPathEnd)
3294 break;
3295 while (pPos < pPathEnd && *pPos != '/')
3297 EscapeType eEscapeType;
3298 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3299 '=', eMechanism,
3300 eCharset, eEscapeType);
3301 appendUCS4(aTheSynPath,
3302 eEscapeType == ESCAPE_NO ?
3303 INetMIME::toLowerCase(nUTF32) : nUTF32,
3304 eEscapeType, bOctets, PART_VIM, '=',
3305 eCharset, false);
3307 bool bInbox;
3308 rtl::OUString sCompare(aTheSynPath);
3309 if (sCompare.equalsAscii("/inbox"))
3310 bInbox = true;
3311 else if (sCompare.equalsAscii("/newsgroups"))
3312 bInbox = false;
3313 else
3314 return false;
3315 aTheSynPath.append(sal_Unicode('/'));
3316 if (pPos == pPathEnd)
3317 break;
3318 else if (*pPos++ != '/')
3319 return false;
3320 if (!bInbox)
3322 bool bEmpty = true;
3323 while (pPos < pPathEnd && *pPos != '/')
3325 EscapeType eEscapeType;
3326 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3327 '=', eMechanism,
3328 eCharset, eEscapeType);
3329 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3330 PART_VIM, '=', eCharset, false);
3331 bEmpty = false;
3333 if (bEmpty)
3334 return false;
3335 aTheSynPath.append(sal_Unicode('/'));
3336 if (pPos == pPathEnd)
3337 break;
3338 else if (*pPos++ != '/')
3339 return false;
3341 bool bEmpty = true;
3342 while (pPos < pPathEnd && *pPos != ':')
3344 EscapeType eEscapeType;
3345 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3346 '=', eMechanism,
3347 eCharset, eEscapeType);
3348 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3349 PART_VIM, '=', eCharset, false);
3350 bEmpty = false;
3352 if (bEmpty)
3353 return false;
3354 if (pPos == pPathEnd)
3355 break;
3356 else if (*pPos++ != ':')
3357 return false;
3358 aTheSynPath.append(sal_Unicode(':'));
3359 for (int i = 0; i < 3; ++i)
3361 if (i != 0)
3363 if (pPos == pPathEnd || *pPos++ != '.')
3364 return false;
3365 aTheSynPath.append(sal_Unicode('.'));
3367 bEmpty = true;
3368 while (pPos < pPathEnd && *pPos != '.')
3370 EscapeType eEscapeType;
3371 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3372 '=', eMechanism,
3373 eCharset, eEscapeType);
3374 if (!INetMIME::isDigit(nUTF32))
3375 return false;
3376 aTheSynPath.append(sal_Unicode(nUTF32));
3377 bEmpty = false;
3379 if (bEmpty)
3380 return false;
3382 if (pPos != pPathEnd)
3383 return false;
3384 break;
3387 case INET_PROT_VND_SUN_STAR_CMD:
3388 case INET_PROT_VND_SUN_STAR_EXPAND:
3390 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3391 return false;
3392 Part ePart = PART_URIC_NO_SLASH;
3393 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3395 EscapeType eEscapeType;
3396 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3397 '%', eMechanism,
3398 eCharset, eEscapeType);
3399 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart,
3400 '%', eCharset, true);
3401 ePart = PART_URIC;
3403 break;
3406 case INET_PROT_VND_SUN_STAR_ODMA:
3407 if (pPos < pEnd)
3409 if (*pPos == '/')
3410 ++pPos;
3411 else
3412 return false;
3414 aTheSynPath.append(sal_Unicode('/'));
3415 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3417 EscapeType eEscapeType;
3418 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3419 '%', eMechanism,
3420 eCharset, eEscapeType);
3421 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3422 PART_URIC_NO_SLASH, '%', eCharset, true);
3424 break;
3426 case INET_PROT_TELNET:
3427 if (pPos < pEnd)
3429 if (*pPos != '/' || pEnd - pPos > 1)
3430 return false;
3431 ++pPos;
3433 aTheSynPath.append(sal_Unicode('/'));
3434 break;
3436 case INET_PROT_VND_SUN_STAR_TDOC:
3437 if (pPos == pEnd || *pPos != '/')
3438 return false;
3439 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3441 EscapeType eEscapeType;
3442 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3443 '%', eMechanism,
3444 eCharset, eEscapeType);
3445 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3446 aTheSynPath.append(sal_Unicode('/'));
3447 else
3448 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3449 PART_PCHAR, '%', eCharset, false);
3451 break;
3453 case INET_PROT_GENERIC:
3454 case INET_PROT_GENERIC_HIERARCHICAL:
3455 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3457 EscapeType eEscapeType;
3458 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3459 '%', eMechanism,
3460 eCharset, eEscapeType);
3461 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3462 PART_URIC, '%', eCharset, true);
3464 if (aTheSynPath.getLength() == 0)
3465 return false;
3466 break;
3468 default:
3469 OSL_ASSERT(false);
3470 break;
3473 *pBegin = pPos;
3474 rSynPath = aTheSynPath;
3475 return true;
3478 //============================================================================
3479 bool INetURLObject::setPath(rtl::OUString const & rThePath, bool bOctets,
3480 EncodeMechanism eMechanism,
3481 rtl_TextEncoding eCharset)
3483 rtl::OUStringBuffer aSynPath;
3484 sal_Unicode const * p = rThePath.getStr();
3485 sal_Unicode const * pEnd = p + rThePath.getLength();
3486 if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false,
3487 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3488 || p != pEnd)
3489 return false;
3490 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3491 m_aQuery += nDelta;
3492 m_aFragment += nDelta;
3493 return true;
3496 //============================================================================
3497 bool INetURLObject::checkHierarchical() const {
3498 if (m_eScheme == INET_PROT_VND_SUN_STAR_EXPAND) {
3499 OSL_ENSURE(
3500 false, "INetURLObject::checkHierarchical vnd.sun.star.expand");
3501 return true;
3502 } else {
3503 return getSchemeInfo().m_bHierarchical;
3507 //============================================================================
3508 bool INetURLObject::appendSegment(rtl::OUString const & rTheSegment,
3509 bool bOctets, EncodeMechanism eMechanism,
3510 rtl_TextEncoding eCharset)
3512 return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true,
3513 eMechanism, eCharset);
3516 //============================================================================
3517 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3518 bool bIgnoreFinalSlash)
3519 const
3521 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3522 "INetURLObject::getSegment(): Bad index");
3524 if (!checkHierarchical())
3525 return SubString();
3527 sal_Unicode const * pPathBegin
3528 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3529 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3530 sal_Unicode const * pSegBegin;
3531 sal_Unicode const * pSegEnd;
3532 if (nIndex == LAST_SEGMENT)
3534 pSegEnd = pPathEnd;
3535 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3536 --pSegEnd;
3537 if (pSegEnd <= pPathBegin)
3538 return SubString();
3539 pSegBegin = pSegEnd - 1;
3540 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3541 --pSegBegin;
3543 else
3545 pSegBegin = pPathBegin;
3546 while (nIndex-- > 0)
3549 ++pSegBegin;
3550 if (pSegBegin >= pPathEnd)
3551 return SubString();
3553 while (*pSegBegin != '/');
3554 pSegEnd = pSegBegin + 1;
3555 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3556 ++pSegEnd;
3559 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3560 pSegEnd - pSegBegin);
3563 //============================================================================
3564 bool INetURLObject::insertName(rtl::OUString const & rTheName, bool bOctets,
3565 bool bAppendFinalSlash, sal_Int32 nIndex,
3566 bool bIgnoreFinalSlash,
3567 EncodeMechanism eMechanism,
3568 rtl_TextEncoding eCharset)
3570 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3571 "INetURLObject::insertName(): Bad index");
3573 if (!checkHierarchical())
3574 return false;
3576 sal_Unicode const * pPathBegin
3577 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3578 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3579 sal_Unicode const * pPrefixEnd;
3580 bool bInsertSlash;
3581 sal_Unicode const * pSuffixBegin;
3582 if (nIndex == LAST_SEGMENT)
3584 pPrefixEnd = pPathEnd;
3585 if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin &&
3586 pPrefixEnd[-1] == '/')
3588 --pPrefixEnd;
3590 bInsertSlash = bAppendFinalSlash;
3591 pSuffixBegin = pPathEnd;
3593 else if (nIndex == 0)
3595 pPrefixEnd = pPathBegin;
3596 bInsertSlash =
3597 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3598 (pPathBegin == pPathEnd && bAppendFinalSlash);
3599 pSuffixBegin =
3600 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3601 !bAppendFinalSlash && bIgnoreFinalSlash)
3602 ? pPathEnd : pPathBegin;
3604 else
3606 pPrefixEnd = pPathBegin;
3607 sal_Unicode const * pEnd = pPathEnd;
3608 if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/')
3609 --pEnd;
3610 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3611 bInsertSlash = false;
3612 pSuffixBegin = pPathEnd;
3613 while (nIndex-- > 0)
3614 for (;;)
3616 if (bSkip)
3617 ++pPrefixEnd;
3618 bSkip = true;
3619 if (pPrefixEnd >= pEnd)
3621 if (nIndex == 0)
3623 bInsertSlash = bAppendFinalSlash;
3624 break;
3626 else
3627 return false;
3629 if (*pPrefixEnd == '/')
3631 pSuffixBegin = pPrefixEnd;
3632 break;
3637 rtl::OUStringBuffer aNewPath;
3638 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3639 aNewPath.append(sal_Unicode('/'));
3640 aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR, getEscapePrefix(),
3641 eMechanism, eCharset, true));
3642 if (bInsertSlash) {
3643 aNewPath.append(sal_Unicode('/'));
3645 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3647 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
3648 RTL_TEXTENCODING_UTF8);
3651 //============================================================================
3652 bool INetURLObject::clearQuery()
3654 if (HasError())
3655 return false;
3656 if (m_aQuery.isPresent())
3658 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3659 m_aQuery.getLength() + 1);
3660 m_aFragment += m_aQuery.clear() - 1;
3662 return false;
3665 //============================================================================
3666 bool INetURLObject::setQuery(rtl::OUString const & rTheQuery, bool bOctets,
3667 EncodeMechanism eMechanism,
3668 rtl_TextEncoding eCharset)
3670 if (!getSchemeInfo().m_bQuery)
3671 return false;
3672 rtl::OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC,
3673 getEscapePrefix(), eMechanism, eCharset,
3674 true));
3675 sal_Int32 nDelta;
3676 if (m_aQuery.isPresent())
3677 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3678 else
3680 m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?'));
3681 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3682 + 1;
3684 m_aFragment += nDelta;
3685 return true;
3688 //============================================================================
3689 bool INetURLObject::clearFragment()
3691 if (HasError())
3692 return false;
3693 if (m_aFragment.isPresent())
3695 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3696 m_aFragment.clear();
3698 return true;
3701 //============================================================================
3702 bool INetURLObject::setFragment(rtl::OUString const & rTheFragment,
3703 bool bOctets, EncodeMechanism eMechanism,
3704 rtl_TextEncoding eCharset)
3706 if (HasError())
3707 return false;
3708 rtl::OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC,
3709 getEscapePrefix(), eMechanism,
3710 eCharset, true));
3711 if (m_aFragment.isPresent())
3712 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3713 else
3715 m_aAbsURIRef.append(sal_Unicode('#'));
3716 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3718 return true;
3721 //============================================================================
3722 INetURLObject::FTPType INetURLObject::getFTPType() const
3724 if (m_eScheme == INET_PROT_FTP
3725 && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH(";type=") + 1
3726 && rtl::OUString(m_aAbsURIRef).copy(
3727 m_aPath.getEnd() - (RTL_CONSTASCII_LENGTH(";type=") + 1),
3728 RTL_CONSTASCII_LENGTH(";type=")).equalsIgnoreAsciiCaseAscii(";type="))
3729 switch (m_aAbsURIRef.charAt(m_aPath.getEnd()))
3731 case 'A':
3732 case 'a':
3733 return FTP_TYPE_A;
3735 case 'D':
3736 case 'd':
3737 return FTP_TYPE_D;
3739 case 'I':
3740 case 'i':
3741 return FTP_TYPE_I;
3743 return FTP_TYPE_NONE;
3746 //============================================================================
3747 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3749 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3750 return (eStyle & FSYS_DOS) != 0
3751 && m_aPath.getLength() >= 3
3752 && p[0] == '/'
3753 && INetMIME::isAlpha(p[1])
3754 && p[2] == ':'
3755 && (m_aPath.getLength() == 3 || p[3] == '/');
3758 //============================================================================
3759 sal_uInt32 INetURLObject::getIMAPUID() const
3761 if (m_eScheme == INET_PROT_IMAP
3762 && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH("/;uid=") + 1)
3764 sal_Unicode const * pBegin = m_aAbsURIRef.getStr()
3765 + m_aPath.getBegin()
3766 + RTL_CONSTASCII_LENGTH("/;uid=");
3767 sal_Unicode const * pEnd = pBegin + m_aPath.getLength();
3768 sal_Unicode const * p = pEnd;
3769 while (p > pBegin && INetMIME::isDigit(p[-1]))
3770 --p;
3771 if (p < pEnd && *--p != '0'
3772 && rtl::OUString(m_aAbsURIRef).copy(
3773 p - RTL_CONSTASCII_LENGTH("/;uid=") - m_aAbsURIRef.getStr(),
3774 RTL_CONSTASCII_LENGTH("/;uid=")).equalsIgnoreAsciiCaseAscii("/;uid=")
3777 sal_uInt32 nUID;
3778 if (INetMIME::scanUnsigned(p, pEnd, false, nUID))
3779 return nUID;
3782 return 0;
3785 //============================================================================
3786 // static
3787 rtl::OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3788 sal_Unicode const * pEnd, bool bOctets,
3789 Part ePart, sal_Char cEscapePrefix,
3790 EncodeMechanism eMechanism,
3791 rtl_TextEncoding eCharset,
3792 bool bKeepVisibleEscapes)
3794 rtl::OUStringBuffer aResult;
3795 while (pBegin < pEnd)
3797 EscapeType eEscapeType;
3798 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix,
3799 eMechanism, eCharset, eEscapeType);
3800 appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart,
3801 cEscapePrefix, eCharset, bKeepVisibleEscapes);
3803 return aResult.makeStringAndClear();
3806 //============================================================================
3807 // static
3808 rtl::OUString INetURLObject::decode(sal_Unicode const * pBegin,
3809 sal_Unicode const * pEnd,
3810 sal_Char cEscapePrefix,
3811 DecodeMechanism eMechanism,
3812 rtl_TextEncoding eCharset)
3814 switch (eMechanism)
3816 case NO_DECODE:
3817 return rtl::OUString(pBegin, pEnd - pBegin);
3819 case DECODE_TO_IURI:
3820 eCharset = RTL_TEXTENCODING_UTF8;
3821 break;
3823 default:
3824 break;
3826 rtl::OUStringBuffer aResult;
3827 while (pBegin < pEnd)
3829 EscapeType eEscapeType;
3830 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix,
3831 WAS_ENCODED, eCharset, eEscapeType);
3832 switch (eEscapeType)
3834 case ESCAPE_NO:
3835 aResult.append(sal_Unicode(nUTF32));
3836 break;
3838 case ESCAPE_OCTET:
3839 appendEscape(aResult, cEscapePrefix, nUTF32);
3840 break;
3842 case ESCAPE_UTF32:
3843 if (
3844 INetMIME::isUSASCII(nUTF32) &&
3846 eMechanism == DECODE_TO_IURI ||
3848 eMechanism == DECODE_UNAMBIGUOUS &&
3849 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3854 appendEscape(aResult, cEscapePrefix, nUTF32);
3856 else
3857 aResult.append(sal_Unicode(nUTF32));
3858 break;
3861 return aResult.makeStringAndClear();
3864 //============================================================================
3865 rtl::OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3866 rtl_TextEncoding eCharset) const
3868 INetURLObject aTemp(*this);
3869 aTemp.clearPassword();
3870 return aTemp.GetMainURL(eMechanism, eCharset);
3873 //============================================================================
3874 rtl::OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3875 rtl_TextEncoding eCharset) const
3877 INetURLObject aTemp(*this);
3878 aTemp.clearFragment();
3879 return aTemp.GetMainURL(eMechanism, eCharset);
3882 //============================================================================
3883 rtl::OUString
3884 INetURLObject::getAbbreviated(
3885 star::uno::Reference< star::util::XStringWidth > const & rStringWidth,
3886 sal_Int32 nWidth,
3887 DecodeMechanism eMechanism,
3888 rtl_TextEncoding eCharset)
3889 const
3891 OSL_ENSURE(rStringWidth.is(), "specification violation");
3892 sal_Char cEscapePrefix = getEscapePrefix();
3893 rtl::OUStringBuffer aBuffer;
3894 aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3895 aBuffer.append(static_cast< sal_Unicode >(':'));
3896 bool bAuthority = getSchemeInfo().m_bAuthority;
3897 sal_Unicode const * pCoreBegin
3898 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3899 m_aPath.getBegin());
3900 sal_Unicode const * pCoreEnd
3901 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
3902 bool bSegment = false;
3903 if (getSchemeInfo().m_bHierarchical)
3905 rtl::OUString aRest;
3906 if (m_aQuery.isPresent())
3907 aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("?..."));
3908 else if (m_aFragment.isPresent())
3909 aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#..."));
3910 rtl::OUStringBuffer aTrailer;
3911 sal_Unicode const * pBegin = pCoreBegin;
3912 sal_Unicode const * pEnd = pCoreEnd;
3913 sal_Unicode const * pPrefixBegin = pBegin;
3914 sal_Unicode const * pSuffixEnd = pEnd;
3915 bool bPrefix = true;
3916 bool bSuffix = true;
3919 if (bSuffix)
3921 sal_Unicode const * p = pSuffixEnd - 1;
3922 if (pSuffixEnd == pCoreEnd && *p == '/')
3923 --p;
3924 while (*p != '/')
3925 --p;
3926 if (bAuthority && p == pCoreBegin + 1)
3927 --p;
3928 rtl::OUString
3929 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3930 1 : 0),
3931 pSuffixEnd,
3932 cEscapePrefix,
3933 eMechanism,
3934 eCharset));
3935 pSuffixEnd = p;
3936 rtl::OUStringBuffer aResult(aBuffer);
3937 if (pSuffixEnd != pBegin)
3938 aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3939 aResult.append(aSegment);
3940 aResult.append(aTrailer);
3941 aResult.append(aRest);
3942 if (rStringWidth->
3943 queryStringWidth(aResult.makeStringAndClear())
3944 <= nWidth)
3946 aTrailer.insert(0, aSegment);
3947 bSegment = true;
3948 pEnd = pSuffixEnd;
3950 else
3951 bSuffix = false;
3952 if (pPrefixBegin > pSuffixEnd)
3953 pPrefixBegin = pSuffixEnd;
3954 if (pBegin == pEnd)
3955 break;
3957 if (bPrefix)
3959 sal_Unicode const * p
3960 = pPrefixBegin
3961 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3963 OSL_ASSERT(p <= pEnd);
3964 while (p < pEnd && *p != '/')
3965 ++p;
3966 if (p == pCoreEnd - 1 && *p == '/')
3967 ++p;
3968 rtl::OUString
3969 aSegment(decode(pPrefixBegin
3970 + (pPrefixBegin == pCoreBegin ? 0 :
3972 p == pEnd ? p : p + 1,
3973 cEscapePrefix,
3974 eMechanism,
3975 eCharset));
3976 pPrefixBegin = p;
3977 rtl::OUStringBuffer aResult(aBuffer);
3978 aResult.append(aSegment);
3979 if (pPrefixBegin != pEnd)
3980 aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3981 aResult.append(aTrailer);
3982 aResult.append(aRest);
3983 if (rStringWidth->
3984 queryStringWidth(aResult.makeStringAndClear())
3985 <= nWidth)
3987 aBuffer.append(aSegment);
3988 bSegment = true;
3989 pBegin = pPrefixBegin;
3991 else
3992 bPrefix = false;
3993 if (pPrefixBegin > pSuffixEnd)
3994 pSuffixEnd = pPrefixBegin;
3995 if (pBegin == pEnd)
3996 break;
3999 while (bPrefix || bSuffix);
4000 if (bSegment)
4002 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
4003 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
4004 aBuffer.append(aTrailer);
4007 if (!bSegment)
4008 aBuffer.append(decode(pCoreBegin,
4009 pCoreEnd,
4010 cEscapePrefix,
4011 eMechanism,
4012 eCharset));
4013 if (m_aQuery.isPresent())
4015 aBuffer.append(static_cast< sal_Unicode >('?'));
4016 aBuffer.append(decode(m_aQuery, cEscapePrefix, eMechanism, eCharset));
4018 if (m_aFragment.isPresent())
4020 aBuffer.append(static_cast< sal_Unicode >('#'));
4021 aBuffer.
4022 append(decode(m_aFragment, cEscapePrefix, eMechanism, eCharset));
4024 if (aBuffer.getLength() != 0)
4026 rtl::OUStringBuffer aResult(aBuffer);
4027 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
4028 > nWidth)
4029 for (sal_Int32 i = aBuffer.getLength();;)
4031 if (i == 0)
4033 aBuffer.setLength(aBuffer.getLength() - 1);
4034 if (aBuffer.getLength() == 0)
4035 break;
4037 else
4039 aBuffer.setLength(--i);
4040 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
4042 aResult = aBuffer;
4043 if (rStringWidth->
4044 queryStringWidth(aResult.makeStringAndClear())
4045 <= nWidth)
4046 break;
4049 return aBuffer.makeStringAndClear();
4052 //============================================================================
4053 bool INetURLObject::operator ==(INetURLObject const & rObject) const
4055 if (m_eScheme != rObject.m_eScheme)
4056 return false;
4057 if (m_eScheme == INET_PROT_NOT_VALID)
4058 return (m_aAbsURIRef == rObject.m_aAbsURIRef) != false;
4059 if ((m_aScheme.compare(
4060 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
4061 != 0)
4062 || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE)
4063 || GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE)
4064 || !GetHost(NO_DECODE).equalsIgnoreAsciiCase(
4065 rObject.GetHost(NO_DECODE))
4066 || GetPort() != rObject.GetPort()
4067 || HasParam() != rObject.HasParam()
4068 || GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE)
4069 || GetMsgId(NO_DECODE) != rObject.GetMsgId(NO_DECODE))
4070 return false;
4071 rtl::OUString aPath1(GetURLPath(NO_DECODE));
4072 rtl::OUString aPath2(rObject.GetURLPath(NO_DECODE));
4073 switch (m_eScheme)
4075 case INET_PROT_FILE:
4076 case INET_PROT_VND_SUN_STAR_WFS:
4078 // If the URL paths of two file URLs only differ in that one has a
4079 // final '/' and the other has not, take the two paths as
4080 // equivalent (this could be usefull for other schemes, too):
4081 sal_Int32 nLength = aPath1.getLength();
4082 switch (nLength - aPath2.getLength())
4084 case -1:
4085 if (aPath2.getStr()[nLength] != '/')
4086 return false;
4087 break;
4089 case 0:
4090 break;
4092 case 1:
4093 if (aPath1.getStr()[--nLength] != '/')
4094 return false;
4095 break;
4097 default:
4098 return false;
4100 return aPath1.compareTo(aPath2, nLength) == 0;
4103 default:
4104 return (aPath1 == aPath2) != false;
4108 //============================================================================
4109 bool INetURLObject::operator <(INetURLObject const & rObject) const
4111 sal_Int32 nCompare = m_aScheme.compare(
4112 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef);
4113 if (nCompare < 0) {
4114 return true;
4115 } else if (nCompare > 0) {
4116 return false;
4118 sal_uInt32 nPort1 = GetPort();
4119 sal_uInt32 nPort2 = rObject.GetPort();
4120 if (nPort1 < nPort2)
4121 return true;
4122 else if (nPort1 > nPort2)
4123 return false;
4124 nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE));
4125 if (nCompare < 0)
4126 return true;
4127 else if (nCompare > 0)
4128 return false;
4129 nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE));
4130 if (nCompare < 0)
4131 return true;
4132 else if (nCompare > 0)
4133 return false;
4134 nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE));
4135 if (nCompare < 0)
4136 return true;
4137 else if (nCompare > 0)
4138 return false;
4139 const rtl::OUString &rPath1(GetURLPath(NO_DECODE));
4140 const rtl::OUString &rPath2(rObject.GetURLPath(NO_DECODE));
4141 nCompare = rPath1.compareTo(rPath2);
4142 if (nCompare < 0)
4143 return true;
4144 else if (nCompare > 0)
4145 return false;
4146 nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE));
4147 if (nCompare < 0)
4148 return true;
4149 else if (nCompare > 0)
4150 return false;
4151 return GetMsgId(NO_DECODE).compareTo(rObject.GetMsgId(NO_DECODE)) < 0;
4154 //============================================================================
4155 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
4156 rtl::OUString const & rTheUser,
4157 rtl::OUString const & rThePassword,
4158 rtl::OUString const & rTheHost,
4159 sal_uInt32 nThePort,
4160 rtl::OUString const & rThePath,
4161 EncodeMechanism eMechanism,
4162 rtl_TextEncoding eCharset)
4164 setInvalid();
4165 m_eScheme = eTheScheme;
4166 const char *pSchemeName = getSchemeInfo().m_pScheme;
4168 if (HasError() || pSchemeName[0] == '\0')
4169 return false;
4171 m_aAbsURIRef.setLength(0);
4172 m_aAbsURIRef.appendAscii(pSchemeName);
4173 m_aAbsURIRef.append(sal_Unicode(':'));
4174 if (getSchemeInfo().m_bAuthority)
4176 m_aAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
4177 bool bUserInfo = false;
4178 if (getSchemeInfo().m_bUser)
4180 if (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
4182 setInvalid();
4183 return false;
4185 if (rTheUser.getLength() != 0)
4187 m_aUser.set(m_aAbsURIRef,
4188 encodeText(rTheUser, false,
4189 m_eScheme == INET_PROT_IMAP ?
4190 PART_IMAP_ACHAR :
4191 m_eScheme == INET_PROT_VIM ?
4192 PART_VIM :
4193 PART_USER_PASSWORD,
4194 getEscapePrefix(), eMechanism,
4195 eCharset, false),
4196 m_aAbsURIRef.getLength());
4197 bUserInfo = true;
4200 else if (rTheUser.getLength() != 0)
4202 setInvalid();
4203 return false;
4205 if (rThePassword.getLength() != 0)
4207 if (getSchemeInfo().m_bPassword)
4209 m_aAbsURIRef.append(sal_Unicode(':'));
4210 m_aAuth.set(m_aAbsURIRef,
4211 encodeText(rThePassword, false,
4212 m_eScheme == INET_PROT_VIM ?
4213 PART_VIM : PART_USER_PASSWORD,
4214 getEscapePrefix(), eMechanism,
4215 eCharset, false),
4216 m_aAbsURIRef.getLength());
4217 bUserInfo = true;
4219 else
4221 setInvalid();
4222 return false;
4225 if (bUserInfo && getSchemeInfo().m_bHost)
4226 m_aAbsURIRef.append(sal_Unicode('@'));
4227 if (getSchemeInfo().m_bHost)
4229 rtl::OUStringBuffer aSynHost(rTheHost);
4230 bool bNetBiosName = false;
4231 switch (m_eScheme)
4233 case INET_PROT_FILE:
4234 case INET_PROT_VND_SUN_STAR_WFS:
4236 rtl::OUString sTemp(aSynHost);
4237 if (sTemp.equalsIgnoreAsciiCaseAsciiL(
4238 RTL_CONSTASCII_STRINGPARAM("localhost")))
4240 aSynHost.setLength(0);
4242 bNetBiosName = true;
4244 break;
4246 case INET_PROT_LDAP:
4247 if (aSynHost.getLength() == 0 && nThePort != 0)
4249 setInvalid();
4250 return false;
4252 break;
4254 default:
4255 if (aSynHost.getLength() == 0)
4257 setInvalid();
4258 return false;
4260 break;
4262 if (!parseHostOrNetBiosName(
4263 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
4264 false, eMechanism, eCharset, bNetBiosName, &aSynHost))
4266 setInvalid();
4267 return false;
4269 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
4270 m_aAbsURIRef.getLength());
4271 if (nThePort != 0)
4273 if (getSchemeInfo().m_bPort)
4275 m_aAbsURIRef.append(sal_Unicode(':'));
4276 m_aPort.set(m_aAbsURIRef,
4277 rtl::OUString::valueOf(sal_Int64(nThePort)),
4278 m_aAbsURIRef.getLength());
4280 else
4282 setInvalid();
4283 return false;
4287 else if (rTheHost.getLength() != 0 || nThePort != 0)
4289 setInvalid();
4290 return false;
4293 rtl::OUStringBuffer aSynPath;
4294 sal_Unicode const * p = rThePath.getStr();
4295 sal_Unicode const * pEnd = p + rThePath.getLength();
4296 if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/',
4297 0x80000000, 0x80000000, 0x80000000, aSynPath)
4298 || p != pEnd)
4300 setInvalid();
4301 return false;
4303 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
4304 m_aAbsURIRef.getLength());
4305 return true;
4308 //============================================================================
4309 // static
4310 rtl::OUString INetURLObject::GetAbsURL(rtl::OUString const & rTheBaseURIRef,
4311 rtl::OUString const & rTheRelURIRef,
4312 bool bIgnoreFragment,
4313 EncodeMechanism eEncodeMechanism,
4314 DecodeMechanism eDecodeMechanism,
4315 rtl_TextEncoding eCharset,
4316 FSysStyle eStyle)
4318 // Backwards compatibility:
4319 if (rTheRelURIRef.getLength() == 0 || rTheRelURIRef[0] == '#')
4320 return rTheRelURIRef;
4322 INetURLObject aTheAbsURIRef;
4323 bool bWasAbsolute;
4324 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
4325 convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef,
4326 bWasAbsolute, eEncodeMechanism,
4327 eCharset, bIgnoreFragment, false,
4328 false, eStyle)
4329 || eEncodeMechanism != WAS_ENCODED
4330 || eDecodeMechanism != DECODE_TO_IURI
4331 || eCharset != RTL_TEXTENCODING_UTF8 ?
4332 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
4333 rTheRelURIRef;
4336 //============================================================================
4337 rtl::OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
4338 rtl_TextEncoding eCharset) const
4340 rtl::OUString aTheExtURIRef;
4341 translateToExternal(
4342 rtl::OUString(m_aAbsURIRef), aTheExtURIRef, eMechanism, eCharset);
4343 return aTheExtURIRef;
4346 //============================================================================
4347 // static
4348 rtl::OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
4350 return rtl::OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
4353 //============================================================================
4354 // static
4355 INetProtocol INetURLObject::CompareProtocolScheme(rtl::OUString const &
4356 rTheAbsURIRef)
4358 sal_Unicode const * p = rTheAbsURIRef.getStr();
4359 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
4360 return pPrefix ? pPrefix->m_eScheme : INET_PROT_NOT_VALID;
4363 //============================================================================
4364 bool INetURLObject::hasPassword() const
4366 return m_aAuth.isPresent() && getSchemeInfo().m_bPassword;
4369 //============================================================================
4370 void INetURLObject::makeAuthCanonic()
4372 if (m_eScheme == INET_PROT_IMAP && m_aAuth.getLength() == 1
4373 && m_aAbsURIRef.charAt(m_aAuth.getBegin()) == '*')
4375 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin()
4376 - RTL_CONSTASCII_LENGTH(";AUTH="),
4377 RTL_CONSTASCII_LENGTH(";AUTH=*"));
4378 sal_Int32 nDelta = m_aAuth.clear() - RTL_CONSTASCII_LENGTH(";AUTH=");
4379 m_aPath += nDelta;
4380 m_aQuery += nDelta;
4381 m_aFragment += nDelta;
4385 //============================================================================
4386 rtl::OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
4387 rtl_TextEncoding eCharset)
4389 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
4390 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
4391 if (!getSchemeInfo().m_bHost)
4392 return rtl::OUString();
4393 rtl::OUStringBuffer aHostPort(decode(m_aHost, getEscapePrefix(),
4394 eMechanism, eCharset));
4395 if (m_aPort.isPresent())
4397 aHostPort.append(sal_Unicode(':'));
4398 aHostPort.append(decode(m_aPort, getEscapePrefix(),
4399 eMechanism, eCharset));
4401 return aHostPort.makeStringAndClear();
4404 //============================================================================
4405 sal_uInt32 INetURLObject::GetPort() const
4407 if (m_aPort.isPresent())
4409 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4410 sal_Unicode const * pEnd = p + m_aPort.getLength();
4411 sal_uInt32 nThePort;
4412 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4413 return nThePort;
4415 return 0;
4418 //============================================================================
4419 bool INetURLObject::SetPort(sal_uInt32 nThePort)
4421 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
4423 rtl::OUString aNewPort(rtl::OUString::valueOf(sal_Int64(nThePort)));
4424 sal_Int32 nDelta;
4425 if (m_aPort.isPresent())
4426 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
4427 else
4429 m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':'));
4430 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
4431 + 1;
4433 m_aPath += nDelta;
4434 m_aQuery += nDelta;
4435 m_aFragment += nDelta;
4436 return true;
4438 return false;
4441 //============================================================================
4442 void INetURLObject::makePortCanonic()
4444 if (m_aPort.isPresent())
4446 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4447 sal_Unicode const * pEnd = p + m_aPort.getLength();
4448 sal_uInt32 nThePort;
4449 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4451 sal_Int32 nDelta;
4452 if (nThePort != 0 && nThePort == getSchemeInfo().m_nDefaultPort)
4454 lcl_Erase(m_aAbsURIRef, m_aPort.getBegin() - 1,
4455 m_aPort.getLength() + 1);
4456 nDelta = m_aPort.clear() - 1;
4458 else
4459 nDelta = m_aPort.set(m_aAbsURIRef,
4460 rtl::OUString::valueOf(sal_Int64(nThePort)));
4461 m_aPath += nDelta;
4462 m_aQuery += nDelta;
4463 m_aFragment += nDelta;
4468 //============================================================================
4469 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
4471 if (!checkHierarchical())
4472 return 0;
4474 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4475 sal_Unicode const * pEnd = p + m_aPath.getLength();
4476 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
4477 --pEnd;
4478 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
4479 while (p != pEnd)
4480 if (*p++ == '/')
4481 ++n;
4482 return n;
4485 //============================================================================
4486 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4488 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4489 if (!aSegment.isPresent())
4490 return false;
4492 rtl::OUStringBuffer aNewPath;
4493 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4494 aSegment.getBegin() - m_aPath.getBegin());
4495 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4496 aNewPath.append(sal_Unicode('/'));
4497 else
4498 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4499 m_aPath.getEnd() - aSegment.getEnd());
4500 if (aNewPath.getLength() == 0 && !aSegment.isEmpty() &&
4501 m_aAbsURIRef[aSegment.getBegin()] == '/')
4503 aNewPath.append(sal_Unicode('/'));
4506 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4507 RTL_TEXTENCODING_UTF8);
4510 //============================================================================
4511 rtl::OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4512 DecodeMechanism eMechanism,
4513 rtl_TextEncoding eCharset) const
4515 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4516 if (!aSegment.isPresent())
4517 return rtl::OUString();
4519 sal_Unicode const * pSegBegin
4520 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4521 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4523 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4524 ++pSegBegin;
4525 sal_Unicode const * p = pSegBegin;
4526 while (p != pSegEnd && *p != ';')
4527 ++p;
4529 return decode(pSegBegin, p, getEscapePrefix(), eMechanism, eCharset);
4532 //============================================================================
4533 bool INetURLObject::setName(rtl::OUString const & rTheName, sal_Int32 nIndex,
4534 bool bIgnoreFinalSlash,
4535 EncodeMechanism eMechanism,
4536 rtl_TextEncoding eCharset)
4538 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4539 if (!aSegment.isPresent())
4540 return false;
4542 sal_Unicode const * pPathBegin
4543 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4544 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4545 sal_Unicode const * pSegBegin
4546 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4547 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4549 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4550 ++pSegBegin;
4551 sal_Unicode const * p = pSegBegin;
4552 while (p != pSegEnd && *p != ';')
4553 ++p;
4555 rtl::OUStringBuffer aNewPath;
4556 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4557 aNewPath.append(encodeText(rTheName, false, PART_PCHAR, getEscapePrefix(),
4558 eMechanism, eCharset, true));
4559 aNewPath.append(p, pPathEnd - p);
4561 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4562 RTL_TEXTENCODING_UTF8);
4565 //============================================================================
4566 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4567 const
4569 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4570 if (!aSegment.isPresent())
4571 return false;
4573 sal_Unicode const * pSegBegin
4574 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4575 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4577 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4578 ++pSegBegin;
4579 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4580 if (*p == '.' && p != pSegBegin)
4581 return true;
4582 return false;
4585 //============================================================================
4586 rtl::OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4587 DecodeMechanism eMechanism,
4588 rtl_TextEncoding eCharset) const
4590 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4591 if (!aSegment.isPresent())
4592 return rtl::OUString();
4594 sal_Unicode const * pSegBegin
4595 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4596 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4598 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4599 ++pSegBegin;
4600 sal_Unicode const * pExtension = 0;
4601 sal_Unicode const * p = pSegBegin;
4602 for (; p != pSegEnd && *p != ';'; ++p)
4603 if (*p == '.' && p != pSegBegin)
4604 pExtension = p;
4605 if (!pExtension)
4606 pExtension = p;
4608 return decode(pSegBegin, pExtension, getEscapePrefix(), eMechanism,
4609 eCharset);
4612 //============================================================================
4613 bool INetURLObject::setBase(rtl::OUString const & rTheBase, sal_Int32 nIndex,
4614 bool bIgnoreFinalSlash,
4615 EncodeMechanism eMechanism,
4616 rtl_TextEncoding eCharset)
4618 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4619 if (!aSegment.isPresent())
4620 return false;
4622 sal_Unicode const * pPathBegin
4623 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4624 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4625 sal_Unicode const * pSegBegin
4626 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4627 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4629 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4630 ++pSegBegin;
4631 sal_Unicode const * pExtension = 0;
4632 sal_Unicode const * p = pSegBegin;
4633 for (; p != pSegEnd && *p != ';'; ++p)
4634 if (*p == '.' && p != pSegBegin)
4635 pExtension = p;
4636 if (!pExtension)
4637 pExtension = p;
4639 rtl::OUStringBuffer aNewPath;
4640 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4641 aNewPath.append(encodeText(rTheBase, false, PART_PCHAR, getEscapePrefix(),
4642 eMechanism, eCharset, true));
4643 aNewPath.append(pExtension, pPathEnd - pExtension);
4645 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4646 RTL_TEXTENCODING_UTF8);
4649 //============================================================================
4650 rtl::OUString INetURLObject::getExtension(sal_Int32 nIndex,
4651 bool bIgnoreFinalSlash,
4652 DecodeMechanism eMechanism,
4653 rtl_TextEncoding eCharset) const
4655 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4656 if (!aSegment.isPresent())
4657 return rtl::OUString();
4659 sal_Unicode const * pSegBegin
4660 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4661 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4663 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4664 ++pSegBegin;
4665 sal_Unicode const * pExtension = 0;
4666 sal_Unicode const * p = pSegBegin;
4667 for (; p != pSegEnd && *p != ';'; ++p)
4668 if (*p == '.' && p != pSegBegin)
4669 pExtension = p;
4671 if (!pExtension)
4672 return rtl::OUString();
4674 return decode(pExtension + 1, p, getEscapePrefix(), eMechanism, eCharset);
4677 //============================================================================
4678 bool INetURLObject::setExtension(rtl::OUString const & rTheExtension,
4679 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4680 EncodeMechanism eMechanism,
4681 rtl_TextEncoding eCharset)
4683 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4684 if (!aSegment.isPresent())
4685 return false;
4687 sal_Unicode const * pPathBegin
4688 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4689 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4690 sal_Unicode const * pSegBegin
4691 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4692 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4694 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4695 ++pSegBegin;
4696 sal_Unicode const * pExtension = 0;
4697 sal_Unicode const * p = pSegBegin;
4698 for (; p != pSegEnd && *p != ';'; ++p)
4699 if (*p == '.' && p != pSegBegin)
4700 pExtension = p;
4701 if (!pExtension)
4702 pExtension = p;
4704 rtl::OUStringBuffer aNewPath;
4705 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4706 aNewPath.append(sal_Unicode('.'));
4707 aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR,
4708 getEscapePrefix(), eMechanism, eCharset, true));
4709 aNewPath.append(p, pPathEnd - p);
4711 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4712 RTL_TEXTENCODING_UTF8);
4715 //============================================================================
4716 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4718 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4719 if (!aSegment.isPresent())
4720 return false;
4722 sal_Unicode const * pPathBegin
4723 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4724 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4725 sal_Unicode const * pSegBegin
4726 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4727 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4729 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4730 ++pSegBegin;
4731 sal_Unicode const * pExtension = 0;
4732 sal_Unicode const * p = pSegBegin;
4733 for (; p != pSegEnd && *p != ';'; ++p)
4734 if (*p == '.' && p != pSegBegin)
4735 pExtension = p;
4736 if (!pExtension)
4737 return true;
4739 rtl::OUStringBuffer aNewPath;
4740 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4741 aNewPath.append(p, pPathEnd - p);
4743 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4744 RTL_TEXTENCODING_UTF8);
4747 //============================================================================
4748 bool INetURLObject::hasFinalSlash() const
4750 if (!checkHierarchical())
4751 return false;
4753 sal_Unicode const * pPathBegin
4754 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4755 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4756 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4759 //============================================================================
4760 bool INetURLObject::setFinalSlash()
4762 if (!checkHierarchical())
4763 return false;
4765 sal_Unicode const * pPathBegin
4766 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4767 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4768 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4769 return true;
4771 rtl::OUStringBuffer aNewPath;
4772 aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4773 aNewPath.append(sal_Unicode('/'));
4775 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4776 RTL_TEXTENCODING_UTF8);
4779 //============================================================================
4780 bool INetURLObject::removeFinalSlash()
4782 if (!checkHierarchical())
4783 return false;
4785 sal_Unicode const * pPathBegin
4786 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4787 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4788 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4789 return true;
4791 --pPathEnd;
4792 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4793 return false;
4794 rtl::OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4796 return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8);
4799 //============================================================================
4800 // static
4801 rtl::OUString INetURLObject::createFragment(rtl::OUString const & rText)
4803 rtl::OUString aFragment(rText);
4804 for (sal_Int32 i = 0; i < aFragment.getLength();)
4806 sal_Unicode c = aFragment.getStr()[i];
4807 if (mustEncode(c, PART_CREATEFRAGMENT))
4808 aFragment = aFragment.replaceAt(i, 1, rtl::OUString());
4809 else
4810 ++i;
4812 return aFragment;
4815 //============================================================================
4816 bool INetURLObject::setFSysPath(rtl::OUString const & rFSysPath,
4817 FSysStyle eStyle)
4819 sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4820 sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
4822 switch ((eStyle & FSYS_VOS ? 1 : 0)
4823 + (eStyle & FSYS_UNX ? 1 : 0)
4824 + (eStyle & FSYS_DOS ? 1 : 0)
4825 + (eStyle & FSYS_MAC ? 1 : 0))
4827 case 0:
4828 return false;
4830 case 1:
4831 break;
4833 default:
4834 if (eStyle & FSYS_VOS
4835 && pFSysEnd - pFSysBegin >= 2
4836 && pFSysBegin[0] == '/'
4837 && pFSysBegin[1] == '/')
4839 if (pFSysEnd - pFSysBegin >= 3
4840 && pFSysBegin[2] == '.'
4841 && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4843 eStyle = FSYS_VOS; // Production T1
4844 break;
4847 sal_Unicode const * p = pFSysBegin + 2;
4848 rtl::OUString aHost;
4849 if (parseHost(p, pFSysEnd, aHost)
4850 && (p == pFSysEnd || *p == '/'))
4852 eStyle = FSYS_VOS; // Production T2
4853 break;
4857 if (eStyle & FSYS_DOS
4858 && pFSysEnd - pFSysBegin >= 2
4859 && pFSysBegin[0] == '\\'
4860 && pFSysBegin[1] == '\\')
4862 sal_Unicode const * p = pFSysBegin + 2;
4863 rtl::OUString aHost;
4864 if (parseHost(p, pFSysEnd, aHost)
4865 && (p == pFSysEnd || *p == '\\'))
4867 eStyle = FSYS_DOS; // Production T3
4868 break;
4872 if (eStyle & FSYS_DOS
4873 && pFSysEnd - pFSysBegin >= 2
4874 && INetMIME::isAlpha(pFSysBegin[0])
4875 && pFSysBegin[1] == ':'
4876 && (pFSysEnd - pFSysBegin == 2
4877 || pFSysBegin[2] == '/'
4878 || pFSysBegin[2] == '\\'))
4880 eStyle = FSYS_DOS; // Productions T4, T5
4881 break;
4884 if (!(eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC)))
4885 return false;
4887 eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4888 // Production T6
4889 break;
4892 rtl::OUStringBuffer aSynAbsURIRef(rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("file://")));
4894 switch (eStyle)
4896 case FSYS_VOS:
4898 sal_Unicode const * p = pFSysBegin;
4899 if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4900 return false;
4901 if (p != pFSysEnd && *p == '.'
4902 && (pFSysEnd - p == 1 || p[1] == '/'))
4903 ++p;
4904 for (; p != pFSysEnd; ++p)
4905 switch (*p)
4907 case '#':
4908 case '%':
4909 appendEscape(aSynAbsURIRef, '%', *p);
4910 break;
4912 default:
4913 aSynAbsURIRef.append(*p);
4914 break;
4916 break;
4919 case FSYS_UNX:
4921 sal_Unicode const * p = pFSysBegin;
4922 if (p != pFSysEnd && *p != '/')
4923 return false;
4924 for (; p != pFSysEnd; ++p)
4925 switch (*p)
4927 case '|':
4928 case '#':
4929 case '%':
4930 appendEscape(aSynAbsURIRef, '%', *p);
4931 break;
4933 default:
4934 aSynAbsURIRef.append(*p);
4935 break;
4937 break;
4940 case FSYS_DOS:
4942 sal_uInt32 nAltDelimiter = 0x80000000;
4943 sal_Unicode const * p = pFSysBegin;
4944 if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4945 p += 2;
4946 else
4948 aSynAbsURIRef.append(sal_Unicode('/'));
4949 if (pFSysEnd - p >= 2
4950 && INetMIME::isAlpha(p[0])
4951 && p[1] == ':'
4952 && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4953 nAltDelimiter = '/';
4955 for (; p != pFSysEnd; ++p)
4956 if (*p == '\\' || *p == nAltDelimiter)
4957 aSynAbsURIRef.append(sal_Unicode('/'));
4958 else
4959 switch (*p)
4961 case '/':
4962 case '#':
4963 case '%':
4964 appendEscape(aSynAbsURIRef, '%', *p);
4965 break;
4967 default:
4968 aSynAbsURIRef.append(*p);
4969 break;
4971 break;
4974 case FSYS_MAC:
4975 aSynAbsURIRef.append(sal_Unicode('/'));
4976 {for (sal_Unicode const * p = pFSysBegin; p != pFSysEnd; ++p)
4977 switch (*p)
4979 case ':':
4980 aSynAbsURIRef.append(sal_Unicode('/'));
4981 break;
4983 case '/':
4984 case '|':
4985 case '#':
4986 case '%':
4987 appendEscape(aSynAbsURIRef, '%', *p);
4988 break;
4990 default:
4991 aSynAbsURIRef.append(*p);
4992 break;
4995 break;
4997 default:
4998 OSL_ASSERT(false);
4999 break;
5002 INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), WAS_ENCODED,
5003 RTL_TEXTENCODING_UTF8);
5004 if (aTemp.HasError())
5005 return false;
5007 *this = aTemp;
5008 return true;
5011 //============================================================================
5012 rtl::OUString INetURLObject::getFSysPath(FSysStyle eStyle,
5013 sal_Unicode * pDelimiter) const
5015 if (m_eScheme != INET_PROT_FILE)
5016 return rtl::OUString();
5018 if ((eStyle & FSYS_VOS ? 1 : 0)
5019 + (eStyle & FSYS_UNX ? 1 : 0)
5020 + (eStyle & FSYS_DOS ? 1 : 0)
5021 + (eStyle & FSYS_MAC ? 1 : 0)
5022 > 1)
5024 eStyle = eStyle & FSYS_VOS
5025 && m_aHost.isPresent()
5026 && m_aHost.getLength() > 0 ?
5027 FSYS_VOS :
5028 hasDosVolume(eStyle)
5029 || ((eStyle & FSYS_DOS) != 0
5030 && m_aHost.isPresent()
5031 && m_aHost.getLength() > 0) ?
5032 FSYS_DOS :
5033 eStyle & FSYS_UNX
5034 && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ?
5035 FSYS_UNX :
5036 FSysStyle(0);
5039 switch (eStyle)
5041 case FSYS_VOS:
5043 if (pDelimiter)
5044 *pDelimiter = '/';
5046 rtl::OUStringBuffer aSynFSysPath;
5047 aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
5048 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5049 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5050 RTL_TEXTENCODING_UTF8));
5051 else
5052 aSynFSysPath.append(sal_Unicode('.'));
5053 aSynFSysPath.append(decode(m_aPath, '%', DECODE_WITH_CHARSET,
5054 RTL_TEXTENCODING_UTF8));
5055 return aSynFSysPath.makeStringAndClear();
5058 case FSYS_UNX:
5060 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5061 return rtl::OUString();
5063 if (pDelimiter)
5064 *pDelimiter = '/';
5066 return decode(m_aPath, '%', DECODE_WITH_CHARSET,
5067 RTL_TEXTENCODING_UTF8);
5070 case FSYS_DOS:
5072 if (pDelimiter)
5073 *pDelimiter = '\\';
5075 rtl::OUStringBuffer aSynFSysPath;
5076 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5078 aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\\\"));
5079 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5080 RTL_TEXTENCODING_UTF8));
5081 aSynFSysPath.append(sal_Unicode('\\'));
5083 sal_Unicode const * p
5084 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5085 sal_Unicode const * pEnd = p + m_aPath.getLength();
5086 DBG_ASSERT(p < pEnd && *p == '/',
5087 "INetURLObject::getFSysPath(): Bad path");
5088 ++p;
5089 while (p < pEnd)
5091 EscapeType eEscapeType;
5092 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5093 RTL_TEXTENCODING_UTF8,
5094 eEscapeType);
5095 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5096 aSynFSysPath.append(sal_Unicode('\\'));
5097 else
5098 aSynFSysPath.appendUtf32(nUTF32);
5100 return aSynFSysPath.makeStringAndClear();
5103 case FSYS_MAC:
5105 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5106 return rtl::OUString();
5108 if (pDelimiter)
5109 *pDelimiter = ':';
5111 rtl::OUStringBuffer aSynFSysPath;
5112 sal_Unicode const * p
5113 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5114 sal_Unicode const * pEnd = p + m_aPath.getLength();
5115 DBG_ASSERT(p < pEnd && *p == '/',
5116 "INetURLObject::getFSysPath(): Bad path");
5117 ++p;
5118 while (p < pEnd)
5120 EscapeType eEscapeType;
5121 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5122 RTL_TEXTENCODING_UTF8,
5123 eEscapeType);
5124 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5125 aSynFSysPath.append(sal_Unicode(':'));
5126 else
5127 aSynFSysPath.appendUtf32(nUTF32);
5129 return aSynFSysPath.makeStringAndClear();
5132 default:
5133 return rtl::OUString();
5137 //============================================================================
5138 bool INetURLObject::HasMsgId() const
5140 if (m_eScheme != INET_PROT_POP3)
5141 return false;
5142 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5143 sal_Unicode const * pEnd = p + m_aPath.getLength();
5144 for (; p < pEnd; ++p)
5145 if (*p == '<')
5146 return true;
5147 return false;
5150 //============================================================================
5151 rtl::OUString INetURLObject::GetMsgId(DecodeMechanism eMechanism,
5152 rtl_TextEncoding eCharset) const
5154 if (m_eScheme != INET_PROT_POP3)
5155 return rtl::OUString();
5156 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5157 sal_Unicode const * pEnd = p + m_aPath.getLength();
5158 for (; p < pEnd; ++p)
5159 if (*p == '<')
5160 return decode(p, pEnd, getEscapePrefix(), eMechanism, eCharset);
5161 return rtl::OUString();
5164 //============================================================================
5165 // static
5166 void INetURLObject::appendUCS4Escape(rtl::OUStringBuffer & rTheText,
5167 sal_Char cEscapePrefix, sal_uInt32 nUCS4)
5169 DBG_ASSERT(nUCS4 < 0x80000000,
5170 "INetURLObject::appendUCS4Escape(): Bad char");
5171 if (nUCS4 < 0x80)
5172 appendEscape(rTheText, cEscapePrefix, nUCS4);
5173 else if (nUCS4 < 0x800)
5175 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0);
5176 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5178 else if (nUCS4 < 0x10000)
5180 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0);
5181 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5182 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5184 else if (nUCS4 < 0x200000)
5186 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0);
5187 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5188 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5189 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5191 else if (nUCS4 < 0x4000000)
5193 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8);
5194 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5195 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5196 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5197 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5199 else
5201 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC);
5202 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 24 & 0x3F) | 0x80);
5203 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5204 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5205 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5206 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5210 //============================================================================
5211 // static
5212 void INetURLObject::appendUCS4(rtl::OUStringBuffer& rTheText, sal_uInt32 nUCS4,
5213 EscapeType eEscapeType, bool bOctets,
5214 Part ePart, sal_Char cEscapePrefix,
5215 rtl_TextEncoding eCharset,
5216 bool bKeepVisibleEscapes)
5218 bool bEscape;
5219 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
5220 switch (eEscapeType)
5222 case ESCAPE_NO:
5223 if (mustEncode(nUCS4, ePart))
5225 bEscape = true;
5226 eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 :
5227 RTL_TEXTENCODING_UTF8;
5229 else
5230 bEscape = false;
5231 break;
5233 case ESCAPE_OCTET:
5234 bEscape = true;
5235 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
5236 break;
5238 case ESCAPE_UTF32:
5239 if (mustEncode(nUCS4, ePart))
5241 bEscape = true;
5242 eTargetCharset = eCharset;
5244 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
5246 bEscape = true;
5247 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
5249 else
5250 bEscape = false;
5251 break;
5252 default:
5253 bEscape = false;
5256 if (bEscape)
5258 switch (eTargetCharset)
5260 default:
5261 DBG_ERROR("INetURLObject::appendUCS4(): Unsupported charset");
5262 case RTL_TEXTENCODING_ASCII_US:
5263 case RTL_TEXTENCODING_ISO_8859_1:
5264 appendEscape(rTheText, cEscapePrefix, nUCS4);
5265 break;
5267 case RTL_TEXTENCODING_UTF8:
5268 appendUCS4Escape(rTheText, cEscapePrefix, nUCS4);
5269 break;
5272 else
5273 rTheText.append(sal_Unicode(nUCS4));
5276 //============================================================================
5277 // static
5278 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
5279 sal_Unicode const * pEnd, bool bOctets,
5280 sal_Char cEscapePrefix,
5281 EncodeMechanism eMechanism,
5282 rtl_TextEncoding eCharset,
5283 EscapeType & rEscapeType)
5285 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
5286 sal_uInt32 nUTF32 = bOctets ? *rBegin++ :
5287 INetMIME::getUTF32Character(rBegin, pEnd);
5288 switch (eMechanism)
5290 case ENCODE_ALL:
5291 rEscapeType = ESCAPE_NO;
5292 break;
5294 case WAS_ENCODED:
5296 int nWeight1;
5297 int nWeight2;
5298 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5299 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
5300 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
5302 rBegin += 2;
5303 nUTF32 = nWeight1 << 4 | nWeight2;
5304 switch (eCharset)
5306 default:
5307 DBG_ERROR(
5308 "INetURLObject::getUTF32(): Unsupported charset");
5309 case RTL_TEXTENCODING_ASCII_US:
5310 rEscapeType = INetMIME::isUSASCII(nUTF32) ?
5311 ESCAPE_UTF32 : ESCAPE_OCTET;
5312 break;
5314 case RTL_TEXTENCODING_ISO_8859_1:
5315 rEscapeType = ESCAPE_UTF32;
5316 break;
5318 case RTL_TEXTENCODING_UTF8:
5319 if (INetMIME::isUSASCII(nUTF32))
5320 rEscapeType = ESCAPE_UTF32;
5321 else
5323 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
5325 sal_uInt32 nEncoded;
5326 int nShift;
5327 sal_uInt32 nMin;
5328 if (nUTF32 <= 0xDF)
5330 nEncoded = (nUTF32 & 0x1F) << 6;
5331 nShift = 0;
5332 nMin = 0x80;
5334 else if (nUTF32 <= 0xEF)
5336 nEncoded = (nUTF32 & 0x0F) << 12;
5337 nShift = 6;
5338 nMin = 0x800;
5340 else
5342 nEncoded = (nUTF32 & 0x07) << 18;
5343 nShift = 12;
5344 nMin = 0x10000;
5346 sal_Unicode const * p = rBegin;
5347 bool bUTF8 = true;
5348 for (;;)
5350 if (pEnd - p < 3
5351 || p[0] != cEscapePrefix
5352 || (nWeight1
5353 = INetMIME::getHexWeight(p[1]))
5355 || nWeight1 > 11
5356 || (nWeight2
5357 = INetMIME::getHexWeight(p[2]))
5358 < 0)
5360 bUTF8 = false;
5361 break;
5363 p += 3;
5364 nEncoded
5365 |= ((nWeight1 & 3) << 4 | nWeight2)
5366 << nShift;
5367 if (nShift == 0)
5368 break;
5369 nShift -= 6;
5371 if (bUTF8 && nEncoded >= nMin
5372 && !INetMIME::isHighSurrogate(nEncoded)
5373 && !INetMIME::isLowSurrogate(nEncoded)
5374 && nEncoded <= 0x10FFFF)
5376 rBegin = p;
5377 nUTF32 = nEncoded;
5378 rEscapeType = ESCAPE_UTF32;
5379 break;
5382 rEscapeType = ESCAPE_OCTET;
5384 break;
5387 else
5388 rEscapeType = ESCAPE_NO;
5389 break;
5392 case NOT_CANONIC:
5394 int nWeight1;
5395 int nWeight2;
5396 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5397 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
5398 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
5400 rBegin += 2;
5401 nUTF32 = nWeight1 << 4 | nWeight2;
5402 rEscapeType = ESCAPE_OCTET;
5404 else
5405 rEscapeType = ESCAPE_NO;
5406 break;
5409 return nUTF32;
5412 //============================================================================
5413 // static
5414 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
5415 sal_Unicode const * pEnd,
5416 bool bEager)
5418 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
5419 State eState = STATE_DOT;
5420 sal_Int32 nLabels = 0;
5421 sal_Unicode const * pLastAlphanumeric = 0;
5422 for (sal_Unicode const * p = rBegin;; ++p)
5423 switch (eState)
5425 case STATE_DOT:
5426 if (p != pEnd && INetMIME::isAlphanumeric(*p))
5428 ++nLabels;
5429 eState = STATE_LABEL;
5430 break;
5432 if (bEager || nLabels == 0)
5433 return 0;
5434 rBegin = p - 1;
5435 return nLabels;
5437 case STATE_LABEL:
5438 if (p != pEnd)
5440 if (INetMIME::isAlphanumeric(*p))
5441 break;
5442 else if (*p == '.')
5444 eState = STATE_DOT;
5445 break;
5447 else if (*p == '-')
5449 pLastAlphanumeric = p;
5450 eState = STATE_HYPHEN;
5451 break;
5454 rBegin = p;
5455 return nLabels;
5457 case STATE_HYPHEN:
5458 if (p != pEnd)
5460 if (INetMIME::isAlphanumeric(*p))
5462 eState = STATE_LABEL;
5463 break;
5465 else if (*p == '-')
5466 break;
5468 if (bEager)
5469 return 0;
5470 rBegin = pLastAlphanumeric;
5471 return nLabels;
5475 //============================================================================
5476 // static
5477 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
5478 sal_Unicode const * pEnd)
5480 if (rBegin != pEnd && *rBegin == '[') {
5481 sal_Unicode const * p = rBegin + 1;
5482 //TODO: check for valid IPv6address (RFC 2373):
5483 while (p != pEnd && (INetMIME::isHexDigit(*p) || *p == ':' || *p == '.'))
5485 ++p;
5487 if (p != pEnd && *p == ']') {
5488 rBegin = p + 1;
5489 return true;
5492 return false;
5495 //============================================================================
5496 rtl::OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism,
5497 rtl_TextEncoding eCharset)
5498 const
5500 if (!checkHierarchical())
5501 return rtl::OUString();
5502 INetURLObject aTemp(*this);
5503 aTemp.clearFragment();
5504 aTemp.clearQuery();
5505 aTemp.removeSegment(LAST_SEGMENT, false);
5506 aTemp.setFinalSlash();
5507 return aTemp.GetMainURL(eMechanism, eCharset);
5510 //============================================================================
5511 rtl::OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
5512 rtl_TextEncoding eCharset) const
5514 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
5517 //============================================================================
5518 rtl::OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism,
5519 rtl_TextEncoding eCharset) const
5521 return getExtension(LAST_SEGMENT, false, eMechanism, eCharset);
5524 //============================================================================
5525 bool INetURLObject::CutLastName()
5527 INetURLObject aTemp(*this);
5528 aTemp.clearFragment();
5529 aTemp.clearQuery();
5530 if (!aTemp.removeSegment(LAST_SEGMENT, false))
5531 return false;
5532 *this = aTemp;
5533 return true;
5536 //============================================================================
5537 rtl::OUString INetURLObject::PathToFileName() const
5539 if (m_eScheme != INET_PROT_FILE)
5540 return rtl::OUString();
5541 rtl::OUString aSystemPath;
5542 if (osl::FileBase::getSystemPathFromFileURL(
5543 decode(m_aAbsURIRef.getStr(),
5544 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
5545 getEscapePrefix(), NO_DECODE, RTL_TEXTENCODING_UTF8),
5546 aSystemPath)
5547 != osl::FileBase::E_None)
5548 return rtl::OUString();
5549 return aSystemPath;
5552 //============================================================================
5553 rtl::OUString INetURLObject::GetFull() const
5555 INetURLObject aTemp(*this);
5556 aTemp.removeFinalSlash();
5557 return aTemp.PathToFileName();
5560 //============================================================================
5561 rtl::OUString INetURLObject::GetPath() const
5563 INetURLObject aTemp(*this);
5564 aTemp.removeSegment(LAST_SEGMENT, true);
5565 aTemp.removeFinalSlash();
5566 return aTemp.PathToFileName();
5569 //============================================================================
5570 void INetURLObject::SetBase(rtl::OUString const & rTheBase)
5572 setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL);
5575 //============================================================================
5576 rtl::OUString INetURLObject::GetBase() const
5578 return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET);
5581 //============================================================================
5582 void INetURLObject::SetName(rtl::OUString const & rTheName,
5583 EncodeMechanism eMechanism,
5584 rtl_TextEncoding eCharset)
5586 INetURLObject aTemp(*this);
5587 if (aTemp.removeSegment(LAST_SEGMENT, true)
5588 && aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism,
5589 eCharset))
5590 *this = aTemp;
5593 //============================================================================
5594 rtl::OUString INetURLObject::CutName(DecodeMechanism eMechanism,
5595 rtl_TextEncoding eCharset)
5597 rtl::OUString aTheName(getName(LAST_SEGMENT, true, eMechanism, eCharset));
5598 return removeSegment(LAST_SEGMENT, true) ? aTheName : rtl::OUString();
5601 //============================================================================
5602 void INetURLObject::SetExtension(rtl::OUString const & rTheExtension,
5603 EncodeMechanism eMechanism,
5604 rtl_TextEncoding eCharset)
5606 setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset);
5609 //============================================================================
5610 rtl::OUString INetURLObject::CutExtension(DecodeMechanism eMechanism,
5611 rtl_TextEncoding eCharset)
5613 rtl::OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism,
5614 eCharset));
5615 return removeExtension(LAST_SEGMENT, false)
5616 ? aTheExtension : rtl::OUString();
5619 //============================================================================
5620 bool INetURLObject::IsCaseSensitive() const
5622 return true;