2 * Copyright 2010-2016 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
6 * Christophe Huriaux, c.huriaux@gmail.com
7 * Andrew Lindesay, apl@lindesay.co.nz
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 #include <ICUWrapper.h>
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 #include <unicode/idna.h>
27 #include <unicode/stringpiece.h>
31 static const char* kArchivedUrl
= "be:url string";
34 BUrl::BUrl(const char* url
)
51 BUrl::BUrl(BMessage
* archive
)
66 if (archive
->FindString(kArchivedUrl
, &url
) == B_OK
)
73 BUrl::BUrl(const BUrl
& other
)
77 fProtocol(other
.fProtocol
),
79 fPassword(other
.fPassword
),
83 fRequest(other
.fRequest
),
84 fFragment(other
.fFragment
),
85 fUrlStringValid(other
.fUrlStringValid
),
86 fAuthorityValid(other
.fAuthorityValid
),
87 fUserInfoValid(other
.fUserInfoValid
),
88 fHasProtocol(other
.fHasProtocol
),
89 fHasUserName(other
.fHasUserName
),
90 fHasPassword(other
.fHasPassword
),
91 fHasHost(other
.fHasHost
),
92 fHasPort(other
.fHasPort
),
93 fHasPath(other
.fHasPath
),
94 fHasRequest(other
.fHasRequest
),
95 fHasFragment(other
.fHasFragment
)
98 fUrlString
= other
.fUrlString
;
101 fAuthority
= other
.fAuthority
;
104 fUserInfo
= other
.fUserInfo
;
109 BUrl::BUrl(const BUrl
& base
, const BString
& location
)
119 fAuthorityValid(false),
120 fUserInfoValid(false),
127 // This implements the algorithm in RFC3986, Section 5.2.
129 BUrl
relative(location
);
130 if (relative
.HasProtocol()) {
131 SetProtocol(relative
.Protocol());
132 if (relative
.HasAuthority())
133 SetAuthority(relative
.Authority());
134 SetPath(relative
.Path());
135 SetRequest(relative
.Request());
137 if (relative
.HasAuthority()) {
138 SetAuthority(relative
.Authority());
139 SetPath(relative
.Path());
140 SetRequest(relative
.Request());
142 if (relative
.Path().IsEmpty()) {
143 _SetPathUnsafe(base
.Path());
144 if (relative
.HasRequest())
145 SetRequest(relative
.Request());
147 SetRequest(base
.Request());
149 if (relative
.Path()[0] == '/')
150 SetPath(relative
.Path());
152 BString path
= base
._MergePath(relative
.Path());
155 SetRequest(relative
.Request());
158 if (base
.HasAuthority())
159 SetAuthority(base
.Authority());
161 SetProtocol(base
.Protocol());
164 if (relative
.HasFragment())
165 SetFragment(relative
.Fragment());
186 BUrl::BUrl(const BPath
& path
)
199 SetUrlString(UrlEncode(path
.Path(), true, true));
209 // #pragma mark URL fields modifiers
213 BUrl::SetUrlString(const BString
& url
)
215 _ExplodeUrlString(url
);
221 BUrl::SetProtocol(const BString
& protocol
)
223 fProtocol
= protocol
;
224 fHasProtocol
= !fProtocol
.IsEmpty();
225 fUrlStringValid
= false;
231 BUrl::SetUserName(const BString
& user
)
234 fHasUserName
= !fUser
.IsEmpty();
235 fUrlStringValid
= false;
236 fAuthorityValid
= false;
237 fUserInfoValid
= false;
243 BUrl::SetPassword(const BString
& password
)
245 fPassword
= password
;
246 fHasPassword
= !fPassword
.IsEmpty();
247 fUrlStringValid
= false;
248 fAuthorityValid
= false;
249 fUserInfoValid
= false;
255 BUrl::SetHost(const BString
& host
)
258 fHasHost
= !fHost
.IsEmpty();
259 fUrlStringValid
= false;
260 fAuthorityValid
= false;
266 BUrl::SetPort(int port
)
269 fHasPort
= (port
!= 0);
270 fUrlStringValid
= false;
271 fAuthorityValid
= false;
277 BUrl::SetPath(const BString
& path
)
279 // Implements RFC3986 section 5.2.4, "Remove dot segments"
286 while(!input
.IsEmpty())
289 if (input
.StartsWith("./"))
295 if (input
.StartsWith("../"))
302 if (input
.StartsWith("/./"))
315 if (input
.StartsWith("/../"))
318 output
.Truncate(output
.FindLast('/'));
325 output
.Truncate(output
.FindLast('/'));
330 if (input
== "." || input
== "..")
342 int slashpos
= input
.FindFirst('/', 1);
344 output
.Append(input
, slashpos
);
345 input
.Remove(0, slashpos
);
347 output
.Append(input
);
352 _SetPathUnsafe(output
);
358 BUrl::SetRequest(const BString
& request
)
361 fHasRequest
= !fRequest
.IsEmpty();
362 fUrlStringValid
= false;
368 BUrl::SetFragment(const BString
& fragment
)
370 fFragment
= fragment
;
372 fUrlStringValid
= false;
377 // #pragma mark URL fields access
381 BUrl::UrlString() const
383 if (!fUrlStringValid
) {
384 fUrlString
.Truncate(0);
387 fUrlString
<< fProtocol
<< ':';
390 if (HasAuthority()) {
392 fUrlString
<< Authority();
394 fUrlString
<< Path();
397 fUrlString
<< '?' << fRequest
;
400 fUrlString
<< '#' << fFragment
;
402 fUrlStringValid
= true;
410 BUrl::Protocol() const
417 BUrl::UserName() const
424 BUrl::Password() const
431 BUrl::UserInfo() const
433 if (!fUserInfoValid
) {
437 fUserInfo
<< ':' << fPassword
;
439 fUserInfoValid
= true;
461 BUrl::Authority() const
463 if (!fAuthorityValid
) {
464 fAuthority
.Truncate(0);
467 fAuthority
<< UserInfo() << '@';
468 fAuthority
<< Host();
471 fAuthority
<< ':' << fPort
;
473 fAuthorityValid
= true;
487 BUrl::Request() const
494 BUrl::Fragment() const
500 // #pragma mark URL fields tests
504 BUrl::IsValid() const
509 if (fProtocol
== "http" || fProtocol
== "https" || fProtocol
== "ftp"
510 || fProtocol
== "ipp" || fProtocol
== "afp" || fProtocol
== "telnet"
511 || fProtocol
== "gopher" || fProtocol
== "nntp" || fProtocol
== "sftp"
512 || fProtocol
== "finger" || fProtocol
== "pop" || fProtocol
== "imap") {
513 return fHasHost
&& !fHost
.IsEmpty();
516 if (fProtocol
== "file")
524 BUrl::HasProtocol() const
531 BUrl::HasAuthority() const
533 return fHasHost
|| fHasUserName
;
538 BUrl::HasUserName() const
545 BUrl::HasPassword() const
552 BUrl::HasUserInfo() const
554 return fHasUserName
|| fHasPassword
;
559 BUrl::HasHost() const
566 BUrl::HasPort() const
573 BUrl::HasPath() const
580 BUrl::HasRequest() const
587 BUrl::HasFragment() const
593 // #pragma mark URL encoding/decoding of needed fields
597 BUrl::UrlEncode(bool strict
)
599 fUser
= _DoUrlEncodeChunk(fUser
, strict
);
600 fPassword
= _DoUrlEncodeChunk(fPassword
, strict
);
601 fHost
= _DoUrlEncodeChunk(fHost
, strict
);
602 fFragment
= _DoUrlEncodeChunk(fFragment
, strict
);
603 fPath
= _DoUrlEncodeChunk(fPath
, strict
, true);
608 BUrl::UrlDecode(bool strict
)
610 fUser
= _DoUrlDecodeChunk(fUser
, strict
);
611 fPassword
= _DoUrlDecodeChunk(fPassword
, strict
);
612 fHost
= _DoUrlDecodeChunk(fHost
, strict
);
613 fFragment
= _DoUrlDecodeChunk(fFragment
, strict
);
614 fPath
= _DoUrlDecodeChunk(fPath
, strict
);
618 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
622 UErrorCode err
= U_ZERO_ERROR
;
623 icu::IDNA
* converter
= icu::IDNA::createUTS46Instance(0, err
);
627 BStringByteSink
sink(&result
);
628 converter
->nameToASCII_UTF8(icu::StringPiece(fHost
.String()), sink
, info
,
642 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
644 BUrl::IDNAToUnicode()
646 UErrorCode err
= U_ZERO_ERROR
;
647 icu::IDNA
* converter
= icu::IDNA::createUTS46Instance(0, err
);
651 BStringByteSink
sink(&result
);
652 converter
->nameToUnicodeUTF8(icu::StringPiece(fHost
.String()), sink
, info
,
666 // #pragma mark - utility functionality
669 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
671 BUrl::HasPreferredApplication() const
673 BString appSignature
= PreferredApplication();
674 BMimeType
mime(appSignature
.String());
676 if (appSignature
.IFindFirst("application/") == 0
685 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
687 BUrl::PreferredApplication() const
689 BString appSignature
;
690 BMimeType
mime(_UrlMimeType().String());
691 mime
.GetPreferredApp(appSignature
.LockBuffer(B_MIME_TYPE_LENGTH
));
692 appSignature
.UnlockBuffer();
694 return BString(appSignature
);
699 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
701 BUrl::OpenWithPreferredApplication(bool onProblemAskUser
) const
706 BString urlString
= UrlString();
707 if (urlString
.Length() > B_PATH_NAME_LENGTH
) {
709 // if (onProblemAskUser)
710 // BAlert ... Too long URL!
712 fprintf(stderr
, "URL too long");
714 return B_NAME_TOO_LONG
;
718 const_cast<char*>("BUrlInvokedApplication"),
719 const_cast<char*>(urlString
.String()),
724 if (HasPreferredApplication())
725 printf("HasPreferredApplication() == true\n");
727 printf("HasPreferredApplication() == false\n");
730 status_t status
= be_roster
->Launch(_UrlMimeType().String(), 1, argv
+1);
731 if (status
!= B_OK
) {
733 fprintf(stderr
, "Opening URL failed: %s\n", strerror(status
));
742 // #pragma mark Url encoding/decoding of string
746 BUrl::UrlEncode(const BString
& url
, bool strict
, bool directory
)
748 return _DoUrlEncodeChunk(url
, strict
, directory
);
753 BUrl::UrlDecode(const BString
& url
, bool strict
)
755 return _DoUrlDecodeChunk(url
, strict
);
759 // #pragma mark BArchivable members
763 BUrl::Archive(BMessage
* into
, bool deep
) const
765 status_t ret
= BArchivable::Archive(into
, deep
);
768 ret
= into
->AddString(kArchivedUrl
, UrlString());
774 /*static*/ BArchivable
*
775 BUrl::Instantiate(BMessage
* archive
)
777 if (validate_instantiation(archive
, "BUrl"))
778 return new(std::nothrow
) BUrl(archive
);
783 // #pragma mark URL comparison
787 BUrl::operator==(BUrl
& other
) const
792 return fUrlString
== other
.fUrlString
;
797 BUrl::operator!=(BUrl
& other
) const
799 return !(*this == other
);
803 // #pragma mark URL assignment
807 BUrl::operator=(const BUrl
& other
)
809 fUrlStringValid
= other
.fUrlStringValid
;
811 fUrlString
= other
.fUrlString
;
813 fAuthorityValid
= other
.fAuthorityValid
;
815 fAuthority
= other
.fAuthority
;
817 fUserInfoValid
= other
.fUserInfoValid
;
819 fUserInfo
= other
.fUserInfo
;
821 fProtocol
= other
.fProtocol
;
823 fPassword
= other
.fPassword
;
827 fRequest
= other
.fRequest
;
828 fFragment
= other
.fFragment
;
830 fHasProtocol
= other
.fHasProtocol
;
831 fHasUserName
= other
.fHasUserName
;
832 fHasPassword
= other
.fHasPassword
;
833 fHasHost
= other
.fHasHost
;
834 fHasPort
= other
.fHasPort
;
835 fHasPath
= other
.fHasPath
;
836 fHasRequest
= other
.fHasRequest
;
837 fHasFragment
= other
.fHasFragment
;
844 BUrl::operator=(const BString
& string
)
846 SetUrlString(string
);
852 BUrl::operator=(const char* string
)
854 SetUrlString(string
);
859 // #pragma mark URL to string conversion
862 BUrl::operator const char*() const
871 fHasProtocol
= false;
872 fHasUserName
= false;
873 fHasPassword
= false;
878 fHasFragment
= false;
880 fProtocol
.Truncate(0);
882 fPassword
.Truncate(0);
886 fRequest
.Truncate(0);
887 fFragment
.Truncate(0);
889 // Force re-generation of these fields
890 fUrlStringValid
= false;
891 fUserInfoValid
= false;
892 fAuthorityValid
= false;
897 BUrl::_ContainsDelimiter(const BString
& url
)
899 int32 len
= url
.Length();
901 for (int32 i
= 0; i
< len
; i
++) {
918 enum explode_url_parse_state
{
920 EXPLODE_PROTOCOLTERMINATOR
,
921 EXPLODE_AUTHORITYORPATH
,
924 EXPLODE_REQUEST
, // query
930 typedef bool (*explode_char_match_fn
)(char c
);
934 explode_is_protocol_char(char c
)
936 return isalnum(c
) || c
== '+' || c
== '.' || c
== '-';
941 explode_is_authority_char(char c
)
943 return !(c
== '/' || c
== '?' || c
== '#');
948 explode_is_path_char(char c
)
950 return !(c
== '#' || c
== '?');
955 explode_is_request_char(char c
)
962 char_offset_until_fn_false(const char* url
, int32 len
, int32 offset
,
963 explode_char_match_fn fn
)
965 while (offset
< len
&& fn(url
[offset
]))
972 * This function takes a URL in string-form and parses the components of the URL out.
975 BUrl::_ExplodeUrlString(const BString
& url
)
979 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters
982 if (_ContainsDelimiter(url
))
985 explode_url_parse_state state
= EXPLODE_PROTOCOL
;
987 int32 length
= url
.Length();
988 const char *url_c
= url
.String();
990 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B
991 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
992 // The ensuing logic attempts to simulate the behaviour of extracting the groups
993 // from the string without requiring a group-capable regex engine.
995 while (offset
< length
) {
998 case EXPLODE_PROTOCOL
:
1000 int32 end_protocol
= char_offset_until_fn_false(url_c
, length
,
1001 offset
, explode_is_protocol_char
);
1003 if (end_protocol
< length
) {
1004 SetProtocol(BString(&url_c
[offset
], end_protocol
- offset
));
1005 state
= EXPLODE_PROTOCOLTERMINATOR
;
1006 offset
= end_protocol
;
1008 // No protocol was found, try parsing from the string
1009 // start, beginning with authority or path
1012 state
= EXPLODE_AUTHORITYORPATH
;
1017 case EXPLODE_PROTOCOLTERMINATOR
:
1019 if (url
[offset
] == ':') {
1022 // No protocol was found, try parsing from the string
1023 // start, beginning with authority or path
1027 state
= EXPLODE_AUTHORITYORPATH
;
1031 case EXPLODE_AUTHORITYORPATH
:
1033 // The authority must start with //. If it isn't there, skip
1034 // to parsing the path.
1035 if (strncmp(&url_c
[offset
], "//", 2) == 0) {
1036 state
= EXPLODE_AUTHORITY
;
1039 state
= EXPLODE_PATH
;
1044 case EXPLODE_AUTHORITY
:
1046 int end_authority
= char_offset_until_fn_false(url_c
, length
,
1047 offset
, explode_is_authority_char
);
1048 SetAuthority(BString(&url_c
[offset
], end_authority
- offset
));
1049 state
= EXPLODE_PATH
;
1050 offset
= end_authority
;
1056 int end_path
= char_offset_until_fn_false(url_c
, length
, offset
,
1057 explode_is_path_char
);
1058 SetPath(BString(&url_c
[offset
], end_path
- offset
));
1059 state
= EXPLODE_REQUEST
;
1064 case EXPLODE_REQUEST
: // query
1066 if (url_c
[offset
] == '?') {
1068 int end_request
= char_offset_until_fn_false(url_c
, length
,
1069 offset
, explode_is_request_char
);
1070 SetRequest(BString(&url_c
[offset
], end_request
- offset
));
1071 offset
= end_request
;
1073 state
= EXPLODE_FRAGMENT
;
1077 case EXPLODE_FRAGMENT
:
1079 if (url_c
[offset
] == '#') {
1081 SetFragment(BString(&url_c
[offset
], length
- offset
));
1084 state
= EXPLODE_COMPLETE
;
1088 case EXPLODE_COMPLETE
:
1089 // should never be reached - keeps the compiler happy
1100 BUrl::_MergePath(const BString
& relative
) const
1102 // This implements RFC3986, Section 5.2.3.
1103 if (HasAuthority() && fPath
== "")
1105 BString
result("/");
1110 BString
result(fPath
);
1111 result
.Truncate(result
.FindLast("/") + 1);
1118 // This sets the path without normalizing it. If fed with a path that has . or
1119 // .. segments, this would make the URL invalid.
1121 BUrl::_SetPathUnsafe(const BString
& path
)
1124 fHasPath
= true; // RFC says an empty path is still a path
1125 fUrlStringValid
= false;
1129 enum authority_parse_state
{
1139 authority_is_username_char(char c
)
1141 return !(c
== ':' || c
== '@');
1146 authority_is_password_char(char c
)
1153 authority_is_ipv6_host_char(char c
) {
1154 return (c
>= 'A' && c
<= 'F') || (c
>= 'a' && c
<= 'f')
1155 || (c
>= '0' && c
<= '9') || c
== ':';
1160 authority_is_host_char(char c
) {
1161 return !(c
== ':' || c
== '/');
1166 authority_is_port_char(char c
) {
1167 return c
>= '0' && c
<= '9';
1172 BUrl::SetAuthority(const BString
& authority
)
1174 fAuthority
= authority
;
1177 fPassword
.Truncate(0);
1181 fHasUserName
= false;
1182 fHasPassword
= false;
1184 bool hasUsernamePassword
= B_ERROR
!= fAuthority
.FindFirst('@');
1185 authority_parse_state state
= AUTHORITY_USERNAME
;
1187 int32 length
= authority
.Length();
1188 const char *authority_c
= authority
.String();
1190 while (AUTHORITY_COMPLETE
!= state
&& offset
< length
) {
1194 case AUTHORITY_USERNAME
:
1196 if (hasUsernamePassword
) {
1197 int32 end_username
= char_offset_until_fn_false(
1198 authority_c
, length
, offset
,
1199 authority_is_username_char
);
1201 SetUserName(BString(&authority_c
[offset
],
1202 end_username
- offset
));
1204 state
= AUTHORITY_PASSWORD
;
1205 offset
= end_username
;
1207 state
= AUTHORITY_HOST
;
1212 case AUTHORITY_PASSWORD
:
1214 if (hasUsernamePassword
&& ':' == authority
[offset
]) {
1215 offset
++; // move past the delimiter
1216 int32 end_password
= char_offset_until_fn_false(
1217 authority_c
, length
, offset
,
1218 authority_is_password_char
);
1220 SetPassword(BString(&authority_c
[offset
],
1221 end_password
- offset
));
1223 offset
= end_password
;
1226 // if the host was preceded by a username + password couple
1227 // then there will be an '@' delimiter to avoid.
1229 if (authority_c
[offset
] == '@') {
1233 state
= AUTHORITY_HOST
;
1237 case AUTHORITY_HOST
:
1240 // the host may be enclosed within brackets in order to express
1243 if (authority_c
[offset
] == '[') {
1244 int32 end_ipv6_host
= char_offset_until_fn_false(
1245 authority_c
, length
, offset
+ 1,
1246 authority_is_ipv6_host_char
);
1248 if (authority_c
[end_ipv6_host
] == ']') {
1249 SetHost(BString(&authority_c
[offset
],
1250 (end_ipv6_host
- offset
) + 1));
1251 state
= AUTHORITY_PORT
;
1252 offset
= end_ipv6_host
+ 1;
1256 // if an IPV6 host was not found.
1258 if (AUTHORITY_HOST
== state
) {
1259 int32 end_host
= char_offset_until_fn_false(
1260 authority_c
, length
, offset
, authority_is_host_char
);
1262 SetHost(BString(&authority_c
[offset
], end_host
- offset
));
1263 state
= AUTHORITY_PORT
;
1270 case AUTHORITY_PORT
:
1272 if (authority_c
[offset
] == ':') {
1274 int32 end_port
= char_offset_until_fn_false(
1275 authority_c
, length
, offset
, authority_is_port_char
);
1276 SetPort(atoi(&authority_c
[offset
]));
1280 state
= AUTHORITY_COMPLETE
;
1285 case AUTHORITY_COMPLETE
:
1286 // should never be reached - keeps the compiler happy
1291 // An empty authority is still an authority, making it possible to have
1292 // URLs such as file:///path/to/file.
1293 // TODO however, there is no way to unset the authority once it is set...
1294 // We may want to take a const char* parameter and allow NULL.
1300 BUrl::_DoUrlEncodeChunk(const BString
& chunk
, bool strict
, bool directory
)
1304 for (int32 i
= 0; i
< chunk
.Length(); i
++) {
1305 if (_IsUnreserved(chunk
[i
])
1306 || (directory
&& (chunk
[i
] == '/' || chunk
[i
] == '\\'))) {
1309 if (chunk
[i
] == ' ' && !strict
) {
1311 // In non-strict mode, spaces are encoded by a plus sign
1314 snprintf(hexString
, 5, "%X", chunk
[i
]);
1316 result
<< '%' << hexString
;
1326 BUrl::_DoUrlDecodeChunk(const BString
& chunk
, bool strict
)
1330 for (int32 i
= 0; i
< chunk
.Length(); i
++) {
1331 if (chunk
[i
] == '+' && !strict
)
1338 if (chunk
[i
] == '%' && i
< chunk
.Length() - 2
1339 && isxdigit(chunk
[i
+ 1]) && isxdigit(chunk
[i
+2])) {
1340 hexString
[0] = chunk
[i
+ 1];
1341 hexString
[1] = chunk
[i
+ 2];
1343 decoded
= (char)strtol(hexString
, &out
, 16);
1346 if (out
== hexString
+ 2) {
1358 BUrl::_IsProtocolValid()
1360 for (int8 index
= 0; index
< fProtocol
.Length(); index
++) {
1361 char c
= fProtocol
[index
];
1363 if (index
== 0 && !isalpha(c
))
1365 else if (!isalnum(c
) && c
!= '+' && c
!= '-' && c
!= '.')
1369 return fProtocol
.Length() > 0;
1374 BUrl::_IsUnreserved(char c
)
1376 return isalnum(c
) || c
== '-' || c
== '.' || c
== '_' || c
== '~';
1381 BUrl::_IsGenDelim(char c
)
1383 return c
== ':' || c
== '/' || c
== '?' || c
== '#' || c
== '['
1384 || c
== ']' || c
== '@';
1389 BUrl::_IsSubDelim(char c
)
1391 return c
== '!' || c
== '$' || c
== '&' || c
== '\'' || c
== '('
1392 || c
== ')' || c
== '*' || c
== '+' || c
== ',' || c
== ';'
1398 BUrl::_UrlMimeType() const
1401 mime
<< "application/x-vnd.Be.URL." << fProtocol
;
1403 return BString(mime
);