2 * Copyright 2010 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
6 * Christophe Huriaux, c.huriaux@gmail.com
20 #include <ICUWrapper.h>
23 #include <unicode/idna.h>
24 #include <unicode/stringpiece.h>
27 static const char* kArchivedUrl
= "be:url string";
30 BUrl::BUrl(const char* url
)
47 BUrl::BUrl(BMessage
* archive
)
62 if (archive
->FindString(kArchivedUrl
, &url
) == B_OK
)
69 BUrl::BUrl(const BUrl
& other
)
73 fProtocol(other
.fProtocol
),
75 fPassword(other
.fPassword
),
79 fRequest(other
.fRequest
),
80 fFragment(other
.fFragment
),
81 fUrlStringValid(other
.fUrlStringValid
),
82 fAuthorityValid(other
.fAuthorityValid
),
83 fUserInfoValid(other
.fUserInfoValid
),
84 fHasProtocol(other
.fHasProtocol
),
85 fHasUserName(other
.fHasUserName
),
86 fHasPassword(other
.fHasPassword
),
87 fHasHost(other
.fHasHost
),
88 fHasPort(other
.fHasPort
),
89 fHasPath(other
.fHasPath
),
90 fHasRequest(other
.fHasRequest
),
91 fHasFragment(other
.fHasFragment
)
94 fUrlString
= other
.fUrlString
;
97 fAuthority
= other
.fAuthority
;
100 fUserInfo
= other
.fUserInfo
;
105 BUrl::BUrl(const BUrl
& base
, const BString
& location
)
115 fAuthorityValid(false),
116 fUserInfoValid(false),
123 // This implements the algorithm in RFC3986, Section 5.2.
125 BUrl
relative(location
);
126 if (relative
.HasProtocol()) {
127 SetProtocol(relative
.Protocol());
128 if (relative
.HasAuthority())
129 SetAuthority(relative
.Authority());
130 SetPath(relative
.Path());
131 SetRequest(relative
.Request());
133 if (relative
.HasAuthority()) {
134 SetAuthority(relative
.Authority());
135 SetPath(relative
.Path());
136 SetRequest(relative
.Request());
138 if (relative
.Path().IsEmpty()) {
139 _SetPathUnsafe(base
.Path());
140 if (relative
.HasRequest())
141 SetRequest(relative
.Request());
143 SetRequest(base
.Request());
145 if (relative
.Path()[0] == '/')
146 SetPath(relative
.Path());
148 BString path
= base
._MergePath(relative
.Path());
151 SetRequest(relative
.Request());
154 if (base
.HasAuthority())
155 SetAuthority(base
.Authority());
157 SetProtocol(base
.Protocol());
160 if (relative
.HasFragment())
161 SetFragment(relative
.Fragment());
182 BUrl::BUrl(const BPath
& path
)
195 SetUrlString(UrlEncode(path
.Path(), true, true));
205 // #pragma mark URL fields modifiers
209 BUrl::SetUrlString(const BString
& url
)
211 _ExplodeUrlString(url
);
217 BUrl::SetProtocol(const BString
& protocol
)
219 fProtocol
= protocol
;
220 fHasProtocol
= !fProtocol
.IsEmpty();
221 fUrlStringValid
= false;
227 BUrl::SetUserName(const BString
& user
)
230 fHasUserName
= !fUser
.IsEmpty();
231 fUrlStringValid
= false;
232 fAuthorityValid
= false;
233 fUserInfoValid
= false;
239 BUrl::SetPassword(const BString
& password
)
241 fPassword
= password
;
242 fHasPassword
= !fPassword
.IsEmpty();
243 fUrlStringValid
= false;
244 fAuthorityValid
= false;
245 fUserInfoValid
= false;
251 BUrl::SetHost(const BString
& host
)
254 fHasHost
= !fHost
.IsEmpty();
255 fUrlStringValid
= false;
256 fAuthorityValid
= false;
262 BUrl::SetPort(int port
)
265 fHasPort
= (port
!= 0);
266 fUrlStringValid
= false;
267 fAuthorityValid
= false;
273 BUrl::SetPath(const BString
& path
)
275 // Implements RFC3986 section 5.2.4, "Remove dot segments"
282 while(!input
.IsEmpty())
285 if (input
.StartsWith("./"))
291 if (input
.StartsWith("../"))
298 if (input
.StartsWith("/./"))
311 if (input
.StartsWith("/../"))
314 output
.Truncate(output
.FindLast('/'));
321 output
.Truncate(output
.FindLast('/'));
326 if (input
== "." || input
== "..")
338 int slashpos
= input
.FindFirst('/', 1);
340 output
.Append(input
, slashpos
);
341 input
.Remove(0, slashpos
);
343 output
.Append(input
);
348 _SetPathUnsafe(output
);
354 BUrl::SetRequest(const BString
& request
)
357 fHasRequest
= !fRequest
.IsEmpty();
358 fUrlStringValid
= false;
364 BUrl::SetFragment(const BString
& fragment
)
366 fFragment
= fragment
;
368 fUrlStringValid
= false;
373 // #pragma mark URL fields access
377 BUrl::UrlString() const
379 if (!fUrlStringValid
) {
380 fUrlString
.Truncate(0);
383 fUrlString
<< fProtocol
<< ':';
388 fUrlString
<< Authority();
389 fUrlString
<< Path();
392 fUrlString
<< '?' << fRequest
;
395 fUrlString
<< '#' << fFragment
;
397 fUrlStringValid
= true;
405 BUrl::Protocol() const
412 BUrl::UserName() const
419 BUrl::Password() const
426 BUrl::UserInfo() const
428 if (!fUserInfoValid
) {
432 fUserInfo
<< ':' << fPassword
;
434 fUserInfoValid
= true;
456 BUrl::Authority() const
458 if (!fAuthorityValid
) {
459 fAuthority
.Truncate(0);
462 fAuthority
<< UserInfo() << '@';
463 fAuthority
<< Host();
466 fAuthority
<< ':' << fPort
;
468 fAuthorityValid
= true;
482 BUrl::Request() const
489 BUrl::Fragment() const
495 // #pragma mark URL fields tests
499 BUrl::IsValid() const
501 // TODO: Implement for real!
502 return fHasProtocol
&& (fHasHost
|| fHasPath
);
507 BUrl::HasProtocol() const
514 BUrl::HasAuthority() const
516 return fHasHost
|| fHasUserName
;
521 BUrl::HasUserName() const
528 BUrl::HasPassword() const
535 BUrl::HasUserInfo() const
537 return fHasUserName
|| fHasPassword
;
542 BUrl::HasHost() const
549 BUrl::HasPort() const
556 BUrl::HasPath() const
563 BUrl::HasRequest() const
570 BUrl::HasFragment() const
576 // #pragma mark URL encoding/decoding of needed fields
580 BUrl::UrlEncode(bool strict
)
582 fUser
= _DoUrlEncodeChunk(fUser
, strict
);
583 fPassword
= _DoUrlEncodeChunk(fPassword
, strict
);
584 fHost
= _DoUrlEncodeChunk(fHost
, strict
);
585 fFragment
= _DoUrlEncodeChunk(fFragment
, strict
);
586 fPath
= _DoUrlEncodeChunk(fPath
, strict
, true);
591 BUrl::UrlDecode(bool strict
)
593 fUser
= _DoUrlDecodeChunk(fUser
, strict
);
594 fPassword
= _DoUrlDecodeChunk(fPassword
, strict
);
595 fHost
= _DoUrlDecodeChunk(fHost
, strict
);
596 fFragment
= _DoUrlDecodeChunk(fFragment
, strict
);
597 fPath
= _DoUrlDecodeChunk(fPath
, strict
);
604 UErrorCode err
= U_ZERO_ERROR
;
605 icu::IDNA
* converter
= icu::IDNA::createUTS46Instance(0, err
);
609 BStringByteSink
sink(&result
);
610 converter
->nameToASCII_UTF8(icu::StringPiece(fHost
.String()), sink
, info
,
624 BUrl::IDNAToUnicode()
626 UErrorCode err
= U_ZERO_ERROR
;
627 icu::IDNA
* converter
= icu::IDNA::createUTS46Instance(0, err
);
631 BStringByteSink
sink(&result
);
632 converter
->nameToUnicodeUTF8(icu::StringPiece(fHost
.String()), sink
, info
,
645 // #pragma mark - utility functionality
649 BUrl::HasPreferredApplication() const
651 BString appSignature
= PreferredApplication();
652 BMimeType
mime(appSignature
.String());
654 if (appSignature
.IFindFirst("application/") == 0
663 BUrl::PreferredApplication() const
665 BString appSignature
;
666 BMimeType
mime(_UrlMimeType().String());
667 mime
.GetPreferredApp(appSignature
.LockBuffer(B_MIME_TYPE_LENGTH
));
668 appSignature
.UnlockBuffer();
670 return BString(appSignature
);
675 BUrl::OpenWithPreferredApplication(bool onProblemAskUser
) const
680 BString urlString
= UrlString();
681 if (urlString
.Length() > B_PATH_NAME_LENGTH
) {
683 // if (onProblemAskUser)
684 // BAlert ... Too long URL!
686 fprintf(stderr
, "URL too long");
688 return B_NAME_TOO_LONG
;
692 const_cast<char*>("BUrlInvokedApplication"),
693 const_cast<char*>(urlString
.String()),
698 if (HasPreferredApplication())
699 printf("HasPreferredApplication() == true\n");
701 printf("HasPreferredApplication() == false\n");
704 status_t status
= be_roster
->Launch(_UrlMimeType().String(), 1, argv
+1);
705 if (status
!= B_OK
) {
707 fprintf(stderr
, "Opening URL failed: %s\n", strerror(status
));
715 // #pragma mark Url encoding/decoding of string
719 BUrl::UrlEncode(const BString
& url
, bool strict
, bool directory
)
721 return _DoUrlEncodeChunk(url
, strict
, directory
);
726 BUrl::UrlDecode(const BString
& url
, bool strict
)
728 return _DoUrlDecodeChunk(url
, strict
);
732 // #pragma mark BArchivable members
736 BUrl::Archive(BMessage
* into
, bool deep
) const
738 status_t ret
= BArchivable::Archive(into
, deep
);
741 ret
= into
->AddString(kArchivedUrl
, UrlString());
747 /*static*/ BArchivable
*
748 BUrl::Instantiate(BMessage
* archive
)
750 if (validate_instantiation(archive
, "BUrl"))
751 return new(std::nothrow
) BUrl(archive
);
756 // #pragma mark URL comparison
760 BUrl::operator==(BUrl
& other
) const
765 return fUrlString
== other
.fUrlString
;
770 BUrl::operator!=(BUrl
& other
) const
772 return !(*this == other
);
776 // #pragma mark URL assignment
780 BUrl::operator=(const BUrl
& other
)
782 fUrlStringValid
= other
.fUrlStringValid
;
784 fUrlString
= other
.fUrlString
;
786 fAuthorityValid
= other
.fAuthorityValid
;
788 fAuthority
= other
.fAuthority
;
790 fUserInfoValid
= other
.fUserInfoValid
;
792 fUserInfo
= other
.fUserInfo
;
794 fProtocol
= other
.fProtocol
;
796 fPassword
= other
.fPassword
;
800 fRequest
= other
.fRequest
;
801 fFragment
= other
.fFragment
;
803 fHasProtocol
= other
.fHasProtocol
;
804 fHasUserName
= other
.fHasUserName
;
805 fHasPassword
= other
.fHasPassword
;
806 fHasHost
= other
.fHasHost
;
807 fHasPort
= other
.fHasPort
;
808 fHasPath
= other
.fHasPath
;
809 fHasRequest
= other
.fHasRequest
;
810 fHasFragment
= other
.fHasFragment
;
817 BUrl::operator=(const BString
& string
)
819 SetUrlString(string
);
825 BUrl::operator=(const char* string
)
827 SetUrlString(string
);
832 // #pragma mark URL to string conversion
835 BUrl::operator const char*() const
844 fHasProtocol
= false;
845 fHasUserName
= false;
846 fHasPassword
= false;
851 fHasFragment
= false;
853 fProtocol
.Truncate(0);
855 fPassword
.Truncate(0);
859 fRequest
.Truncate(0);
860 fFragment
.Truncate(0);
862 // Force re-generation of these fields
863 fUrlStringValid
= false;
864 fUserInfoValid
= false;
865 fAuthorityValid
= false;
870 BUrl::_ExplodeUrlString(const BString
& url
)
872 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B
873 static RegExp
urlMatcher(
874 "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
878 RegExp::MatchResult match
= urlMatcher
.Match(url
.String());
880 if (!match
.HasMatched())
881 return; // TODO error reporting
884 url
.CopyInto(fProtocol
, match
.GroupStartOffsetAt(1),
885 match
.GroupEndOffsetAt(1) - match
.GroupStartOffsetAt(1));
886 if (!_IsProtocolValid()) {
887 fHasProtocol
= false;
888 fProtocol
.Truncate(0);
892 // Authority (including user credentials, host, and port
893 if (match
.GroupEndOffsetAt(2) - match
.GroupStartOffsetAt(2) > 0)
895 url
.CopyInto(fAuthority
, match
.GroupStartOffsetAt(3),
896 match
.GroupEndOffsetAt(3) - match
.GroupStartOffsetAt(3));
897 SetAuthority(fAuthority
);
901 fHasUserName
= false;
902 fHasPassword
= false;
906 url
.CopyInto(fPath
, match
.GroupStartOffsetAt(4),
907 match
.GroupEndOffsetAt(4) - match
.GroupStartOffsetAt(4));
908 if (!fPath
.IsEmpty())
912 if (match
.GroupEndOffsetAt(5) - match
.GroupStartOffsetAt(5) > 0)
914 url
.CopyInto(fRequest
, match
.GroupStartOffsetAt(6),
915 match
.GroupEndOffsetAt(6) - match
.GroupStartOffsetAt(6));
923 if (match
.GroupEndOffsetAt(7) - match
.GroupStartOffsetAt(7) > 0)
925 url
.CopyInto(fFragment
, match
.GroupStartOffsetAt(8),
926 match
.GroupEndOffsetAt(8) - match
.GroupStartOffsetAt(8));
930 fHasFragment
= false;
936 BUrl::_MergePath(const BString
& relative
) const
938 // This implements RFC3986, Section 5.2.3.
939 if (HasAuthority() && fPath
== "")
946 BString
result(fPath
);
947 result
.Truncate(result
.FindLast("/") + 1);
954 // This sets the path without normalizing it. If fed with a path that has . or
955 // .. segments, this would make the URL invalid.
957 BUrl::_SetPathUnsafe(const BString
& path
)
960 fHasPath
= true; // RFC says an empty path is still a path
961 fUrlStringValid
= false;
966 BUrl::SetAuthority(const BString
& authority
)
968 fAuthority
= authority
;
971 fHasUserName
= false;
972 fHasPassword
= false;
974 // An empty authority is still an authority, making it possible to have
975 // URLs such as file:///path/to/file.
976 // TODO however, there is no way to unset the authority once it is set...
977 // We may want to take a const char* parameter and allow NULL.
980 if (fAuthority
.IsEmpty())
983 int32 userInfoEnd
= fAuthority
.FindFirst('@');
985 // URL contains userinfo field
986 if (userInfoEnd
!= -1) {
988 fAuthority
.CopyInto(userInfo
, 0, userInfoEnd
);
990 int16 colonDelimiter
= userInfo
.FindFirst(':', 0);
992 if (colonDelimiter
== 0) {
993 SetPassword(userInfo
);
994 } else if (colonDelimiter
!= -1) {
995 userInfo
.CopyInto(fUser
, 0, colonDelimiter
);
996 userInfo
.CopyInto(fPassword
, colonDelimiter
+ 1,
997 userInfo
.Length() - colonDelimiter
);
999 SetPassword(fPassword
);
1006 // Extract the host part
1007 int16 hostEnd
= fAuthority
.FindFirst(':', userInfoEnd
);
1011 // no ':' found, the host extends to the end of the URL
1012 hostEnd
= fAuthority
.Length() + 1;
1015 // The host is likely to be present if an authority is
1016 // defined, but in some weird cases, it's not.
1017 if (hostEnd
!= userInfoEnd
) {
1018 fAuthority
.CopyInto(fHost
, userInfoEnd
, hostEnd
- userInfoEnd
);
1022 // Extract the port part
1024 if (fAuthority
.ByteAt(hostEnd
) == ':') {
1026 int16 portEnd
= fAuthority
.Length();
1029 fAuthority
.CopyInto(portString
, hostEnd
, portEnd
- hostEnd
);
1030 fPort
= atoi(portString
.String());
1032 // Even if the port is invalid, the URL is considered to
1034 fHasPort
= portString
.Length() > 0;
1040 BUrl::_DoUrlEncodeChunk(const BString
& chunk
, bool strict
, bool directory
)
1044 for (int32 i
= 0; i
< chunk
.Length(); i
++) {
1045 if (_IsUnreserved(chunk
[i
])
1046 || (directory
&& (chunk
[i
] == '/' || chunk
[i
] == '\\'))) {
1049 if (chunk
[i
] == ' ' && !strict
) {
1051 // In non-strict mode, spaces are encoded by a plus sign
1054 snprintf(hexString
, 5, "%X", chunk
[i
]);
1056 result
<< '%' << hexString
;
1066 BUrl::_DoUrlDecodeChunk(const BString
& chunk
, bool strict
)
1070 for (int32 i
= 0; i
< chunk
.Length(); i
++) {
1071 if (chunk
[i
] == '+' && !strict
)
1078 if (chunk
[i
] == '%' && i
< chunk
.Length() - 2
1079 && isxdigit(chunk
[i
+ 1]) && isxdigit(chunk
[i
+2])) {
1080 hexString
[0] = chunk
[i
+ 1];
1081 hexString
[1] = chunk
[i
+ 2];
1083 decoded
= (char)strtol(hexString
, &out
, 16);
1086 if (out
== hexString
+ 2) {
1098 BUrl::_IsProtocolValid()
1100 for (int8 index
= 0; index
< fProtocol
.Length(); index
++) {
1101 char c
= fProtocol
[index
];
1103 if (index
== 0 && !isalpha(c
))
1105 else if (!isalnum(c
) && c
!= '+' && c
!= '-' && c
!= '.')
1109 return fProtocol
.Length() > 0;
1114 BUrl::_IsUnreserved(char c
)
1116 return isalnum(c
) || c
== '-' || c
== '.' || c
== '_' || c
== '~';
1121 BUrl::_IsGenDelim(char c
)
1123 return c
== ':' || c
== '/' || c
== '?' || c
== '#' || c
== '['
1124 || c
== ']' || c
== '@';
1129 BUrl::_IsSubDelim(char c
)
1131 return c
== '!' || c
== '$' || c
== '&' || c
== '\'' || c
== '('
1132 || c
== ')' || c
== '*' || c
== '+' || c
== ',' || c
== ';'
1138 BUrl::_UrlMimeType() const
1141 mime
<< "application/x-vnd.Be.URL." << fProtocol
;
1143 return BString(mime
);