2 * Lightweight URL & URI parser (RFC 1738, RFC 3986)
3 * https://github.com/corporateshark/LUrlParser
5 * The MIT License (MIT)
7 * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in all
17 * copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 #include "LUrlParser.h"
// Check that a URL scheme name is built only from the characters that
// RFC 1738 and RFC 3986 permit in a scheme: letters, digits, '+', '-' and '.'.
//
// SchemeName - the text found before the first ':' of the URL.
//
// Returns true when every character belongs to the permitted set and false
// as soon as one does not. An empty name is accepted (the loop body never
// runs), and the RFC 3986 rule that the first character must be a letter is
// not enforced; both are kept lenient so that inputs accepted before stay
// accepted.
static bool IsSchemeValid( const std::string& SchemeName )
{
	for ( const char c : SchemeName )
	{
		// The <ctype.h> classifiers require a value representable as
		// unsigned char; handing them a negative plain char (any byte >= 0x80
		// where char is signed) is undefined behaviour.
		const unsigned char uc = static_cast<unsigned char>( c );

		// isalnum rather than isalpha: digits are legal in scheme names ("s3", "h2").
		if ( !isalnum( uc ) && c != '+' && c != '-' && c != '.' ) { return false; }
	}

	return true;
}
45 bool LUrlParser::clParseURL::GetPort( int* OutPort
) const
47 if ( !IsValid() ) { return false; }
49 int Port
= atoi( m_Port
.c_str() );
51 if ( Port
<= 0 || Port
> 65535 ) { return false; }
53 if ( OutPort
) { *OutPort
= Port
; }
58 // based on RFC 1738 and RFC 3986
59 LUrlParser::clParseURL
LUrlParser::clParseURL::ParseURL( const std::string
& URL
)
61 LUrlParser::clParseURL Result
;
63 const char* CurrentString
= URL
.c_str();
66 * <scheme>:<scheme-specific-part>
67 * <scheme> := [a-z\+\-\.]+
68 * For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names
73 const char* LocalString
= strchr( CurrentString
, ':' );
77 return clParseURL( LUrlParserError_NoUrlCharacter
);
80 // save the scheme name
81 Result
.m_Scheme
= std::string( CurrentString
, LocalString
- CurrentString
);
83 if ( !IsSchemeValid( Result
.m_Scheme
) )
85 return clParseURL( LUrlParserError_InvalidSchemeName
);
88 // scheme should be lowercase
89 std::transform( Result
.m_Scheme
.begin(), Result
.m_Scheme
.end(), Result
.m_Scheme
.begin(), ::tolower
);
92 CurrentString
= LocalString
+1;
96 * //<user>:<password>@<host>:<port>/<url-path>
97 * any ":", "@" and "/" must be normalized
101 if ( *CurrentString
++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash
);
102 if ( *CurrentString
++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash
);
104 // check if the user name and password are specified
105 bool bHasUserName
= false;
107 const char* LocalString
= CurrentString
;
109 while ( *LocalString
)
111 if ( *LocalString
== '@' )
113 // user name and password are specified
117 else if ( *LocalString
== '/' )
119 // end of <host>:<port> specification
120 bHasUserName
= false;
127 // user name and password
128 LocalString
= CurrentString
;
133 while ( *LocalString
&& *LocalString
!= ':' && *LocalString
!= '@' ) LocalString
++;
135 Result
.m_UserName
= std::string( CurrentString
, LocalString
- CurrentString
);
137 // proceed with the current pointer
138 CurrentString
= LocalString
;
140 if ( *CurrentString
== ':' )
146 LocalString
= CurrentString
;
148 while ( *LocalString
&& *LocalString
!= '@' ) LocalString
++;
150 Result
.m_Password
= std::string( CurrentString
, LocalString
- CurrentString
);
152 CurrentString
= LocalString
;
156 if ( *CurrentString
!= '@' )
158 return clParseURL( LUrlParserError_NoAtSign
);
164 bool bHasBracket
= ( *CurrentString
== '[' );
166 // go ahead, read the host name
167 LocalString
= CurrentString
;
169 while ( *LocalString
)
171 if ( bHasBracket
&& *LocalString
== ']' )
173 // end of IPv6 address
177 else if ( !bHasBracket
&& ( *LocalString
== ':' || *LocalString
== '/' ) )
179 // port number is specified
186 Result
.m_Host
= std::string( CurrentString
, LocalString
- CurrentString
);
188 CurrentString
= LocalString
;
190 // is port number specified?
191 if ( *CurrentString
== ':' )
196 LocalString
= CurrentString
;
198 while ( *LocalString
&& *LocalString
!= '/' ) LocalString
++;
200 Result
.m_Port
= std::string( CurrentString
, LocalString
- CurrentString
);
202 CurrentString
= LocalString
;
206 if ( !*CurrentString
)
208 Result
.m_ErrorCode
= LUrlParserError_Ok
;
214 if ( *CurrentString
!= '/' )
216 return clParseURL( LUrlParserError_NoSlash
);
222 LocalString
= CurrentString
;
224 while ( *LocalString
&& *LocalString
!= '#' && *LocalString
!= '?' ) LocalString
++;
226 Result
.m_Path
= std::string( CurrentString
, LocalString
- CurrentString
);
228 CurrentString
= LocalString
;
231 if ( *CurrentString
== '?' )
237 LocalString
= CurrentString
;
239 while ( *LocalString
&& *LocalString
!= '#' ) LocalString
++;
241 Result
.m_Query
= std::string( CurrentString
, LocalString
- CurrentString
);
243 CurrentString
= LocalString
;
246 // check for fragment
247 if ( *CurrentString
== '#' )
253 LocalString
= CurrentString
;
255 while ( *LocalString
) LocalString
++;
257 Result
.m_Fragment
= std::string( CurrentString
, LocalString
- CurrentString
);
259 CurrentString
= LocalString
;
262 Result
.m_ErrorCode
= LUrlParserError_Ok
;