Support conversion of linkshere
[dueringa_WikiWalker.git] / lib / LUrlParser / LUrlParser.cpp
blobffe345b670a3dc22e5d10b5134420e7c885e4229
1 /*
2 * Lightweight URL & URI parser (RFC 1738, RFC 3986)
3 * https://github.com/corporateshark/LUrlParser
4 *
5 * The MIT License (MIT)
6 *
7 * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in all
17 * copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
28 #include "LUrlParser.h"
30 #include <algorithm>
31 #include <cstring>
32 #include <stdlib.h>
// Check whether a scheme name consists only of the characters this parser accepts:
// letters (as classified by isalpha), '+', '-' and '.'.
//
// SchemeName: the text preceding the first ':' of the URL.
// Returns true when every character is acceptable. An empty name has no offending
// character and is therefore accepted as well.
static bool IsSchemeValid( const std::string& SchemeName )
{
	return std::all_of( SchemeName.begin(), SchemeName.end(), []( char c )
	{
		// The <cctype> classifiers have undefined behaviour for negative arguments other
		// than EOF, so a plain (possibly signed) char must go through unsigned char first.
		return isalpha( static_cast<unsigned char>( c ) ) || c == '+' || c == '-' || c == '.';
	} );
}
45 bool LUrlParser::clParseURL::GetPort( int* OutPort ) const
47 if ( !IsValid() ) { return false; }
49 int Port = atoi( m_Port.c_str() );
51 if ( Port <= 0 || Port > 65535 ) { return false; }
53 if ( OutPort ) { *OutPort = Port; }
55 return true;
58 // based on RFC 1738 and RFC 3986
59 LUrlParser::clParseURL LUrlParser::clParseURL::ParseURL( const std::string& URL )
61 LUrlParser::clParseURL Result;
63 const char* CurrentString = URL.c_str();
66 * <scheme>:<scheme-specific-part>
67 * <scheme> := [a-z\+\-\.]+
68 * For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names
71 // try to read scheme
73 const char* LocalString = strchr( CurrentString, ':' );
75 if ( !LocalString )
77 return clParseURL( LUrlParserError_NoUrlCharacter );
80 // save the scheme name
81 Result.m_Scheme = std::string( CurrentString, LocalString - CurrentString );
83 if ( !IsSchemeValid( Result.m_Scheme ) )
85 return clParseURL( LUrlParserError_InvalidSchemeName );
88 // scheme should be lowercase
89 std::transform( Result.m_Scheme.begin(), Result.m_Scheme.end(), Result.m_Scheme.begin(), ::tolower );
91 // skip ':'
92 CurrentString = LocalString+1;
96 * //<user>:<password>@<host>:<port>/<url-path>
97 * any ":", "@" and "/" must be normalized
100 // skip "//"
101 if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
102 if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
104 // check if the user name and password are specified
105 bool bHasUserName = false;
107 const char* LocalString = CurrentString;
109 while ( *LocalString )
111 if ( *LocalString == '@' )
113 // user name and password are specified
114 bHasUserName = true;
115 break;
117 else if ( *LocalString == '/' )
119 // end of <host>:<port> specification
120 bHasUserName = false;
121 break;
124 LocalString++;
127 // user name and password
128 LocalString = CurrentString;
130 if ( bHasUserName )
132 // read user name
133 while ( *LocalString && *LocalString != ':' && *LocalString != '@' ) LocalString++;
135 Result.m_UserName = std::string( CurrentString, LocalString - CurrentString );
137 // proceed with the current pointer
138 CurrentString = LocalString;
140 if ( *CurrentString == ':' )
142 // skip ':'
143 CurrentString++;
145 // read password
146 LocalString = CurrentString;
148 while ( *LocalString && *LocalString != '@' ) LocalString++;
150 Result.m_Password = std::string( CurrentString, LocalString - CurrentString );
152 CurrentString = LocalString;
155 // skip '@'
156 if ( *CurrentString != '@' )
158 return clParseURL( LUrlParserError_NoAtSign );
161 CurrentString++;
164 bool bHasBracket = ( *CurrentString == '[' );
166 // go ahead, read the host name
167 LocalString = CurrentString;
169 while ( *LocalString )
171 if ( bHasBracket && *LocalString == ']' )
173 // end of IPv6 address
174 LocalString++;
175 break;
177 else if ( !bHasBracket && ( *LocalString == ':' || *LocalString == '/' ) )
179 // port number is specified
180 break;
183 LocalString++;
186 Result.m_Host = std::string( CurrentString, LocalString - CurrentString );
188 CurrentString = LocalString;
190 // is port number specified?
191 if ( *CurrentString == ':' )
193 CurrentString++;
195 // read port number
196 LocalString = CurrentString;
198 while ( *LocalString && *LocalString != '/' ) LocalString++;
200 Result.m_Port = std::string( CurrentString, LocalString - CurrentString );
202 CurrentString = LocalString;
205 // end of string
206 if ( !*CurrentString )
208 Result.m_ErrorCode = LUrlParserError_Ok;
210 return Result;
213 // skip '/'
214 if ( *CurrentString != '/' )
216 return clParseURL( LUrlParserError_NoSlash );
219 CurrentString++;
221 // parse the path
222 LocalString = CurrentString;
224 while ( *LocalString && *LocalString != '#' && *LocalString != '?' ) LocalString++;
226 Result.m_Path = std::string( CurrentString, LocalString - CurrentString );
228 CurrentString = LocalString;
230 // check for query
231 if ( *CurrentString == '?' )
233 // skip '?'
234 CurrentString++;
236 // read query
237 LocalString = CurrentString;
239 while ( *LocalString && *LocalString != '#' ) LocalString++;
241 Result.m_Query = std::string( CurrentString, LocalString - CurrentString );
243 CurrentString = LocalString;
246 // check for fragment
247 if ( *CurrentString == '#' )
249 // skip '#'
250 CurrentString++;
252 // read fragment
253 LocalString = CurrentString;
255 while ( *LocalString ) LocalString++;
257 Result.m_Fragment = std::string( CurrentString, LocalString - CurrentString );
259 CurrentString = LocalString;
262 Result.m_ErrorCode = LUrlParserError_Ok;
264 return Result;