1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * apr_uri.c: URI related utility things
26 #include "apr_general.h"
27 #include "apr_strings.h"
29 #define APR_WANT_STRFUNC
34 typedef struct schemes_t schemes_t
; /* alias for struct schemes_t; used as the element type of the schemes[] table */
36 /** Structure to store various schemes and their default ports */
38 /** The name of the scheme */
40 /** The default port for the scheme */
41 apr_port_t default_port
;
44 /* Some WWW schemes and their default ports; this is basically /etc/services */
45 /* This will become global when the protocol abstraction comes */
46 /* As the schemes are searched by a linear search, */
47 /* they are sorted by their expected frequency */
48 static schemes_t schemes
[] =
50 {"http", APR_URI_HTTP_DEFAULT_PORT
},
51 {"ftp", APR_URI_FTP_DEFAULT_PORT
},
52 {"https", APR_URI_HTTPS_DEFAULT_PORT
},
53 {"gopher", APR_URI_GOPHER_DEFAULT_PORT
},
54 {"ldap", APR_URI_LDAP_DEFAULT_PORT
},
55 {"nntp", APR_URI_NNTP_DEFAULT_PORT
},
56 {"snews", APR_URI_SNEWS_DEFAULT_PORT
},
57 {"imap", APR_URI_IMAP_DEFAULT_PORT
},
58 {"pop", APR_URI_POP_DEFAULT_PORT
},
59 {"sip", APR_URI_SIP_DEFAULT_PORT
},
60 {"rtsp", APR_URI_RTSP_DEFAULT_PORT
},
61 {"wais", APR_URI_WAIS_DEFAULT_PORT
},
62 {"z39.50r", APR_URI_WAIS_DEFAULT_PORT
},
63 {"z39.50s", APR_URI_WAIS_DEFAULT_PORT
},
64 {"prospero", APR_URI_PROSPERO_DEFAULT_PORT
},
65 {"nfs", APR_URI_NFS_DEFAULT_PORT
},
66 {"tip", APR_URI_TIP_DEFAULT_PORT
},
67 {"acap", APR_URI_ACAP_DEFAULT_PORT
},
68 {"telnet", APR_URI_TELNET_DEFAULT_PORT
},
69 {"ssh", APR_URI_SSH_DEFAULT_PORT
},
70 { NULL
, 0xFFFF } /* unknown port */
73 APU_DECLARE(apr_port_t
) apr_uri_port_of_scheme(const char *scheme_str
)
78 for (scheme
= schemes
; scheme
->name
!= NULL
; ++scheme
) {
79 if (strcasecmp(scheme_str
, scheme
->name
) == 0) {
80 return scheme
->default_port
;
87 /* Unparse an apr_uri_t structure to a URI string.
88 * Optionally suppress the password for security reasons.
90 APU_DECLARE(char *) apr_uri_unparse(apr_pool_t
*p
,
91 const apr_uri_t
*uptr
,
96 /* If suppressing the site part, omit both user name & scheme://hostname */
97 if (!(flags
& APR_URI_UNP_OMITSITEPART
)) {
99 /* Construct a "user:password@" string, honoring the passed
100 * APR_URI_UNP_ flags: */
101 if (uptr
->user
|| uptr
->password
) {
103 (uptr
->user
&& !(flags
& APR_URI_UNP_OMITUSER
))
105 (uptr
->password
&& !(flags
& APR_URI_UNP_OMITPASSWORD
))
107 (uptr
->password
&& !(flags
& APR_URI_UNP_OMITPASSWORD
))
108 ? ((flags
& APR_URI_UNP_REVEALPASSWORD
)
109 ? uptr
->password
: "XXXXXXXX")
111 ((uptr
->user
&& !(flags
& APR_URI_UNP_OMITUSER
)) ||
112 (uptr
->password
&& !(flags
& APR_URI_UNP_OMITPASSWORD
)))
117 /* Construct scheme://site string */
118 if (uptr
->hostname
) {
120 const char *lbrk
= "", *rbrk
= "";
122 if (strchr(uptr
->hostname
, ':')) { /* v6 literal */
128 (uptr
->port_str
== NULL
||
130 uptr
->port
== apr_uri_port_of_scheme(uptr
->scheme
));
132 ret
= apr_pstrcat(p
, "//", ret
, lbrk
, uptr
->hostname
, rbrk
,
133 is_default_port
? "" : ":",
134 is_default_port
? "" : uptr
->port_str
,
138 ret
= apr_pstrcat(p
, uptr
->scheme
, ":", ret
, NULL
);
142 /* Should we suppress all path info? */
143 if (!(flags
& APR_URI_UNP_OMITPATHINFO
)) {
144 /* Append path, query and fragment strings: */
149 (uptr
->query
&& !(flags
& APR_URI_UNP_OMITQUERY
))
151 (uptr
->query
&& !(flags
& APR_URI_UNP_OMITQUERY
))
153 (uptr
->fragment
&& !(flags
& APR_URI_UNP_OMITQUERY
))
155 (uptr
->fragment
&& !(flags
& APR_URI_UNP_OMITQUERY
))
156 ? uptr
->fragment
: NULL
,
162 /* Here is the hand-optimized parse_uri_components(). There are some wild
163 * tricks we could pull in assembly language that we don't pull here... like we
164 * can do word-at-time scans for delimiter characters using the same technique
165 * that fast memchr()s use. But that would be way non-portable. -djg
168 /* We have a table (uri_delims) that we can index by character and it tells us if the
169 * character is one of the interesting delimiters. Note that we even get
170 * compares for NUL for free -- it's just another delimiter.
173 #define T_COLON 0x01 /* ':' -- only the scheme scan (NOTEND_SCHEME) stops on it */
174 #define T_SLASH 0x02 /* '/' -- stops the hostinfo scan; not a stop character for the path scan */
175 #define T_QUESTION 0x04 /* '?' -- stops the hostinfo and path scans */
176 #define T_HASH 0x08 /* '#' -- stops the hostinfo and path scans */
177 #define T_NUL 0x80 /* '\0' -- end of string; included in every NOTEND_* mask */
179 #if APR_CHARSET_EBCDIC
180 /* Delimiter table for the EBCDIC character set */
181 static const unsigned char uri_delims
[256] = {
182 T_NUL
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
187 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188 0,T_SLASH
,0,0,0,0,0,0,0,0,0,0,0,0,0,T_QUESTION
,
189 0,0,0,0,0,0,0,0,0,0,T_COLON
,T_HASH
,0,0,0,0,
190 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
193 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
194 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
195 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
200 /* Delimiter table for the ASCII character set */
201 static const unsigned char uri_delims
[256] = {
202 T_NUL
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
203 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
204 0,0,0,T_HASH
,0,0,0,0,0,0,0,0,0,0,0,T_SLASH
,
205 0,0,0,0,0,0,0,0,0,0,T_COLON
,0,0,0,0,T_QUESTION
,
206 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
207 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
208 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
213 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
214 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
215 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
216 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
217 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
222 /* it works like this:
223 if ((uri_delims[ch] & NOTEND_foobar) == 0) {
224 then we're not at a delimiter for foobar
228 /* Note that we optimize the scheme scanning here, we cheat and let the
229 * compiler know that it doesn't have to do the & masking.
231 #define NOTEND_SCHEME (0xff) /* all bits set: any flagged character (':', '/', '?', '#', NUL) stops the scheme scan */
232 #define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL) /* hostinfo scan stops at '/', '?', '#' or NUL */
233 #define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL) /* path scan stops at '?', '#' or NUL */
235 /* parse_uri_components():
236 * Parse a given URI, fill in all supplied fields of a uri_components
237 * structure. This eliminates the necessity of extracting host, port,
238 * path, query info repeatedly in the modules.
240 * - fills in fields of uri_components *uptr
241 * - none on any of the r->* fields
243 APU_DECLARE(apr_status_t
) apr_uri_parse(apr_pool_t
*p
, const char *uri
,
248 const char *hostinfo
;
251 int v6_offset1
= 0, v6_offset2
= 0;
253 /* Initialize the structure. parse_uri() and parse_uri_components()
254 * can be called more than once per request.
256 memset (uptr
, '\0', sizeof(*uptr
));
257 uptr
->is_initialized
= 1;
259 /* We assume the processor has a branch predictor like most --
260 * it assumes forward branches are untaken and backwards are taken. That's
261 * the reason for the gotos. -djg
264 /* RFC2396 #4.3 says that two leading slashes mean we have an
265 * authority component, not a path! Fixing this looks scary
266 * with the gotos here. But if the existing logic is valid,
267 * then presumably a goto pointing to deal_with_authority works.
269 * RFC2396 describes this as resolving an ambiguity. In the
270 * case of three or more slashes there would seem to be no
271 * ambiguity, so it is a path after all.
273 if (uri
[1] == '/' && uri
[2] != '/') {
275 goto deal_with_authority
;
279 /* we expect uri to point to first character of path ... remember
280 * that the path could be empty -- http://foobar?query for example
283 while ((uri_delims
[*(unsigned char *)s
] & NOTEND_PATH
) == 0) {
287 uptr
->path
= apr_pstrmemdup(p
, uri
, s
- uri
);
296 uptr
->fragment
= apr_pstrdup(p
, s1
+ 1);
297 uptr
->query
= apr_pstrmemdup(p
, s
, s1
- s
);
300 uptr
->query
= apr_pstrdup(p
, s
);
304 /* otherwise it's a fragment */
305 uptr
->fragment
= apr_pstrdup(p
, s
+ 1);
309 /* find the scheme: */
311 while ((uri_delims
[*(unsigned char *)s
] & NOTEND_SCHEME
) == 0) {
314 /* scheme must be non-empty and followed by : */
315 if (s
== uri
|| s
[0] != ':') {
316 goto deal_with_path
; /* backwards predicted taken! */
319 uptr
->scheme
= apr_pstrmemdup(p
, uri
, s
- uri
);
320 if (s
[1] != '/' || s
[2] != '/') {
329 while ((uri_delims
[*(unsigned char *)s
] & NOTEND_HOSTINFO
) == 0) {
332 uri
= s
; /* whatever follows hostinfo is start of uri */
333 uptr
->hostinfo
= apr_pstrmemdup(p
, hostinfo
, uri
- hostinfo
);
335 /* If there's a username:password@host:port, the @ we want is the last @...
336 * too bad there's no memrchr()... For the C purists, note that hostinfo
337 * is definitely not the first character of the original uri so therefore
338 * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
342 } while (s
>= hostinfo
&& *s
!= '@');
344 /* again we want the common case to be fall through */
346 /* We expect hostinfo to point to the first character of
347 * the hostname. If there's a port it is the first colon,
350 if (*hostinfo
== '[') {
353 s
= memchr(hostinfo
, ']', uri
- hostinfo
);
358 s
= NULL
; /* no port */
362 s
= memchr(hostinfo
, ':', uri
- hostinfo
);
365 /* we expect the common case to have no port */
366 uptr
->hostname
= apr_pstrmemdup(p
,
367 hostinfo
+ v6_offset1
,
368 uri
- hostinfo
- v6_offset2
);
371 uptr
->hostname
= apr_pstrmemdup(p
,
372 hostinfo
+ v6_offset1
,
373 s
- hostinfo
- v6_offset2
);
375 uptr
->port_str
= apr_pstrmemdup(p
, s
, uri
- s
);
377 port
= strtol(uptr
->port_str
, &endstr
, 10);
379 if (*endstr
== '\0') {
382 /* Invalid characters after ':' found */
385 uptr
->port
= apr_uri_port_of_scheme(uptr
->scheme
);
389 /* first colon delimits username:password */
390 s1
= memchr(hostinfo
, ':', s
- hostinfo
);
392 uptr
->user
= apr_pstrmemdup(p
, hostinfo
, s1
- hostinfo
);
394 uptr
->password
= apr_pstrmemdup(p
, s1
, s
- s1
);
397 uptr
->user
= apr_pstrmemdup(p
, hostinfo
, s
- hostinfo
);
403 /* Special case for CONNECT parsing: it comes with the hostinfo part only */
404 /* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
405 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
406 * for the format of the "CONNECT host:port HTTP/1.0" request
408 APU_DECLARE(apr_status_t
) apr_uri_parse_hostinfo(apr_pool_t
*p
,
409 const char *hostinfo
,
417 /* Initialize the structure. parse_uri() and parse_uri_components()
418 * can be called more than once per request.
420 memset(uptr
, '\0', sizeof(*uptr
));
421 uptr
->is_initialized
= 1;
422 uptr
->hostinfo
= apr_pstrdup(p
, hostinfo
);
424 /* We expect hostinfo to point to the first character of
425 * the hostname. There must be a port, separated by a colon
427 if (*hostinfo
== '[') {
428 if ((rsb
= strchr(hostinfo
, ']')) == NULL
||
432 /* literal IPv6 address */
438 s
= strchr(hostinfo
, ':');
443 uptr
->hostname
= apr_pstrndup(p
, hostinfo
, s
- hostinfo
- v6_offset1
);
445 uptr
->port_str
= apr_pstrdup(p
, s
);
447 uptr
->port
= (unsigned short) strtol(uptr
->port_str
, &endstr
, 10);
448 if (*endstr
== '\0') {
451 /* Invalid characters after ':' found */