1 /* This software was written by Dirk Engling <erdgeist@erdgeist.org>
2 It is considered beerware. Prost. Skol. Cheers or whatever.
7 #include "scan_urlencoded_query.h"
15 /* The idea is to do an in-place replacement, or else to guarantee at least
16 strlen( string ) bytes in deststring.
17 See http://www.ietf.org/rfc/rfc2396.txt
18 unreserved = alphanum | mark
19 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
20 we add '%' to the matrix to not stop at encoded chars.
21 After losing too many requests to being too strict, we also mark the following characters as non-terminating in the matrix
22 relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
25 /* This matrix holds, for each ASCII character, the information
26 whether it is a non-terminating character for one of the three
27 scan states we are in, that is 'path', 'param' and 'value' from
28 /path?param=value&param=value; this is encoded in bits 0, 1 and 2
31 The top bit of the lower nibble (value 8) indicates whether this character is
32 a hard terminator, e.g. \0, \n, \r or a space, at which the whole scanning
33 process should terminate
35 static const unsigned char is_unreserved
[256] = {
36 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
37 8,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
38 4,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,7,
39 8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,7,0,
40 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
41 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
42 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
43 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
46 /* Do a fast conversion from a hex digit character to its nibble value */
47 static unsigned char fromhex(unsigned char x
) {
58 /* Skip the value of a param=value pair */
59 void scan_urlencoded_skipvalue(char **string
) {
60 const unsigned char *s
= *(const unsigned char **)string
;
63 /* Since we are asked to skip the 'value', we assume to stop at
64 terminators for a 'value' string position */
65 while ((f
= is_unreserved
[*s
++]) & SCAN_SEARCHPATH_VALUE
)
68 /* If we stopped at a hard terminator like \0 or \n, make the
69 next scan_urlencoded_query encounter it again */
70 if (f
& SCAN_SEARCHPATH_TERMINATOR
)
76 int scan_find_keywords(const ot_keywords
*keywords
, char **string
, SCAN_SEARCHPATH_FLAG flags
) {
77 char *deststring
= *string
;
78 ssize_t match_length
= scan_urlencoded_query(string
, deststring
, flags
);
82 if (match_length
== 0)
85 while (keywords
->key
) {
86 if (!strncmp(keywords
->key
, deststring
, match_length
) && !keywords
->key
[match_length
])
87 return keywords
->value
;
94 ssize_t
scan_urlencoded_query(char **string
, char *deststring
, SCAN_SEARCHPATH_FLAG flags
) {
95 const unsigned char *s
= *(const unsigned char **)string
;
96 unsigned char *d
= (unsigned char *)deststring
;
99 /* This is the main decoding loop.
100 'flags' determines which characters are non-terminating in the current context
101 (i.e. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path)
103 while (is_unreserved
[c
= *s
++] & flags
) {
105 /* When encountering an url escaped character, try to decode */
107 if ((b
= fromhex(*s
++)) == 0xff)
109 if ((c
= fromhex(*s
++)) == 0xff)
114 /* Write (possibly decoded) character to output */
123 /* If we started scanning on a hard terminator, indicate we've finished */
124 if (d
== (unsigned char *)deststring
)
127 /* Else make the next call to scan_urlencoded_query encounter it again */
131 if (flags
!= SCAN_PATH
)
135 if (flags
!= SCAN_SEARCHPATH_PARAM
)
139 if (flags
== SCAN_PATH
)
141 if (flags
== SCAN_SEARCHPATH_PARAM
)
149 return d
- (unsigned char *)deststring
;
152 ssize_t
scan_fixed_int(char *data
, size_t len
, int *tmp
) {
156 --len
, ++data
, ++minus
;
157 while ((len
> 0) && (*data
>= '0') && (*data
<= '9')) {
159 *tmp
= 10 * *tmp
+ *data
++ - '0';