3 * Copyright (c) 2010, Zed A. Shaw and Mongrel2 Project Contributors.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * * Neither the name of the Mongrel2 Project, Zed A. Shaw, nor the names
18 * of its contributors may be used to endorse or promote products
19 * derived from this software without specific prior written
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
23 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include "http11_parser.h"
/*
 * Offset helpers used by the parser actions. All three expand to expressions
 * that name `buffer` and `parser` directly, so they can only be used in a
 * scope where both identifiers are defined.
 *
 *   LEN(AT, FPC)  - distance from the offset stored in parser->AT to FPC.
 *   MARK(M, FPC)  - stores FPC's offset from buffer in parser->M.
 *   PTR_TO(F)     - turns the offset stored in parser->F back into a pointer.
 *
 * NOTE(review): LEN expands FPC without parentheses while MARK wraps it in
 * parentheses. Every visible use passes plain `fpc`, so this is harmless
 * today, but a compound argument could bind incorrectly.
 */
42 #define LEN(AT, FPC) (FPC - buffer - parser->AT)
43 #define MARK(M,FPC) (parser->M = (FPC) - buffer)
44 #define PTR_TO(F) (buffer + parser->F)
# Parser actions (excerpt; several action headers and closing braces are not
# visible here). Positions are recorded through MARK as offsets from `buffer`
# and turned back into pointers with PTR_TO when an element is reported.
#
# Element actions pass the span from a recorded start up to fpc to
# apply_element together with a maximum length. The limits visible below are:
#   HTTP_VERSION                          10
#   REQUEST_PATH, REQUEST_METHOD          1024
#   CONTENT_TYPE, FRAGMENT, QUERY_STRING  10*1024
#   REQUEST_URI                           12*1024
# QUERY_STRING starts at query_start; all other elements start at mark.
#
# The header field callback (http_field) is optional and receives the field
# name span (field_start, field_len) and the value span (mark up to fpc).
#
# The last three lines record body_start as the offset one past the current
# character and, when header_done is set, pass it the REQUEST_BODY type with
# the pe - fpc - 1 bytes that follow the current character.
# NOTE(review): presumably these lines belong to the `done` action referenced
# by `main`; its header is not visible in this excerpt.

# Remember where the current element begins.
52 action mark {MARK(mark, fpc); }
# Remember where a header field name begins.
55 action start_field { MARK(field_start, fpc); }
57 parser->field_len = LEN(field_start, fpc);
# Remember where a header field value begins (reuses the mark slot).
60 action start_value { MARK(mark, fpc); }
63 if(parser->http_field != NULL) {
64 parser->http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, fpc));
69 if(!apply_element(parser, CONTENT_TYPE, PTR_TO(mark), fpc, 10*1024))
74 if(!apply_element(parser, FRAGMENT, PTR_TO(mark), fpc, 10*1024))
79 if(!apply_element(parser, HTTP_VERSION, PTR_TO(mark), fpc, 10))
84 if(!apply_element(parser, REQUEST_PATH, PTR_TO(mark), fpc, 1024))
# Report the request method; the span starts at mark.
88 action request_method {
89 if(!apply_element(parser, REQUEST_METHOD, PTR_TO(mark), fpc, 1024))
94 if(!apply_element(parser, REQUEST_URI, PTR_TO(mark), fpc, 12*1024))
# Remember where the query string begins (separate slot from mark).
98 action start_query {MARK(query_start, fpc); }
# Report the query string; the span starts at query_start rather than mark.
100 action query_string {
101 if(!apply_element(parser, QUERY_STRING, PTR_TO(query_start), fpc, 10*1024))
106 parser->body_start = fpc - buffer + 1;
107 if(parser->header_done != NULL)
108 parser->header_done(parser->data, REQUEST_BODY, fpc + 1, pe - fpc - 1);
112 #### HTTP PROTOCOL GRAMMAR
# Line terminator: a bare "\n" is accepted as well as "\r\n".
114 CRLF = ("\r\n" | "\n");
# Character classes. CTL is referenced below but defined outside this excerpt.
118 safe = ("$" | "-" | "_" | ".");
119 extra = ("!" | "*" | "'" | "(" | ")" | ",");
120 reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
121 unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
# national: any character not covered by one of the classes above.
122 national = any -- (alpha | digit | reserved | extra | safe | unsafe);
123 unreserved = (alpha | digit | safe | extra | national);
# escape: a percent sign followed by exactly two hex digits.
124 escape = ("%" xdigit xdigit);
125 uchar = (unreserved | escape);
126 pchar = (uchar | ":" | "@" | "&" | "=" | "+");
127 tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# token: a single ASCII character that is neither a control nor a tspecial.
130 token = (ascii -- (CTL | tspecials));
132 # URI schemes and absolute paths
# scheme is defined outside this excerpt.
134 absolute_uri = (scheme ":" (uchar | reserved )*);
136 path = ( pchar+ ( "/" pchar* )* ) ;
# The query_string action fires when the query machine is left.
137 query = ( uchar | reserved )* %query_string ;
138 param = ( pchar | "/" )* ;
139 params = ( param ( ";" param )* ) ;
# request_path fires on leaving the (optional) path, before any ";params" or
# "?query"; start_query fires on leaving the "?".
140 rel_path = ( path? %request_path (";" params)? ) ("?" %start_query query)?;
# One or more leading slashes are accepted.
141 absolute_path = ( "/"+ rel_path );
# For each element, mark fires on entry and the reporting action on exit.
143 Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
144 Fragment = ( uchar | reserved )* >mark %fragment;
# Method: 1 to 20 characters drawn from upper-case letters, digits and safe.
145 Method = ( upper | digit | safe ){1,20} >mark %request_method;
# Only versions 1.0 and 1.1 are accepted.
147 http_number = ( "1." ("0" | "1") ) ;
148 HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
# Single spaces separate the parts; the "#" Fragment part is optional.
149 Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " HTTP_Version CRLF ) ;
# NOTE(review): tspecials already contains ":", so `-- ":"` looks redundant.
151 field_name = ( token -- ":" )+ >start_field %write_field;
153 field_value = any* >start_value %write_value;
# `:>` ends field_value as soon as a character that can begin CRLF is seen.
155 message_header = field_name ":" " "* field_value :> CRLF;
# A request is the request line, zero or more headers, then an empty line.
157 Request = Request_Line ( message_header )* ( CRLF );
# done is embedded with `@`, so it runs on the transition that completes Request.
159 main := (Request ) @done;
/*
 * Hands one parsed element to the optional on_element callback.
 *
 * parser     - parser whose on_element and data members are used.
 * type       - element identifier, forwarded unchanged to the callback.
 * begin, end - the element spans begin up to (not including) end; its length
 *              is end - begin, narrowed to int.
 * max_length - the length is compared against this limit before the callback
 *              is considered.
 *
 * NOTE(review): the body of the over-length branch and the return statements
 * are not visible in this excerpt. Callers test the result with
 * `!apply_element(...)`, so presumably zero means the element was rejected --
 * confirm against the full source.
 */
166 static int apply_element(http_parser *parser, int type, const char *begin, const char *end, int max_length)
168 int len = (int)(end-begin);
169 if(len > max_length) {
/* The callback is optional; it is only invoked when one is installed. */
172 if(parser->on_element)
173 parser->on_element(parser->data, type, begin, len);
/*
 * Initializes a parser's state. The assignments visible here zero the body
 * offset, the content length, and the header field offset and length.
 *
 * NOTE(review): the remaining statements of this function, including its
 * return value, are not visible in this excerpt.
 */
179 int http_parser_init(http_parser *parser) {
183 parser->body_start = 0;
184 parser->content_len = 0;
187 parser->field_len = 0;
188 parser->field_start = 0;
/*
 * Runs the parser over buffer starting at offset off and returns
 * parser->nread.
 *
 * parser - parser state; nread is increased by p - (buffer + off).
 * buffer - start of the input. Presumably this is the `buffer` that the
 *          MARK/LEN/PTR_TO macros refer to -- confirm against the full source.
 * len    - the assertions below treat this as the size of buffer.
 * off    - offset into buffer; asserted to be no greater than len.
 *
 * NOTE(review): every bounds check in this function is an assert(), so none
 * of them is enforced when the file is compiled with NDEBUG.
 * NOTE(review): the declarations of p and pe, the generated state-machine
 * code and the body of the has_error conditional are not visible in this
 * excerpt.
 */
194 size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off)
/* An empty buffer returns 0 immediately. */
196 if(len == 0) return 0;
201 assert(off <= len && "offset past end of buffer");
/* NOTE(review): both sizes are narrowed to int for this comparison. */
206 assert(pe - p == (int)len - (int)off && "pointers aren't same distance");
210 assert(p <= pe && "Buffer overflow after parsing.");
212 if (!http_parser_has_error(parser)) {
/* Add the distance between p and the starting position to the running total. */
216 parser->nread += p - (buffer + off);
/*
 * Sanity checks on the offsets kept in the parser state. mark and field_start
 * use a strict '<'; the other checks allow equality with len.
 */
218 assert(parser->nread <= len && "nread longer than length");
219 assert(parser->body_start <= len && "body starts after buffer end");
220 assert(parser->mark < len && "mark is after buffer end");
221 assert(parser->field_len <= len && "field has length longer than whole buffer");
222 assert(parser->field_start < len && "field starts after buffer end");
224 return(parser->nread);
/*
 * Reports the parser's final status. The error state is tested first and the
 * finished state second.
 *
 * NOTE(review): the value returned by each branch is not visible in this
 * excerpt.
 */
227 int http_parser_finish(http_parser *parser)
229 if (http_parser_has_error(parser) ) {
231 } else if (http_parser_is_finished(parser) ) {
/*
 * Returns non-zero when the parser's current state (cs) equals
 * http_parser_error, a constant defined outside this excerpt.
 */
238 int http_parser_has_error(http_parser *parser) {
239 return parser->cs == http_parser_error;
242 int http_parser_is_finished(http_parser *parser) {
243 return parser->cs >= http_parser_first_final;