[Author: zork]
[google-gears.git] / gears / base / common / http_utils.cc
blob8ddb9ffa04b2d198182a68af7770fe11ce48903e
1 // Copyright 2007, Google Inc.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 // 1. Redistributions of source code must retain the above copyright notice,
7 // this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright notice,
9 // this list of conditions and the following disclaimer in the documentation
10 // and/or other materials provided with the distribution.
11 // 3. Neither the name of Google Inc. nor the names of its contributors may be
12 // used to endorse or promote products derived from this software without
13 // specific prior written permission.
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 // This file is branched from /google3/webutil/http/httputils.cc
28 #include <iterator>
29 #include <vector>
30 #include "gears/base/common/http_utils.h"
32 // ----------------------------------------------------------------------
33 // The top portion of this file contains various google3 utils that the
34 // header parsing code we're actually interested in re-using depends on.
35 // ----------------------------------------------------------------------
#if WIN32
// Port shims taken from google3/base/port.h: give the MSVC runtime's
// case-insensitive comparison functions their POSIX spellings, which the
// code below uses.
#define strncasecmp _strnicmp
#define strcasecmp _stricmp
// Silence C4018 (signed/unsigned mismatch) for this file.
#pragma warning(disable : 4018)
#endif
44 // From google3/string/memutils.h
45 // ----------------------------------------------------------------------
46 // strliterallen, memcasecmp, memcaseis, memis, memcasestr
47 // ----------------------------------------------------------------------
// Length of a string literal, excluding its NUL terminator.  Pasting the
// argument between two empty literals makes the macro fail to compile when
// "str" is anything other than a string literal.
#define strliterallen(str) (sizeof("" str "") - sizeof(""))
// Case-insensitive comparison of the first len bytes of s1 and s2.
// Returns 0 when the ranges match after tolower() folding; otherwise the
// difference of the first pair of folded bytes that differ.  Bytes are
// read as unsigned char, and embedded NULs do not stop the comparison.
static int memcasecmp(const char *s1, const char *s2, size_t len) {
  const unsigned char *lhs = reinterpret_cast<const unsigned char *>(s1);
  const unsigned char *rhs = reinterpret_cast<const unsigned char *>(s2);
  const unsigned char *const lhs_end = lhs + len;

  while (lhs != lhs_end) {
    const int lhs_folded = tolower(*lhs++);
    const int rhs_folded = tolower(*rhs++);
    if (lhs_folded != rhs_folded) {
      return lhs_folded - rhs_folded;
    }
  }
  return 0;
}
// memcaseis(str, len, literal): true when the len bytes at str equal the
// string literal, ignoring case.  The length is compared first, so the
// byte comparison only runs when the sizes already agree.
#define memcaseis(str, len, literal)            \
  ((len) == strliterallen(literal) &&           \
   memcasecmp((str), literal, strliterallen(literal)) == 0)

// memis(str, len, literal): as memcaseis, but the bytes must match exactly.
#define memis(str, len, literal)                \
  ((len) == strliterallen(literal) &&           \
   memcmp((str), literal, strliterallen(literal)) == 0)
71 // From google3/string/strutil.h
72 // ----------------------------------------------------------------------
73 // var_strprefix()
74 // var_strcaseprefix()
75 // Give me a string and a putative prefix, and I return a pointer
76 // past the prefix if the prefix matches, or NULL else.
77 // Just like 'strprefix' and 'strcaseprefix' except that it works
78 // on character pointers as well as constant strings.
79 // Templates are used to provide both const and non-const versions.
80 // ----------------------------------------------------------------------
// Returns a pointer just past "prefix" when "str" starts with it (exact,
// case-sensitive match), or NULL otherwise.  CharStar may be char* or
// const char*; the result has the same type as "str".
template<class CharStar>
inline CharStar var_strprefix(CharStar str, const char* prefix) {
  // Keep the length as size_t: it is passed straight to strncmp(), and
  // storing it in an int would narrow the value returned by strlen().
  const size_t len = strlen(prefix);
  return strncmp(str, prefix, len) == 0 ? str + len : NULL;
}
// Case-insensitive variant of var_strprefix(): returns a pointer just past
// "prefix" when "str" starts with it ignoring case, or NULL otherwise.
// CharStar may be char* or const char*; the result has the same type.
template<class CharStar>
inline CharStar var_strcaseprefix(CharStar str, const char* prefix) {
  // Keep the length as size_t: it is passed straight to strncasecmp(), and
  // storing it in an int would narrow the value returned by strlen().
  const size_t len = strlen(prefix);
  return strncasecmp(str, prefix, len) == 0 ? str + len : NULL;
}
94 // ----------------------------------------------------------------------
95 // Replacements for logging related macros
96 // ----------------------------------------------------------------------
// Stand-in for the logging streams of the original sources: it accepts
// C strings through operator<< and discards them, so chained logging
// expressions still compile but do nothing.
class null_stream {
 public:
  null_stream& operator<<(const char*) { return *this; }
  null_stream& operator<<(char*) { return *this; }
};

// The single sink that the NOLOG/NOVLOG macros expand to.
static null_stream devnull;
// Assertion macros mapped onto assert().  Arguments are parenthesized so
// that operands containing lower-precedence operators (e.g. "a & b") are
// compared as a whole, and CHECK() carries no trailing semicolon so it
// behaves like a single statement inside an unbraced if/else.
#define CHECK_LT(x, y) assert((x) < (y))
#define CHECK_GE(x, y) assert((x) >= (y))
#define CHECK(x) assert(x)
// Logging is compiled out: both macros ignore "level" and yield the
// discard-everything stream.
#define NOLOG(level) devnull   // was LOG in original sources
#define NOVLOG(level) devnull  // was VLOG in original sources
// Severity token passed to NOLOG(); the value is never inspected.
#define WARNING 1
112 // Based on google3/base/arena.h
113 // ----------------------------------------------------------------------
114 // We implement just enough of the interface to allow this module to compile.
115 // ----------------------------------------------------------------------
// Minimal arena: hands out heap buffers and frees all of them together in
// Reset() or on destruction.  Individual buffers are never freed early.
class UnsafeArena {
 public:
  UnsafeArena() {}

  ~UnsafeArena() {
    Reset();
  }

  // Returns an arena-owned copy of the NUL-terminated string s.
  char* Strdup(const char* s) {
    const size_t size = strlen(s) + 1;  // include the terminator
    return static_cast<char*>(memcpy(Alloc(size), s, size));
  }

  // Returns an uninitialized, arena-owned buffer of "size" bytes.
  char* Alloc(const size_t size) {
    // Reserve the bookkeeping slot first: if growing the vector throws,
    // nothing has been allocated yet; if new[] throws, the slot holds NULL,
    // which Reset() deletes harmlessly.
    allocations_.push_back(static_cast<char*>(NULL));
    allocations_.back() = new char[size];
    return allocations_.back();
  }

  // Frees every buffer handed out so far; all earlier pointers dangle.
  void Reset() {
    for (size_t i = 0; i < allocations_.size(); ++i) {
      delete [] allocations_[i];
    }
    allocations_.clear();
  }

 private:
  // Not copyable: a copy would hold the same buffers and free them twice.
  // Declared but intentionally left undefined.
  UnsafeArena(const UnsafeArena&);
  void operator=(const UnsafeArena&);

  std::vector<char*> allocations_;
};
// The code taken from google3 refers to these standard-library names
// without the std:: qualifier.
using std::make_pair;
using std::pair;
using std::string;
using std::vector;
147 // ----------------------------------------------------------------------
148 // The remainder of this file is largely unmodified from the original
149 // sources in google3/webutil/http/httputils.cc, with the caveat that
150 // we've only taken a subset of the original methods. The methods that
151 // we have taken are nearly verbatim.
152 // ----------------------------------------------------------------------
154 const char * const HTTPHeaders::ACCEPT = "Accept";
155 const char * const HTTPHeaders::ACCEPT_CHARSET = "Accept-Charset";
156 const char * const HTTPHeaders::ACCEPT_ENCODING = "Accept-Encoding";
157 const char * const HTTPHeaders::ACCEPT_LANGUAGE = "Accept-Language";
158 const char * const HTTPHeaders::ACCEPT_RANGES = "Accept-Ranges";
159 const char * const HTTPHeaders::AGE = "Age";
160 const char * const HTTPHeaders::AUTHORIZATION = "Authorization";
161 const char * const HTTPHeaders::CACHE_CONTROL = "Cache-Control";
162 const char * const HTTPHeaders::CONNECTION = "Connection";
163 const char * const HTTPHeaders::CONTENT_DISPOSITION = "Content-Disposition";
164 const char * const HTTPHeaders::CONTENT_ENCODING = "Content-Encoding";
165 const char * const HTTPHeaders::CONTENT_LANGUAGE = "Content-Language";
166 const char * const HTTPHeaders::CONTENT_LENGTH = "Content-Length";
167 const char * const HTTPHeaders::CONTENT_LOCATION = "Content-Location";
168 const char * const HTTPHeaders::CONTENT_RANGE = "Content-Range";
169 const char * const HTTPHeaders::CONTENT_TYPE = "Content-Type";
170 const char * const HTTPHeaders::COOKIE = "Cookie";
171 const char * const HTTPHeaders::DATE = "Date";
172 const char * const HTTPHeaders::DAV = "DAV";
173 const char * const HTTPHeaders::DEPTH = "Depth";
174 const char * const HTTPHeaders::DESTINATION = "Destination";
175 const char * const HTTPHeaders::ETAG = "ETag";
176 const char * const HTTPHeaders::EXPECT = "Expect";
177 const char * const HTTPHeaders::EXPIRES = "Expires";
178 const char * const HTTPHeaders::FROM = "From";
179 const char * const HTTPHeaders::HOST = "Host";
180 const char * const HTTPHeaders::IF = "If";
181 const char * const HTTPHeaders::IF_MATCH = "If-Match";
182 const char * const HTTPHeaders::IF_MODIFIED_SINCE = "If-Modified-Since";
183 const char * const HTTPHeaders::IF_NONE_MATCH = "If-None-Match";
184 const char * const HTTPHeaders::IF_UNMODIFIED_SINCE = "If-Unmodified-Since";
185 const char * const HTTPHeaders::IF_RANGE = "If-Range";
186 const char * const HTTPHeaders::KEEP_ALIVE = "Keep-Alive";
187 const char * const HTTPHeaders::LABEL = "Label";
188 const char * const HTTPHeaders::LAST_MODIFIED = "Last-Modified";
189 const char * const HTTPHeaders::LOCATION = "Location";
190 const char * const HTTPHeaders::LOCK_TOKEN = "Lock-Token";
191 const char * const HTTPHeaders::MS_AUTHOR_VIA = "MS-Author-Via";
192 const char * const HTTPHeaders::OVERWRITE_HDR = "Overwrite";
193 const char * const HTTPHeaders::PRAGMA = "Pragma";
194 const char * const HTTPHeaders::P3P = "P3P";
195 const char * const HTTPHeaders::PROXY_CONNECTION = "Proxy-Connection";
196 const char * const HTTPHeaders::PROXY_AUTHENTICATE = "Proxy-Authenticate";
197 const char * const HTTPHeaders::PROXY_AUTHORIZATION = "Proxy-Authorization";
198 const char * const HTTPHeaders::RANGE = "Range";
199 const char * const HTTPHeaders::REFERER = "Referer";
200 const char * const HTTPHeaders::SERVER = "Server";
201 const char * const HTTPHeaders::SET_COOKIE = "Set-Cookie";
202 const char * const HTTPHeaders::STATUS_URI = "Status-URI";
203 const char * const HTTPHeaders::TIMEOUT = "Timeout";
204 const char * const HTTPHeaders::TRAILERS = "Trailers";
205 const char * const HTTPHeaders::TRANSFER_ENCODING = "Transfer-Encoding";
206 const char * const HTTPHeaders::TRANSFER_ENCODING_ABBRV = "TE";
207 const char * const HTTPHeaders::UPGRADE = "Upgrade";
208 const char * const HTTPHeaders::USER_AGENT = "User-Agent";
209 const char * const HTTPHeaders::VARY = "Vary";
210 const char * const HTTPHeaders::VIA = "Via";
211 const char * const HTTPHeaders::WWW_AUTHENTICATE = "WWW-Authenticate";
212 const char * const HTTPHeaders::X_FORWARDED_FOR = "X-Forwarded-For";
213 const char * const HTTPHeaders::X_PROXYUSER_IP = "X-ProxyUser-IP";
214 const char * const HTTPHeaders::X_UP_SUBNO = "X-Up-Subno";
215 const char * const HTTPHeaders::XID = "XID";
216 const char * const HTTPHeaders::X_ROBOTS = "X-Robots-Tag";
218 // ----------------------------------------------------------------------
219 // HTTPHeaders::HTTPHeaders()
220 // ~HTTPHeaders::HTTPHeaders()
221 // Basic memory management
222 // ----------------------------------------------------------------------
224 HTTPHeaders::HTTPHeaders()
225 : headers_(new KeyValueList), arena_(new UnsafeArena) {
226 // Could just call ClearHeaders(), but this is faster and has the same effect
227 set_http_version(HTTP_ERROR);
228 set_response_code(HTTPResponse::RC_UNDEFINED);
231 HTTPHeaders::~HTTPHeaders() {
232 delete headers_;
233 delete arena_;
236 // ----------------------------------------------------------------------
237 // HTTPHeaders::ClearHeaders()
238 // SetHeader() allocates both the key and the value using
239 // arena_->Strdup. We reset the arena and call clear() on the
240 // underlying vector. You're left with an empty headers vector.
241 // ----------------------------------------------------------------------
243 void HTTPHeaders::ClearHeaders() {
244 set_http_version(HTTP_ERROR);
245 set_response_code(HTTPResponse::RC_UNDEFINED);
246 set_reason_phrase("");
247 firstline_.clear();
248 headers_->clear();
249 arena_->Reset();
252 // ----------------------------------------------------------------------
253 // HTTPHeaders::IsEmpty()
254 // HTTPHeaders::begin()
255 // HTTPHeaders::end()
256 // We'd put these in the .h file if we could use STL there.
257 // ----------------------------------------------------------------------
259 bool HTTPHeaders::IsEmpty() const {
260 return headers_->empty();
262 HTTPHeaders::const_iterator HTTPHeaders::begin() const {
263 return headers_->begin();
265 HTTPHeaders::const_iterator HTTPHeaders::end() const {
266 return headers_->end();
268 HTTPHeaders::const_reverse_iterator HTTPHeaders::rbegin() const {
269 return headers_->rbegin();
271 HTTPHeaders::const_reverse_iterator HTTPHeaders::rend() const {
272 return headers_->rend();
// ----------------------------------------------------------------------
// HTTPHeaders::GetHeader()
//    Returns the value associated with header.
//    Returns NULL if one is not found.
//    This function should not be used to process large numbers of
//    command headers (it searches a vector) but it's very handy when
//    there are just a few headers.
//    Also, do not use this method to get the special-case header
//    'Set-Cookie': there may be multiple Set-Cookie headers, and this
//    method only returns the first one. Use GetHeaders() instead.
// ----------------------------------------------------------------------
287 const char* HTTPHeaders::GetHeader(const char* header) const {
288 const_iterator key;
289 for (key = headers_->begin(); key != headers_->end(); ++key) {
290 if (strcasecmp(key->first, header) == 0) {
291 NOVLOG(5) << "Found HTTP header: '" << header << "': '"
292 << (key->second ? key->second : "(null)") << "'";
293 return key->second; // The associated value
296 NOVLOG(5) << "Failed to find HTTP header: '" << header << "'";
297 return NULL;
300 // ----------------------------------------------------------------------
301 // HTTPHeaders::GetHeaders()
302 // Returns all values associated with a header.
303 // Use this method for the 'Set-Cookie' header since there may
304 // be multiple instances.
305 // ----------------------------------------------------------------------
306 void HTTPHeaders::GetHeaders(const char* header,
307 vector<const char*>* values) const {
308 const_iterator key;
309 for (key = headers_->begin(); key != headers_->end(); ++key) {
310 if (strcasecmp(key->first, header) == 0) {
311 NOVLOG(5) << "Found HTTP header: '" << header << "': '"
312 << (key->second ? key->second : "(null)") << "'";
313 values->push_back(key->second);
318 // ----------------------------------------------------------------------
319 // HTTPHeaders::HeaderIs()
320 // HTTPHeaders::HeaderStartsWith()
321 // Convenience methods which tell you if a header is present, and
322 // if so whether it has a certain value, or starts with a certain
323 // value. All comparisons are case-insensitive.
324 // ----------------------------------------------------------------------
326 bool HTTPHeaders::HeaderIs(const char* key, const char* value) const {
327 const char* real_value = GetHeader(key);
328 return real_value && !strcasecmp(real_value, value);
331 bool HTTPHeaders::HeaderStartsWith(const char* key, const char* value) const {
332 const char* real_value = GetHeader(key);
333 return real_value && var_strcaseprefix(real_value, value);
336 // ----------------------------------------------------------------------
337 // HTTPHeaders::SetHeader()
338 // HTTPHeaders::AddNewHeader()
339 // Sets a header in the vector of headers. If a header with the
340 // given name already exists and the overwrite parameter is set to
341 // OVERWRITE, then we overwrite it. If a header with the given name
342 // already exists and the overwrite parameter is set to APPEND, then
343 // we append the new value on to the old value (separating them with
344 // a ','). When setting the new vector entry, it makes copies
345 // of both key and value.
346 // In the case of AddNewHeader(), we always tack on the given
347 // key and value to the end of our list of headers. You probably only
348 // want to use this low-level interface if you're writing test code
349 // that needs to generate duplicate HTTP headers. Well-behaved
350 // programs should use SetHeader().
351 // Value can be NULL; this means "erase this header." We actually
352 // store with the NULL, so the erase can apply "automatic"
353 // headers the connection class would otherwise emit by itself.
354 // RETURNS a pointer to the new value for the key we just inserted
355 // (which, in the overwrite == APPEND case, may not be the value that
356 // we just set), where it lives in the array.
358 // If we are over the threshold for header size, we return null. We
359 // don't even bother to allow header erasing in this case.
360 // ----------------------------------------------------------------------
362 const char* HTTPHeaders::SetHeader(const char* key, const char* value,
363 int overwrite) {
365 // We want to limit the size the headers can grow to, to avoid
366 // denial-of-service where clients keep sending us lots of data.
367 // Ignore any beyond the max.
368 if ( IsUsingTooMuchMemory() )
369 return NULL;
371 // Header values cannot contain \r\n because everything following the \r\n
372 // will be interpreted as a new header line. This can cause security problems
373 // if an uncareful server accepts user input (e.g., a URL to redirect to) and
374 // writes it into response headers without proper sanitization. So scan for
375 // CR or LF, and if we find either, replace it with "_". Common case is
376 // none is found, so all we've done is scan the string.
377 if (value) {
378 char* copy = NULL;
379 for (int i=0; value[i]; i++)
380 if (value[i] == '\r' || value[i] == '\n') {
381 if (!copy) { // only dup once
382 copy = arena_->Strdup(value);
384 copy[i] = '_';
387 if (copy)
388 value = copy;
391 // Special case Set-Cookie/WWW-Authenticate can be a multi-line header
392 // (like a multi-map)
393 if ( !strcasecmp(key, SET_COOKIE) || !strcasecmp(key, WWW_AUTHENTICATE) ) {
394 if ( overwrite == APPEND ) {
395 return AddNewHeader( key, value);
396 } else if ( overwrite == OVERWRITE ) {
397 ClearHeader(key);
398 return AddNewHeader(key, value);
400 // If overwrite is NO_OVERWRITE, then the non-Set-Cookie-specific code
401 // below will handle it correctly.
404 for ( KeyValueList::iterator it = headers_->begin(); it != headers_->end();
405 ++it ) {
406 if ( !strcasecmp(it->first, key) ) {
407 if ( overwrite == NO_OVERWRITE ) {
408 NOVLOG(5) << "Didn't replace HTTP header '" << it->first << "':'"
409 << it->second << "' with value '" << value << "' "
410 << "because the overwrite flag was not set";
411 } else if ( overwrite == OVERWRITE || it->second == NULL ) {
412 // We temporarily leak the memory for the old key and value.
413 // Note: we could avoid copying key here but I am worried about
414 // case differences between old key and new key.
415 it->first = arena_->Strdup(key);
416 NOVLOG(5) << "Replaced HTTP header '"
417 << it->first << "':'" << it->second
418 << "' with value '" << value << "'";
419 it->second = value ? arena_->Strdup(value) : NULL;
420 } else {
421 assert(overwrite == APPEND);
422 if ( value ) {
423 // We temporarily leak the memory for the old key and value.
424 // Note: we could avoid copying key here but I am worried about
425 // case differences between old key and new key.
426 it->first = arena_->Strdup(key);
427 AppendValueToHeader(&(*it), ",", value);
428 } else {
429 NOVLOG(5) << "Appended nothing to HTTP header '"
430 << it->first << "':'" << it->second << "'";
433 return it->second;
436 // Nothing matched, so we can insert
437 return AddNewHeader(key, value);
440 const char* HTTPHeaders::AddNewHeader(const char* key, const char* value) {
442 // We want to limit the size the headers can grow to, to avoid
443 // denial-of-service where clients keep sending us lots of data.
444 // Ignore any beyond the max.
445 if ( IsUsingTooMuchMemory() )
446 return NULL;
448 headers_->push_back(make_pair(arena_->Strdup(key),
449 value ? arena_->Strdup(value) : NULL));
450 NOVLOG(5) << "Added HTTP header '" << key << "': '"
451 << (value ? value : "(null)") << "'";
452 return headers_->back().second; // pointer to value we just inserted
455 // ----------------------------------------------------------------------
456 // HTTPHeaders::AppendValueToHeader()
457 // Append the given value to the given header, separating it from
458 // the current value with the given separator.
459 // RETURNS a pointer to the new value for the key we just inserted,
460 // where it lives in the array.
462 // As the previous value is not freed, there are degenerate documents
463 // that require O(N^2) memory to process. E.g. A single header,
464 // with the rest of the document being header continuation lines.
465 // ----------------------------------------------------------------------
467 const char* HTTPHeaders::AppendValueToHeader(pair<char*,char*>* header,
468 const char* separator,
469 const char* value) {
470 // We want to limit the size the headers can grow to, to avoid
471 // denial-of-service where clients keep sending us lots of data.
472 // Ignore any beyond the max.
473 if ( IsUsingTooMuchMemory() )
474 return NULL;
476 const size_t len1 = strlen(header->second);
477 const size_t len2 = strlen(separator);
478 const size_t len3 = strlen(value);
479 // The + 1 is for the \0 terminator.
480 char* const new_second = arena_->Alloc(len1 + len2 + len3 + 1);
481 strncpy(new_second, header->second, len1 + len2 + len3 + 1);
482 strncpy(new_second + len1, separator, len2 + len3 + 1);
483 strncpy(new_second + len1 + len2, value, len3 + 1);
484 NOVLOG(5) << "Appended to replace HTTP header '"
485 << header->first << "':'" << header->second
486 << "' with value '" << new_second << "'";
487 header->second = new_second; // temporarily leaks old value in arena
488 return header->second;
491 // ----------------------------------------------------------------------
492 // HTTPHeaders::ClearHeader()
493 // Makes it as if the header never was. This is like
494 // SetHeader(hdr, NULL) in that, in both cases, the header won't
495 // ever be emitted to the user. They differ only in how they
496 // react to SetHeader(hdr, value, NO_OVERWRITE). After ClearHeader,
497 // this SetHeader will succeed, but after the NULL-SetHeader,
498 // it will fail. RETURNS true if we found the header, false else.
499 // ----------------------------------------------------------------------
501 bool HTTPHeaders::ClearHeader(const char* header) {
502 bool did_delete = false;
503 KeyValueList::iterator it = headers_->begin();
504 while ( it != headers_->end() ) {
505 if ( !strcasecmp(it->first, header) ) {
506 it = headers_->erase(it);
507 did_delete = true;
508 } else {
509 ++it;
512 return did_delete;
// ----------------------------------------------------------------------
// HTTPHeaders::AddResponseFirstline()
// HTTPHeaders::CheckResponseProtocol()
//
//    AddResponseFirstline() knows how to parse one line of input: the
//    firstline of an HTTP response. (The original google3 sources also
//    describe an AddRequestFirstline() for the HTTP command line; it is
//    not defined below.)
//    CheckResponseProtocol() is used by AddResponseFirstline() to parse the
//    protocol (e.g., the "HTTP/1.1" part of the first line). It can be
//    overridden to support other protocols (such as Gnutella) that use
//    HTTP-like headers.
//    It's safest if the line you pass in includes a trailing \n
//    (should only be necessary, though, if you use the "H 200" shortcut).
// ----------------------------------------------------------------------
// Logs why the first line was rejected (a no-op while NOLOG discards its
// input) and makes the enclosing function return false.  Wrapped in
// do/while(0) so that it acts as a single statement after an unbraced if.
#define FAIL(msg)                                                   \
  do {                                                              \
    NOLOG(WARNING) << "Request line error ("                        \
                   << HTTPHeaders::firstline() << "): " << (msg);   \
    return false;  /* tell the caller that parsing is over */       \
  } while (0)
535 bool HTTPHeaders::AddResponseFirstline(const char* firstline, int linelen) {
536 assert(firstline);
537 firstline_.assign(firstline, linelen);
538 const char* const line = firstline_.c_str(); // so that we're NUL terminated
539 // One-letter requests can't be valid HTTP. But we do see (and expect) them
540 // in google1-compatibility mode: so while we still fail, we do so silently.
541 if ( linelen == 1 ) return false; // FAIL without logging
542 const char* const space = strchr(line, ' ');
543 if ( !space ) FAIL("no space char");
544 const int protolen = space - line;
546 if (!CheckResponseProtocol(line, protolen))
547 return false;
549 char* error;
550 const int response_int = strtol(space, &error, 10);
551 if ( NULL == error || space == error ||
552 (!isspace(*error) && *error != '\0') )
553 FAIL("Response code not an integer");
554 if ( response_int < HTTPResponse::RC_FIRST_CODE ||
555 response_int > HTTPResponse::RC_LAST_CODE ) {
556 FAIL("Illegal response code");
558 set_response_code(static_cast<HTTPResponse::ResponseCode>(response_int));
559 while (isspace(*error)) error++;
560 reason_phrase_.assign(error);
562 return true;
565 bool HTTPHeaders::CheckResponseProtocol(const char* proto, int protolen) {
566 // TODO(michaeln): Gears, do we care to future proof this, HTTP/x.x?
567 if ( memcaseis(proto, protolen, "HTTP/1.0") ) {
568 set_http_version(HTTP_10);
569 } else if ( memcaseis(proto, protolen, "HTTP/1.1") ) {
570 set_http_version(HTTP_11);
571 } else if ( memcaseis(proto, protolen, "HTTP") ) {
572 // If the server didn't provide a version number, then we assume
573 // 1.0. This helps with odd servers like the NCSA/1.5.2 server,
574 // which does not send a version number if the request is version
575 // 1.1. Mozilla does the same - it tolerates this and assumes
576 // version 1.0.
577 set_http_version(HTTP_10);
578 /* Gears deletions
579 } else if ( memcaseis(proto, protolen, "ICY") ) {
580 // For icecast/shoutcast responsed used by say iTune radio stations.
581 set_http_version(HTTP_ICY);
582 } else if ( memis(proto, protolen, "H") ) {
583 // our internal "H<space> ..." form.
584 set_http_version(HTTP_OTHER); // a minimalist response
586 } else {
587 // We don't LOG here because it's easy to get here by accident:
588 // maybe they called AddResponse when they meant to call AddRequest.
589 return false; // FAIL without logging
591 return true;
594 // ----------------------------------------------------------------------
595 // HTTPHeaders::SetHeaderFromLine()
596 // Like SetHeader(), but we actually parse the response line from
597 // a webserver ("Header: value") for you.
598 // While the input is a char*, because we munge it as we go,
599 // we do not modify it: it's the same after our call as before.
600 // RETURNS a pointer to the new value for the key we just inserted,
601 // where it lives in the array.
603 // We handle header continuations. To quote from RFC 2616, section
604 // 2.2:
606 // HTTP/1.1 header field values can be folded onto multiple lines
607 // if the continuation line begins with a space or horizontal
608 // tab. All linear white space, including folding, has the same
609 // semantics as SP. A recipient MAY replace any linear white space
610 // with a single SP before interpreting the field value or
611 // forwarding the message downstream.
612 // ----------------------------------------------------------------------
614 const char* HTTPHeaders::SetHeaderFromLine(char* line, int overwrite) {
615 // First check for continuation lines. Continuation lines
616 // start with some number of space and/or tab characters,
617 // which are equivalent to a single space character.
618 const char* start = line;
619 while ( (*start == ' ') || (*start == '\t') ) start++;
620 if ( start > line ) {
621 KeyValueList::reverse_iterator last_header = headers_->rbegin();
622 if ( last_header == headers_->rend() ) {
623 NOLOG(WARNING) << "Ignoring continuation line with no previous header";
624 return NULL;
625 } else {
626 // If the previous header value is non-empty, and this
627 // continuation is non-empty, then we need a separator: we
628 // collapse the header continuation whitespace into a single
629 // space separator (as the RFC says we "MAY" do; see above).
630 const char* const separator =
631 ((*(last_header->second) != '\0') && (*start != '\0')) ? " " : "";
632 return AppendValueToHeader(&(*last_header), separator, start);
636 // This is not a continuation line; hopefully this is a regular "Key: Value".
637 char* colon = strchr(line, ':');
638 if ( !colon ) {
639 NOLOG(WARNING) << "Ignoring invalid header '" << line << "'";
640 return NULL;
641 } else {
642 char *key, *value; // space used to ingore spaces around :
643 key = line;
644 value = colon+1;
646 while ( colon > key && colon[-1] == ' ' ) colon--; // spaces before :
647 const char oldval = *colon; // we'll restore this later
648 *colon = '\0'; // \0 the end of the key
649 while ( *value == ' ' ) value++; // spaces after :
651 if (!strcasecmp(key, "host")) {
652 // a host might already have been set (if the incoming header
653 // had an absolute request url as in GET http://host/path HTTP/1.0)
654 // the spec says to ignore the "Host: host.com" header in this case
655 overwrite = NO_OVERWRITE;
657 const char* retval = SetHeader(key, value, overwrite);
658 *colon = oldval; // restore input to original state
659 return retval;
663 // ----------------------------------------------------------------------
664 // HTTPUtils::ParseHTTPHeaders()
665 // On entrance to this function, body and bodylen point to a document.
666 // At the end, body points past the http headers for this document;
667 // it stays unchanged if there don't seem to be any headers. bodylen
668 // is adjusted accordingly. Body need not be nul-terminated.
669 // If we have an error parsing, body is set to NULL and len to 0.
670 // If headers is not NULL, the headers will be parsed, and if
671 // allow_const_cast is true, the body will be temporarily modified
672 // while parsing the headers. Otherwise header lines are copied to
673 // a local string.
675 // Returns true only when a blank line ('\r\n' only) has been seen.
676 // Returns false if an error occurs before then.
678 // Factored out from both HTTPParser::Parse and HTTPUtils::ExtractBody
679 // ----------------------------------------------------------------------
681 bool HTTPUtils::ParseHTTPHeaders(const char** body, uint32* bodylen,
682 HTTPHeaders* headers, bool allow_const_cast) {
683 int num_headers = 0; // number we've seen
685 const char* content = *body;
686 uint32 contentlen = *bodylen;
687 const char* contentend = content + contentlen;
689 *body = NULL; // init to impossible values
690 *bodylen = 0;
692 // doc bodies can be empty
693 if (content == NULL || contentlen == 0) return false;
695 const char* line = content;
697 while (true) { // done when we notice header end
698 // After this loop runs, line_end will point to the \r of a
699 // line-ending \r\n pair or be NULL if there isn't one.
700 const char* line_end;
701 if (line + 1 >= contentend) { // not enough space for \r\n
702 line_end = NULL;
703 } else {
704 for (const char* start = line;; start = line_end + 1) {
705 line_end = (char *) memchr(start, '\r', contentend - start - 1);
706 if (line_end == NULL || line_end[1] == '\n')
707 break;
711 // doc ends while still in headers?
712 if (line_end == NULL) {
713 if (num_headers == 0) { // means there are no headers
714 *body = content; // so everything is the body
715 *bodylen = contentlen;
716 } else { // assume document was only header
717 // this statement was different in HTTPParser::Parse(), so we
718 // use the return value to set body = content within Parse()
719 *body = content + contentlen;
720 *bodylen = 0;
722 return false; // done due to error
725 CHECK_LT(line_end + 1, contentend);
727 // If we haven't seen any headers and there's a \n before the \r,
728 // the first \r must not have ended a header line.
729 if (num_headers == 0 &&
730 memchr(line, '\n', line_end - line)) {
731 *body = content;
732 *bodylen = contentlen;
733 return false; // done due to error
736 if (line_end == line) { // blank line, means end of headers
737 *body = line_end + 2; // skip over last \r\n
738 // If the checks in this block fail, do not try to fix them here.
739 // Something upstream almost certainly went past the end of the
740 // headers and we're confused now.
741 CHECK_GE(*body, content);
742 const int headerlen = *body - content;
743 CHECK_GE(static_cast<int>(contentlen), headerlen);
744 *bodylen = contentlen - headerlen;
745 return true; // done due to end of headers
748 if (num_headers == 0
749 && ! memchr(line, ':', line_end - line)) {
750 // First header line AND no colon means that it's (hopefully!) a status
751 // response line.
752 bool good_first_line = false;
753 const char *space = (char *) memchr(line, ' ', line_end - line);
754 if (space) {
755 char *errpos;
756 strtol(space + 1, &errpos, 10);
757 if (isspace(*errpos) && errpos > space + 1) {
758 good_first_line = true;
759 num_headers++;
762 if (good_first_line) {
763 // If we are actually recording the first line, there are a few
764 // more validity checks in this AddResponseFirstLine
765 if (headers && !headers->AddResponseFirstline(line, line_end - line)) {
766 num_headers--;
768 } else {
769 *body = content;
770 *bodylen = contentlen;
771 return false; // Done -- error parsing first line
773 } else { // Not the first parsed header line
774 num_headers++; // or we have a line with a colon
775 if (headers) { // We have to actually parse it
776 if (allow_const_cast) {
777 // This line should be a header line.
778 // Temporarily terminate the line and use SetHeaderFromLine.
779 char oldval = *line_end;
780 *(const_cast<char *>(line_end)) = '\0';
781 headers->SetHeaderFromLine(const_cast<char *>(line),
782 HTTPHeaders::APPEND);
783 *(const_cast<char *>(line_end)) = oldval; // Restore the old value.
784 } else {
785 // Can't use const_cast so we copy the line to a temporary string
786 string header_line(line, line_end);
787 header_line.c_str();
788 headers->SetHeaderFromLine(&header_line[0], HTTPHeaders::APPEND);
793 line = line_end + 2; // read past \r and \n
797 // ----------------------------------------------------------------------
798 // HTTPUtils::ExtractBody()
799 // On entrance to this function, body and bodylen point to a document.
800 // At the end, body points past the http headers for this document;
801 // it stays unchanged if there don't seem to be any headers. bodylen
802 // is adjusted accordingly. Body need not be nul-terminated.
803 // If we have an error parsing, body is set to NULL and len to 0.
804 // TODO: make sure this function supports header continuation lines.
805 // ----------------------------------------------------------------------
807 void HTTPUtils::ExtractBody(const char **body, uint32* bodylen) {
808 ParseHTTPHeaders(body, bodylen, NULL, false);