net/http/http_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // The rules for parsing content-types were borrowed from Firefox:
   6 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
   7
   8 #include "net/http/http_util.h"
   9
  10 #include <algorithm>
  11
  12 #include "base/basictypes.h"
  13 #include "base/logging.h"
  14 #include "base/strings/string_number_conversions.h"
  15 #include "base/strings/string_piece.h"
  16 #include "base/strings/string_tokenizer.h"
  17 #include "base/strings/string_util.h"
  18 #include "base/strings/stringprintf.h"
  19 #include "base/time/time.h"
  20
  21
  22 namespace net {
  23
  24 // Helpers --------------------------------------------------------------------
  25
  26 // Returns the index of the closing quote of the string, if any.  |start| points
  27 // at the opening quote.
  28 static size_t FindStringEnd(const std::string& line, size_t start, char delim) {
  29   DCHECK_LT(start, line.length());
  30   DCHECK_EQ(line[start], delim);
  31   DCHECK((delim == '"') || (delim == '\''));
  32
  33   const char set[] = { delim, '\\', '\0' };
  34   for (size_t end = line.find_first_of(set, start + 1);
  35        end != std::string::npos; end = line.find_first_of(set, end + 2)) {
  36     if (line[end] != '\\')
  37       return end;
  38   }
  39   return line.length();
  40 }
  41
  42
  43 // HttpUtil -------------------------------------------------------------------
  44
  45 // static
  46 size_t HttpUtil::FindDelimiter(const std::string& line,
  47                                size_t search_start,
  48                                char delimiter) {
  49   do {
  50     // search_start points to the spot from which we should start looking
  51     // for the delimiter.
  52     const char delim_str[] = { delimiter, '"', '\'', '\0' };
  53     size_t cur_delim_pos = line.find_first_of(delim_str, search_start);
  54     if (cur_delim_pos == std::string::npos)
  55       return line.length();
  56
  57     char ch = line[cur_delim_pos];
  58     if (ch == delimiter) {
  59       // Found delimiter
  60       return cur_delim_pos;
  61     }
  62
  63     // We hit the start of a quoted string.  Look for its end.
  64     search_start = FindStringEnd(line, cur_delim_pos, ch);
  65     if (search_start == line.length())
  66       return search_start;
  67
  68     ++search_start;
  69
  70     // search_start now points to the first char after the end of the
  71     // string, so just go back to the top of the loop and look for
  72     // |delimiter| again.
  73   } while (true);
  74
  75   NOTREACHED();
  76   return line.length();
  77 }
  78
  79 // static
  80 void HttpUtil::ParseContentType(const std::string& content_type_str,
  81                                 std::string* mime_type,
  82                                 std::string* charset,
  83                                 bool* had_charset,
  84                                 std::string* boundary) {
  85   const std::string::const_iterator begin = content_type_str.begin();
  86
  87   // Trim leading and trailing whitespace from type.  We include '(' in
  88   // the trailing trim set to catch media-type comments, which are not at all
  89   // standard, but may occur in rare cases.
  90   size_t type_val = content_type_str.find_first_not_of(HTTP_LWS);
  91   type_val = std::min(type_val, content_type_str.length());
  92   size_t type_end = content_type_str.find_first_of(HTTP_LWS ";(", type_val);
  93   if (type_end == std::string::npos)
  94     type_end = content_type_str.length();
  95
  96   size_t charset_val = 0;
  97   size_t charset_end = 0;
  98   bool type_has_charset = false;
  99
 100   // Iterate over parameters
 101   size_t param_start = content_type_str.find_first_of(';', type_end);
 102   if (param_start != std::string::npos) {
 103     base::StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
 104                                     ";");
 105     tokenizer.set_quote_chars("\"");
 106     while (tokenizer.GetNext()) {
 107       std::string::const_iterator equals_sign =
 108           std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
 109       if (equals_sign == tokenizer.token_end())
 110         continue;
 111
 112       std::string::const_iterator param_name_begin = tokenizer.token_begin();
 113       std::string::const_iterator param_name_end = equals_sign;
 114       TrimLWS(&param_name_begin, &param_name_end);
 115
 116       std::string::const_iterator param_value_begin = equals_sign + 1;
 117       std::string::const_iterator param_value_end = tokenizer.token_end();
 118       DCHECK(param_value_begin <= tokenizer.token_end());
 119       TrimLWS(&param_value_begin, &param_value_end);
 120
 121       if (base::LowerCaseEqualsASCII(
 122               base::StringPiece(param_name_begin, param_name_end), "charset")) {
 123         // TODO(abarth): Refactor this function to consistently use iterators.
 124         charset_val = param_value_begin - begin;
 125         charset_end = param_value_end - begin;
 126         type_has_charset = true;
 127       } else if (base::LowerCaseEqualsASCII(
 128                      base::StringPiece(param_name_begin, param_name_end),
 129                      "boundary")) {
 130         if (boundary)
 131           boundary->assign(param_value_begin, param_value_end);
 132       }
 133     }
 134   }
 135
 136   if (type_has_charset) {
 137     // Trim leading and trailing whitespace from charset_val.  We include
 138     // '(' in the trailing trim set to catch media-type comments, which are
 139     // not at all standard, but may occur in rare cases.
 140     charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
 141     charset_val = std::min(charset_val, charset_end);
 142     char first_char = content_type_str[charset_val];
 143     if (first_char == '"' || first_char == '\'') {
 144       charset_end = FindStringEnd(content_type_str, charset_val, first_char);
 145       ++charset_val;
 146       DCHECK(charset_end >= charset_val);
 147     } else {
 148       charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
 149                                                             charset_val),
 150                              charset_end);
 151     }
 152   }
 153
 154   // if the server sent "*/*", it is meaningless, so do not store it.
 155   // also, if type_val is the same as mime_type, then just update the
 156   // charset.  however, if charset is empty and mime_type hasn't
 157   // changed, then don't wipe-out an existing charset.  We
 158   // also want to reject a mime-type if it does not include a slash.
 159   // some servers give junk after the charset parameter, which may
 160   // include a comma, so this check makes us a bit more tolerant.
 161   if (content_type_str.length() != 0 &&
 162       content_type_str != "*/*" &&
 163       content_type_str.find_first_of('/') != std::string::npos) {
 164     // Common case here is that mime_type is empty
 165     bool eq = !mime_type->empty() &&
 166               base::LowerCaseEqualsASCII(
 167                   base::StringPiece(begin + type_val, begin + type_end),
 168                   mime_type->data());
 169     if (!eq) {
 170       mime_type->assign(begin + type_val, begin + type_end);
 171       base::StringToLowerASCII(mime_type);
 172     }
 173     if ((!eq && *had_charset) || type_has_charset) {
 174       *had_charset = true;
 175       charset->assign(begin + charset_val, begin + charset_end);
 176       base::StringToLowerASCII(charset);
 177     }
 178   }
 179 }
 180
 181 // static
 182 // Parse the Range header according to RFC 2616 14.35.1
 183 // ranges-specifier = byte-ranges-specifier
 184 // byte-ranges-specifier = bytes-unit "=" byte-range-set
 185 // byte-range-set  = 1#( byte-range-spec | suffix-byte-range-spec )
 186 // byte-range-spec = first-byte-pos "-" [last-byte-pos]
 187 // first-byte-pos  = 1*DIGIT
 188 // last-byte-pos   = 1*DIGIT
 189 bool HttpUtil::ParseRanges(const std::string& headers,
 190                            std::vector<HttpByteRange>* ranges) {
 191   std::string ranges_specifier;
 192   HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
 193
 194   while (it.GetNext()) {
 195     // Look for "Range" header.
 196     if (!base::LowerCaseEqualsASCII(it.name(), "range"))
 197       continue;
 198     ranges_specifier = it.values();
 199     // We just care about the first "Range" header, so break here.
 200     break;
 201   }
 202
 203   if (ranges_specifier.empty())
 204     return false;
 205
 206   return ParseRangeHeader(ranges_specifier, ranges);
 207 }
 208
 209 // static
 210 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
 211                                 std::vector<HttpByteRange>* ranges) {
 212   size_t equal_char_offset = ranges_specifier.find('=');
 213   if (equal_char_offset == std::string::npos)
 214     return false;
 215
 216   // Try to extract bytes-unit part.
 217   std::string::const_iterator bytes_unit_begin = ranges_specifier.begin();
 218   std::string::const_iterator bytes_unit_end = bytes_unit_begin +
 219                                                equal_char_offset;
 220   std::string::const_iterator byte_range_set_begin = bytes_unit_end + 1;
 221   std::string::const_iterator byte_range_set_end = ranges_specifier.end();
 222
 223   TrimLWS(&bytes_unit_begin, &bytes_unit_end);
 224   // "bytes" unit identifier is not found.
 225   if (!base::LowerCaseEqualsASCII(
 226           base::StringPiece(bytes_unit_begin, bytes_unit_end), "bytes"))
 227     return false;
 228
 229   ValuesIterator byte_range_set_iterator(byte_range_set_begin,
 230                                          byte_range_set_end, ',');
 231   while (byte_range_set_iterator.GetNext()) {
 232     size_t minus_char_offset = byte_range_set_iterator.value().find('-');
 233     // If '-' character is not found, reports failure.
 234     if (minus_char_offset == std::string::npos)
 235       return false;
 236
 237     std::string::const_iterator first_byte_pos_begin =
 238         byte_range_set_iterator.value_begin();
 239     std::string::const_iterator first_byte_pos_end =
 240         first_byte_pos_begin +  minus_char_offset;
 241     TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
 242     std::string first_byte_pos(first_byte_pos_begin, first_byte_pos_end);
 243
 244     HttpByteRange range;
 245     // Try to obtain first-byte-pos.
 246     if (!first_byte_pos.empty()) {
 247       int64 first_byte_position = -1;
 248       if (!base::StringToInt64(first_byte_pos, &first_byte_position))
 249         return false;
 250       range.set_first_byte_position(first_byte_position);
 251     }
 252
 253     std::string::const_iterator last_byte_pos_begin =
 254         byte_range_set_iterator.value_begin() + minus_char_offset + 1;
 255     std::string::const_iterator last_byte_pos_end =
 256         byte_range_set_iterator.value_end();
 257     TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
 258     std::string last_byte_pos(last_byte_pos_begin, last_byte_pos_end);
 259
 260     // We have last-byte-pos or suffix-byte-range-spec in this case.
 261     if (!last_byte_pos.empty()) {
 262       int64 last_byte_position;
 263       if (!base::StringToInt64(last_byte_pos, &last_byte_position))
 264         return false;
 265       if (range.HasFirstBytePosition())
 266         range.set_last_byte_position(last_byte_position);
 267       else
 268         range.set_suffix_length(last_byte_position);
 269     } else if (!range.HasFirstBytePosition()) {
 270       return false;
 271     }
 272
 273     // Do a final check on the HttpByteRange object.
 274     if (!range.IsValid())
 275       return false;
 276     ranges->push_back(range);
 277   }
 278   return !ranges->empty();
 279 }
 280
 281 // static
 282 bool HttpUtil::ParseRetryAfterHeader(const std::string& retry_after_string,
 283                                      base::Time now,
 284                                      base::TimeDelta* retry_after) {
 285   int seconds;
 286   base::Time time;
 287   base::TimeDelta interval;
 288
 289   if (base::StringToInt(retry_after_string, &seconds)) {
 290     interval = base::TimeDelta::FromSeconds(seconds);
 291   } else if (base::Time::FromUTCString(retry_after_string.c_str(), &time)) {
 292     interval = time - now;
 293   } else {
 294     return false;
 295   }
 296
 297   if (interval < base::TimeDelta::FromSeconds(0))
 298     return false;
 299
 300   *retry_after = interval;
 301   return true;
 302 }
 303
 304 // static
 305 bool HttpUtil::HasHeader(const std::string& headers, const char* name) {
 306   size_t name_len = strlen(name);
 307   std::string::const_iterator it =
 308       std::search(headers.begin(),
 309                   headers.end(),
 310                   name,
 311                   name + name_len,
 312                   base::CaseInsensitiveCompareASCII<char>());
 313   if (it == headers.end())
 314     return false;
 315
 316   // ensure match is prefixed by newline
 317   if (it != headers.begin() && it[-1] != '\n')
 318     return false;
 319
 320   // ensure match is suffixed by colon
 321   if (it + name_len >= headers.end() || it[name_len] != ':')
 322     return false;
 323
 324   return true;
 325 }
 326
 327 namespace {
 328 // A header string containing any of the following fields will cause
 329 // an error. The list comes from the XMLHttpRequest standard.
 330 // http://www.w3.org/TR/XMLHttpRequest/#the-setrequestheader-method
 331 const char* const kForbiddenHeaderFields[] = {
 332   "accept-charset",
 333   "accept-encoding",
 334   "access-control-request-headers",
 335   "access-control-request-method",
 336   "connection",
 337   "content-length",
 338   "cookie",
 339   "cookie2",
 340   "content-transfer-encoding",
 341   "date",
 342   "expect",
 343   "host",
 344   "keep-alive",
 345   "origin",
 346   "referer",
 347   "te",
 348   "trailer",
 349   "transfer-encoding",
 350   "upgrade",
 351   "user-agent",
 352   "via",
 353 };
 354 }  // anonymous namespace
 355
 356 // static
 357 bool HttpUtil::IsSafeHeader(const std::string& name) {
 358   std::string lower_name(base::StringToLowerASCII(name));
 359   if (base::StartsWith(lower_name, "proxy-", base::CompareCase::SENSITIVE) ||
 360       base::StartsWith(lower_name, "sec-", base::CompareCase::SENSITIVE))
 361     return false;
 362   for (size_t i = 0; i < arraysize(kForbiddenHeaderFields); ++i) {
 363     if (lower_name == kForbiddenHeaderFields[i])
 364       return false;
 365   }
 366   return true;
 367 }
 368
 369 // static
 370 bool HttpUtil::IsValidHeaderName(const std::string& name) {
 371   // Check whether the header name is RFC 2616-compliant.
 372   return HttpUtil::IsToken(name);
 373 }
 374
 375 // static
 376 bool HttpUtil::IsValidHeaderValue(const std::string& value) {
 377   // Just a sanity check: disallow NUL and CRLF.
 378   return value.find('\0') == std::string::npos &&
 379       value.find("\r\n") == std::string::npos;
 380 }
 381
 382 // static
 383 std::string HttpUtil::StripHeaders(const std::string& headers,
 384                                    const char* const headers_to_remove[],
 385                                    size_t headers_to_remove_len) {
 386   std::string stripped_headers;
 387   HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
 388
 389   while (it.GetNext()) {
 390     bool should_remove = false;
 391     for (size_t i = 0; i < headers_to_remove_len; ++i) {
 392       if (base::LowerCaseEqualsASCII(
 393               base::StringPiece(it.name_begin(), it.name_end()),
 394               headers_to_remove[i])) {
 395         should_remove = true;
 396         break;
 397       }
 398     }
 399     if (!should_remove) {
 400       // Assume that name and values are on the same line.
 401       stripped_headers.append(it.name_begin(), it.values_end());
 402       stripped_headers.append("\r\n");
 403     }
 404   }
 405   return stripped_headers;
 406 }
 407
 408 // static
 409 bool HttpUtil::IsNonCoalescingHeader(std::string::const_iterator name_begin,
 410                                      std::string::const_iterator name_end) {
 411   // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
 412   // have to list them here.
 413   const char* const kNonCoalescingHeaders[] = {
 414     "date",
 415     "expires",
 416     "last-modified",
 417     "location",  // See bug 1050541 for details
 418     "retry-after",
 419     "set-cookie",
 420     // The format of auth-challenges mixes both space separated tokens and
 421     // comma separated properties, so coalescing on comma won't work.
 422     "www-authenticate",
 423     "proxy-authenticate",
 424     // STS specifies that UAs must not process any STS headers after the first
 425     // one.
 426     "strict-transport-security"
 427   };
 428   for (size_t i = 0; i < arraysize(kNonCoalescingHeaders); ++i) {
 429     if (base::LowerCaseEqualsASCII(base::StringPiece(name_begin, name_end),
 430                                    kNonCoalescingHeaders[i]))
 431       return true;
 432   }
 433   return false;
 434 }
 435
 436 bool HttpUtil::IsLWS(char c) {
 437   return strchr(HTTP_LWS, c) != NULL;
 438 }
 439
 440 void HttpUtil::TrimLWS(std::string::const_iterator* begin,
 441                        std::string::const_iterator* end) {
 442   // leading whitespace
 443   while (*begin < *end && IsLWS((*begin)[0]))
 444     ++(*begin);
 445
 446   // trailing whitespace
 447   while (*begin < *end && IsLWS((*end)[-1]))
 448     --(*end);
 449 }
 450
 451 bool HttpUtil::IsQuote(char c) {
 452   // Single quote mark isn't actually part of quoted-text production,
 453   // but apparently some servers rely on this.
 454   return c == '"' || c == '\'';
 455 }
 456
 457 // See RFC 2616 Sec 2.2 for the definition of |token|.
 458 bool HttpUtil::IsToken(std::string::const_iterator begin,
 459                        std::string::const_iterator end) {
 460   if (begin == end)
 461     return false;
 462   for (std::string::const_iterator iter = begin; iter != end; ++iter) {
 463     unsigned char c = *iter;
 464     if (c >= 0x80 || c <= 0x1F || c == 0x7F ||
 465         c == '(' || c == ')' || c == '<' || c == '>' || c == '@' ||
 466         c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' ||
 467         c == '/' || c == '[' || c == ']' || c == '?' || c == '=' ||
 468         c == '{' || c == '}' || c == ' ' || c == '\t')
 469       return false;
 470   }
 471   return true;
 472 }
 473
 474 std::string HttpUtil::Unquote(std::string::const_iterator begin,
 475                               std::string::const_iterator end) {
 476   // Empty string
 477   if (begin == end)
 478     return std::string();
 479
 480   // Nothing to unquote.
 481   if (!IsQuote(*begin))
 482     return std::string(begin, end);
 483
 484   // No terminal quote mark.
 485   if (end - begin < 2 || *begin != *(end - 1))
 486     return std::string(begin, end);
 487
 488   // Strip quotemarks
 489   ++begin;
 490   --end;
 491
 492   // Unescape quoted-pair (defined in RFC 2616 section 2.2)
 493   std::string unescaped;
 494   bool prev_escape = false;
 495   for (; begin != end; ++begin) {
 496     char c = *begin;
 497     if (c == '\\' && !prev_escape) {
 498       prev_escape = true;
 499       continue;
 500     }
 501     prev_escape = false;
 502     unescaped.push_back(c);
 503   }
 504   return unescaped;
 505 }
 506
 507 // static
 508 std::string HttpUtil::Unquote(const std::string& str) {
 509   return Unquote(str.begin(), str.end());
 510 }
 511
 512 // static
 513 std::string HttpUtil::Quote(const std::string& str) {
 514   std::string escaped;
 515   escaped.reserve(2 + str.size());
 516
 517   std::string::const_iterator begin = str.begin();
 518   std::string::const_iterator end = str.end();
 519
 520   // Esape any backslashes or quotemarks within the string, and
 521   // then surround with quotes.
 522   escaped.push_back('"');
 523   for (; begin != end; ++begin) {
 524     char c = *begin;
 525     if (c == '"' || c == '\\')
 526       escaped.push_back('\\');
 527     escaped.push_back(c);
 528   }
 529   escaped.push_back('"');
 530   return escaped;
 531 }
 532
 533 // Find the "http" substring in a status line. This allows for
 534 // some slop at the start. If the "http" string could not be found
 535 // then returns -1.
 536 // static
 537 int HttpUtil::LocateStartOfStatusLine(const char* buf, int buf_len) {
 538   const int slop = 4;
 539   const int http_len = 4;
 540
 541   if (buf_len >= http_len) {
 542     int i_max = std::min(buf_len - http_len, slop);
 543     for (int i = 0; i <= i_max; ++i) {
 544       if (base::LowerCaseEqualsASCII(base::StringPiece(buf + i, http_len),
 545                                      "http"))
 546         return i;
 547     }
 548   }
 549   return -1;  // Not found
 550 }
 551
 552 static int LocateEndOfHeadersHelper(const char* buf,
 553                                     int buf_len,
 554                                     int i,
 555                                     bool accept_empty_header_list) {
 556   char last_c = '\0';
 557   bool was_lf = false;
 558   if (accept_empty_header_list) {
 559     // Normally two line breaks signal the end of a header list. An empty header
 560     // list ends with a single line break at the start of the buffer.
 561     last_c = '\n';
 562     was_lf = true;
 563   }
 564
 565   for (; i < buf_len; ++i) {
 566     char c = buf[i];
 567     if (c == '\n') {
 568       if (was_lf)
 569         return i + 1;
 570       was_lf = true;
 571     } else if (c != '\r' || last_c != '\n') {
 572       was_lf = false;
 573     }
 574     last_c = c;
 575   }
 576   return -1;
 577 }
 578
 579 int HttpUtil::LocateEndOfAdditionalHeaders(const char* buf,
 580                                            int buf_len,
 581                                            int i) {
 582   return LocateEndOfHeadersHelper(buf, buf_len, i, true);
 583 }
 584
 585 int HttpUtil::LocateEndOfHeaders(const char* buf, int buf_len, int i) {
 586   return LocateEndOfHeadersHelper(buf, buf_len, i, false);
 587 }
 588
 589 // In order for a line to be continuable, it must specify a
 590 // non-blank header-name. Line continuations are specifically for
 591 // header values -- do not allow headers names to span lines.
 592 static bool IsLineSegmentContinuable(const char* begin, const char* end) {
 593   if (begin == end)
 594     return false;
 595
 596   const char* colon = std::find(begin, end, ':');
 597   if (colon == end)
 598     return false;
 599
 600   const char* name_begin = begin;
 601   const char* name_end = colon;
 602
 603   // Name can't be empty.
 604   if (name_begin == name_end)
 605     return false;
 606
 607   // Can't start with LWS (this would imply the segment is a continuation)
 608   if (HttpUtil::IsLWS(*name_begin))
 609     return false;
 610
 611   return true;
 612 }
 613
 614 // Helper used by AssembleRawHeaders, to find the end of the status line.
 615 static const char* FindStatusLineEnd(const char* begin, const char* end) {
 616   size_t i = base::StringPiece(begin, end - begin).find_first_of("\r\n");
 617   if (i == base::StringPiece::npos)
 618     return end;
 619   return begin + i;
 620 }
 621
 622 // Helper used by AssembleRawHeaders, to skip past leading LWS.
 623 static const char* FindFirstNonLWS(const char* begin, const char* end) {
 624   for (const char* cur = begin; cur != end; ++cur) {
 625     if (!HttpUtil::IsLWS(*cur))
 626       return cur;
 627   }
 628   return end;  // Not found.
 629 }
 630
 631 std::string HttpUtil::AssembleRawHeaders(const char* input_begin,
 632                                          int input_len) {
 633   std::string raw_headers;
 634   raw_headers.reserve(input_len);
 635
 636   const char* input_end = input_begin + input_len;
 637
 638   // Skip any leading slop, since the consumers of this output
 639   // (HttpResponseHeaders) don't deal with it.
 640   int status_begin_offset = LocateStartOfStatusLine(input_begin, input_len);
 641   if (status_begin_offset != -1)
 642     input_begin += status_begin_offset;
 643
 644   // Copy the status line.
 645   const char* status_line_end = FindStatusLineEnd(input_begin, input_end);
 646   raw_headers.append(input_begin, status_line_end);
 647
 648   // After the status line, every subsequent line is a header line segment.
 649   // Should a segment start with LWS, it is a continuation of the previous
 650   // line's field-value.
 651
 652   // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
 653   base::CStringTokenizer lines(status_line_end, input_end, "\r\n");
 654
 655   // This variable is true when the previous line was continuable.
 656   bool prev_line_continuable = false;
 657
 658   while (lines.GetNext()) {
 659     const char* line_begin = lines.token_begin();
 660     const char* line_end = lines.token_end();
 661
 662     if (prev_line_continuable && IsLWS(*line_begin)) {
 663       // Join continuation; reduce the leading LWS to a single SP.
 664       raw_headers.push_back(' ');
 665       raw_headers.append(FindFirstNonLWS(line_begin, line_end), line_end);
 666     } else {
 667       // Terminate the previous line.
 668       raw_headers.push_back('\n');
 669
 670       // Copy the raw data to output.
 671       raw_headers.append(line_begin, line_end);
 672
 673       // Check if the current line can be continued.
 674       prev_line_continuable = IsLineSegmentContinuable(line_begin, line_end);
 675     }
 676   }
 677
 678   raw_headers.append("\n\n", 2);
 679
 680   // Use '\0' as the canonical line terminator. If the input already contained
 681   // any embeded '\0' characters we will strip them first to avoid interpreting
 682   // them as line breaks.
 683   raw_headers.erase(std::remove(raw_headers.begin(), raw_headers.end(), '\0'),
 684                     raw_headers.end());
 685   std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
 686
 687   return raw_headers;
 688 }
 689
 690 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
 691   std::string disassembled_headers;
 692   base::StringTokenizer tokenizer(str, std::string(1, '\0'));
 693   while (tokenizer.GetNext()) {
 694     disassembled_headers.append(tokenizer.token_begin(), tokenizer.token_end());
 695     disassembled_headers.append("\r\n");
 696   }
 697   disassembled_headers.append("\r\n");
 698
 699   return disassembled_headers;
 700 }
 701
 702 // TODO(jungshik): 1. If the list is 'fr-CA,fr-FR,en,de', we have to add
 703 // 'fr' after 'fr-CA' with the same q-value as 'fr-CA' because
 704 // web servers, in general, do not fall back to 'fr' and may end up picking
 705 // 'en' which has a lower preference than 'fr-CA' and 'fr-FR'.
 706 // 2. This function assumes that the input is a comma separated list
 707 // without any whitespace. As long as it comes from the preference and
 708 // a user does not manually edit the preference file, it's the case. Still,
 709 // we may have to make it more robust.
 710 std::string HttpUtil::GenerateAcceptLanguageHeader(
 711     const std::string& raw_language_list) {
 712   // We use integers for qvalue and qvalue decrement that are 10 times
 713   // larger than actual values to avoid a problem with comparing
 714   // two floating point numbers.
 715   const unsigned int kQvalueDecrement10 = 2;
 716   unsigned int qvalue10 = 10;
 717   base::StringTokenizer t(raw_language_list, ",");
 718   std::string lang_list_with_q;
 719   while (t.GetNext()) {
 720     std::string language = t.token();
 721     if (qvalue10 == 10) {
 722       // q=1.0 is implicit.
 723       lang_list_with_q = language;
 724     } else {
 725       DCHECK_LT(qvalue10, 10U);
 726       base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
 727                           qvalue10);
 728     }
 729     // It does not make sense to have 'q=0'.
 730     if (qvalue10 > kQvalueDecrement10)
 731       qvalue10 -= kQvalueDecrement10;
 732   }
 733   return lang_list_with_q;
 734 }
 735
 736 void HttpUtil::AppendHeaderIfMissing(const char* header_name,
 737                                      const std::string& header_value,
 738                                      std::string* headers) {
 739   if (header_value.empty())
 740     return;
 741   if (HttpUtil::HasHeader(*headers, header_name))
 742     return;
 743   *headers += std::string(header_name) + ": " + header_value + "\r\n";
 744 }
 745
 746 bool HttpUtil::HasStrongValidators(HttpVersion version,
 747                                    const std::string& etag_header,
 748                                    const std::string& last_modified_header,
 749                                    const std::string& date_header) {
 750   if (version < HttpVersion(1, 1))
 751     return false;
 752
 753   if (!etag_header.empty()) {
 754     size_t slash = etag_header.find('/');
 755     if (slash == std::string::npos || slash == 0)
 756       return true;
 757
 758     std::string::const_iterator i = etag_header.begin();
 759     std::string::const_iterator j = etag_header.begin() + slash;
 760     TrimLWS(&i, &j);
 761     if (!base::LowerCaseEqualsASCII(base::StringPiece(i, j), "w"))
 762       return true;
 763   }
 764
 765   base::Time last_modified;
 766   if (!base::Time::FromString(last_modified_header.c_str(), &last_modified))
 767     return false;
 768
 769   base::Time date;
 770   if (!base::Time::FromString(date_header.c_str(), &date))
 771     return false;
 772
 773   return ((date - last_modified).InSeconds() >= 60);
 774 }
 775
 776 // Functions for histogram initialization.  The code 0 is put in the map to
 777 // track status codes that are invalid.
 778 // TODO(gavinp): Greatly prune the collected codes once we learn which
 779 // ones are not sent in practice, to reduce upload size & memory use.
 780
 781 enum {
 782   HISTOGRAM_MIN_HTTP_STATUS_CODE = 100,
 783   HISTOGRAM_MAX_HTTP_STATUS_CODE = 599,
 784 };
 785
 786 // static
 787 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
 788   std::vector<int> codes;
 789   codes.reserve(
 790       HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
 791   codes.push_back(0);
 792   for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
 793        i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
 794     codes.push_back(i);
 795   return codes;
 796 }
 797
 798 // static
 799 int HttpUtil::MapStatusCodeForHistogram(int code) {
 800   if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
 801       code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
 802     return code;
 803   return 0;
 804 }
 805
 806 // BNF from section 4.2 of RFC 2616:
 807 //
 808 //   message-header = field-name ":" [ field-value ]
 809 //   field-name     = token
 810 //   field-value    = *( field-content | LWS )
 811 //   field-content  = <the OCTETs making up the field-value
 812 //                     and consisting of either *TEXT or combinations
 813 //                     of token, separators, and quoted-string>
 814 //
 815
 816 HttpUtil::HeadersIterator::HeadersIterator(
 817     std::string::const_iterator headers_begin,
 818     std::string::const_iterator headers_end,
 819     const std::string& line_delimiter)
 820     : lines_(headers_begin, headers_end, line_delimiter) {
 821 }
 822
 823 HttpUtil::HeadersIterator::~HeadersIterator() {
 824 }
 825
 826 bool HttpUtil::HeadersIterator::GetNext() {
 827   while (lines_.GetNext()) {
 828     name_begin_ = lines_.token_begin();
 829     values_end_ = lines_.token_end();
 830
 831     std::string::const_iterator colon(std::find(name_begin_, values_end_, ':'));
 832     if (colon == values_end_)
 833       continue;  // skip malformed header
 834
 835     name_end_ = colon;
 836
 837     // If the name starts with LWS, it is an invalid line.
 838     // Leading LWS implies a line continuation, and these should have
 839     // already been joined by AssembleRawHeaders().
 840     if (name_begin_ == name_end_ || IsLWS(*name_begin_))
 841       continue;
 842
 843     TrimLWS(&name_begin_, &name_end_);
 844     if (name_begin_ == name_end_)
 845       continue;  // skip malformed header
 846
 847     values_begin_ = colon + 1;
 848     TrimLWS(&values_begin_, &values_end_);
 849
 850     // if we got a header name, then we are done.
 851     return true;
 852   }
 853   return false;
 854 }
 855
 856 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
 857   DCHECK(name != NULL);
 858   DCHECK_EQ(0, base::StringToLowerASCII<std::string>(name).compare(name))
 859       << "the header name must be in all lower case";
 860
 861   while (GetNext()) {
 862     if (base::LowerCaseEqualsASCII(base::StringPiece(name_begin_, name_end_),
 863                                    name)) {
 864       return true;
 865     }
 866   }
 867
 868   return false;
 869 }
 870
 871 HttpUtil::ValuesIterator::ValuesIterator(
 872     std::string::const_iterator values_begin,
 873     std::string::const_iterator values_end,
 874     char delimiter)
 875     : values_(values_begin, values_end, std::string(1, delimiter)) {
 876   values_.set_quote_chars("\'\"");
 877 }
 878
 879 HttpUtil::ValuesIterator::~ValuesIterator() {
 880 }
 881
 882 bool HttpUtil::ValuesIterator::GetNext() {
 883   while (values_.GetNext()) {
 884     value_begin_ = values_.token_begin();
 885     value_end_ = values_.token_end();
 886     TrimLWS(&value_begin_, &value_end_);
 887
 888     // bypass empty values.
 889     if (value_begin_ != value_end_)
 890       return true;
 891   }
 892   return false;
 893 }
 894
 895 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
 896     std::string::const_iterator begin,
 897     std::string::const_iterator end,
 898     char delimiter)
 899     : props_(begin, end, delimiter),
 900       valid_(true),
 901       name_begin_(end),
 902       name_end_(end),
 903       value_begin_(end),
 904       value_end_(end),
 905       value_is_quoted_(false) {
 906 }
 907
 908 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() {}
 909
 910 // We expect properties to be formatted as one of:
 911 //   name="value"
 912 //   name='value'
 913 //   name='\'value\''
 914 //   name=value
 915 //   name = value
 916 //   name=
 917 // Due to buggy implementations found in some embedded devices, we also
 918 // accept values with missing close quotemark (http://crbug.com/39836):
 919 //   name="value
 920 bool HttpUtil::NameValuePairsIterator::GetNext() {
 921   if (!props_.GetNext())
 922     return false;
 923
 924   // Set the value as everything. Next we will split out the name.
 925   value_begin_ = props_.value_begin();
 926   value_end_ = props_.value_end();
 927   name_begin_ = name_end_ = value_end_;
 928
 929   // Scan for the equals sign.
 930   std::string::const_iterator equals = std::find(value_begin_, value_end_, '=');
 931   if (equals == value_end_ || equals == value_begin_)
 932     return valid_ = false;  // Malformed, no equals sign
 933
 934   // Verify that the equals sign we found wasn't inside of quote marks.
 935   for (std::string::const_iterator it = value_begin_; it != equals; ++it) {
 936     if (HttpUtil::IsQuote(*it))
 937       return valid_ = false;  // Malformed, quote appears before equals sign
 938   }
 939
 940   name_begin_ = value_begin_;
 941   name_end_ = equals;
 942   value_begin_ = equals + 1;
 943
 944   TrimLWS(&name_begin_, &name_end_);
 945   TrimLWS(&value_begin_, &value_end_);
 946   value_is_quoted_ = false;
 947   unquoted_value_.clear();
 948
 949   if (value_begin_ == value_end_)
 950     return valid_ = false;  // Malformed, value is empty
 951
 952   if (HttpUtil::IsQuote(*value_begin_)) {
 953     // Trim surrounding quotemarks off the value
 954     if (*value_begin_ != *(value_end_ - 1) || value_begin_ + 1 == value_end_) {
 955       // NOTE: This is not as graceful as it sounds:
 956       // * quoted-pairs will no longer be unquoted
 957       //   (["\"hello] should give ["hello]).
 958       // * Does not detect when the final quote is escaped
 959       //   (["value\"] should give [value"])
 960       ++value_begin_;  // Gracefully recover from mismatching quotes.
 961     } else {
 962       value_is_quoted_ = true;
 963       // Do not store iterators into this. See declaration of unquoted_value_.
 964       unquoted_value_ = HttpUtil::Unquote(value_begin_, value_end_);
 965     }
 966   }
 967
 968   return true;
 969 }
 970
 971 }  // namespace net