third_party/WebKit/Source/platform/network/HTTPParsers.cpp

   1 /*
   2  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
   3  * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
   4  * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
   5  * Copyright (C) 2009 Google Inc. All rights reserved.
   6  * Copyright (C) 2011 Apple Inc. All Rights Reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1.  Redistributions of source code must retain the above copyright
  13  *     notice, this list of conditions and the following disclaimer.
  14  * 2.  Redistributions in binary form must reproduce the above copyright
  15  *     notice, this list of conditions and the following disclaimer in the
  16  *     documentation and/or other materials provided with the distribution.
  17  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
  18  *     its contributors may be used to endorse or promote products derived
  19  *     from this software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
  22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  25  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  28  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32
  33 #include "config.h"
  34 #include "platform/network/HTTPParsers.h"
  35
  36 #include "wtf/DateMath.h"
  37 #include "wtf/MathExtras.h"
  38 #include "wtf/text/CString.h"
  39 #include "wtf/text/CharacterNames.h"
  40 #include "wtf/text/StringBuilder.h"
  41 #include "wtf/text/WTFString.h"
  42
  43 using namespace WTF;
  44
  45 namespace blink {
  46
  47 static bool isWhitespace(UChar chr)
  48 {
  49     return (chr == ' ') || (chr == '\t');
  50 }
  51
  52 // true if there is more to parse, after incrementing pos past whitespace.
  53 // Note: Might return pos == str.length()
  54 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
  55 {
  56     unsigned len = str.length();
  57
  58     if (fromHttpEquivMeta) {
  59         while (pos < len && str[pos] <= ' ')
  60             ++pos;
  61     } else {
  62         while (pos < len && isWhitespace(str[pos]))
  63             ++pos;
  64     }
  65
  66     return pos < len;
  67 }
  68
  69 // Returns true if the function can match the whole token (case insensitive)
  70 // incrementing pos on match, otherwise leaving pos unchanged.
  71 // Note: Might return pos == str.length()
  72 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
  73 {
  74     unsigned len = str.length();
  75     unsigned current = pos;
  76
  77     while (current < len && *token) {
  78         if (toASCIILower(str[current]) != *token++)
  79             return false;
  80         ++current;
  81     }
  82
  83     if (*token)
  84         return false;
  85
  86     pos = current;
  87     return true;
  88 }
  89
  90 // True if the expected equals sign is seen and there is more to follow.
  91 static inline bool skipEquals(const String& str, unsigned &pos)
  92 {
  93     return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
  94 }
  95
  96 // True if a value present, incrementing pos to next space or semicolon, if any.
  97 // Note: might return pos == str.length().
  98 static inline bool skipValue(const String& str, unsigned& pos)
  99 {
 100     unsigned start = pos;
 101     unsigned len = str.length();
 102     while (pos < len) {
 103         if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
 104             break;
 105         ++pos;
 106     }
 107     return pos != start;
 108 }
 109
 110 bool isValidHTTPHeaderValue(const String& name)
 111 {
 112     // FIXME: This should really match name against
 113     // field-value in section 4.2 of RFC 2616.
 114
 115     return name.containsOnlyLatin1() && !name.contains('\r') && !name.contains('\n') && !name.contains(static_cast<UChar>('\0'));
 116 }
 117
 118 // See RFC 7230, Section 3.2.
 119 // Checks whether |value| matches field-content in RFC 7230.
 120 // link: http://tools.ietf.org/html/rfc7230#section-3.2
 121 bool isValidHTTPFieldContentRFC7230(const String& value)
 122 {
 123     if (value.isEmpty())
 124         return false;
 125
 126     UChar firstCharacter = value[0];
 127     if (firstCharacter == ' ' || firstCharacter == '\t')
 128         return false;
 129
 130     UChar lastCharacter = value[value.length() - 1];
 131     if (lastCharacter == ' ' || lastCharacter == '\t')
 132         return false;
 133
 134     for (unsigned i = 0; i < value.length(); ++i) {
 135         UChar c = value[i];
 136         // TODO(mkwst): Extract this character class to a central location, https://crbug.com/527324.
 137         if (c == 0x7F || c > 0xFF || (c < 0x20 && c != '\t'))
 138             return false;
 139     }
 140
 141     return true;
 142 }
 143
 144 // See RFC 7230, Section 3.2.6.
 145 bool isValidHTTPToken(const String& characters)
 146 {
 147     if (characters.isEmpty())
 148         return false;
 149     for (unsigned i = 0; i < characters.length(); ++i) {
 150         UChar c = characters[i];
 151         if (c <= 0x20 || c >= 0x7F
 152             || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
 153             || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
 154             || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
 155             || c == '{' || c == '}')
 156         return false;
 157     }
 158     return true;
 159 }
 160
 161 static const size_t maxInputSampleSize = 128;
 162 static String trimInputSample(const char* p, size_t length)
 163 {
 164     if (length > maxInputSampleSize)
 165         return String(p, maxInputSampleSize) + horizontalEllipsisCharacter;
 166     return String(p, length);
 167 }
 168
 169 ContentDispositionType contentDispositionType(const String& contentDisposition)
 170 {
 171     if (contentDisposition.isEmpty())
 172         return ContentDispositionNone;
 173
 174     Vector<String> parameters;
 175     contentDisposition.split(';', parameters);
 176
 177     if (parameters.isEmpty())
 178         return ContentDispositionNone;
 179
 180     String dispositionType = parameters[0];
 181     dispositionType.stripWhiteSpace();
 182
 183     if (equalIgnoringCase(dispositionType, "inline"))
 184         return ContentDispositionInline;
 185
 186     // Some broken sites just send bogus headers like
 187     //
 188     //   Content-Disposition: ; filename="file"
 189     //   Content-Disposition: filename="file"
 190     //   Content-Disposition: name="file"
 191     //
 192     // without a disposition token... screen those out.
 193     if (!isValidHTTPToken(dispositionType))
 194         return ContentDispositionNone;
 195
 196     // We have a content-disposition of "attachment" or unknown.
 197     // RFC 2183, section 2.8 says that an unknown disposition
 198     // value should be treated as "attachment"
 199     return ContentDispositionAttachment;
 200 }
 201
 202 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
 203 {
 204     unsigned len = refresh.length();
 205     unsigned pos = 0;
 206
 207     if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
 208         return false;
 209
 210     while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
 211         ++pos;
 212
 213     if (pos == len) { // no URL
 214         url = String();
 215         bool ok;
 216         delay = refresh.stripWhiteSpace().toDouble(&ok);
 217         return ok;
 218     } else {
 219         bool ok;
 220         delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
 221         if (!ok)
 222             return false;
 223
 224         ++pos;
 225         skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
 226         unsigned urlStartPos = pos;
 227         if (refresh.find("url", urlStartPos, TextCaseInsensitive) == urlStartPos) {
 228             urlStartPos += 3;
 229             skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
 230             if (refresh[urlStartPos] == '=') {
 231                 ++urlStartPos;
 232                 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
 233             } else {
 234                 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
 235             }
 236         }
 237
 238         unsigned urlEndPos = len;
 239
 240         if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
 241             UChar quotationMark = refresh[urlStartPos];
 242             urlStartPos++;
 243             while (urlEndPos > urlStartPos) {
 244                 urlEndPos--;
 245                 if (refresh[urlEndPos] == quotationMark)
 246                     break;
 247             }
 248
 249             // https://bugs.webkit.org/show_bug.cgi?id=27868
 250             // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
 251             // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
 252             // after the opening quote instead.
 253             if (urlEndPos == urlStartPos)
 254                 urlEndPos = len;
 255         }
 256
 257         url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
 258         return true;
 259     }
 260 }
 261
 262 double parseDate(const String& value)
 263 {
 264     return parseDateFromNullTerminatedCharacters(value.utf8().data());
 265 }
 266
 267 // FIXME: This function doesn't comply with RFC 6266.
 268 // For example, this function doesn't handle the interaction between " and ;
 269 // that arises from quoted-string, nor does this function properly unquote
 270 // attribute values. Further this function appears to process parameter names
 271 // in a case-sensitive manner. (There are likely other bugs as well.)
 272 String filenameFromHTTPContentDisposition(const String& value)
 273 {
 274     Vector<String> keyValuePairs;
 275     value.split(';', keyValuePairs);
 276
 277     unsigned length = keyValuePairs.size();
 278     for (unsigned i = 0; i < length; i++) {
 279         size_t valueStartPos = keyValuePairs[i].find('=');
 280         if (valueStartPos == kNotFound)
 281             continue;
 282
 283         String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
 284
 285         if (key.isEmpty() || key != "filename")
 286             continue;
 287
 288         String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
 289
 290         // Remove quotes if there are any
 291         if (value[0] == '\"')
 292             value = value.substring(1, value.length() - 2);
 293
 294         return value;
 295     }
 296
 297     return String();
 298 }
 299
 300 AtomicString extractMIMETypeFromMediaType(const AtomicString& mediaType)
 301 {
 302     StringBuilder mimeType;
 303     unsigned length = mediaType.length();
 304     mimeType.reserveCapacity(length);
 305     for (unsigned i = 0; i < length; i++) {
 306         UChar c = mediaType[i];
 307
 308         if (c == ';')
 309             break;
 310
 311         // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
 312         // type header field, Content-Type. In such cases, the media type string passed here may contain
 313         // the multiple values separated by commas. For now, this code ignores text after the first comma,
 314         // which prevents it from simply failing to parse such types altogether. Later for better
 315         // compatibility we could consider using the first or last valid MIME type instead.
 316         // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
 317         if (c == ',')
 318             break;
 319
 320         // FIXME: The following is not correct. RFC 2616 allows linear white space before and
 321         // after the MIME type, but not within the MIME type itself. And linear white space
 322         // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
 323         // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
 324         if (isSpaceOrNewline(c))
 325             continue;
 326
 327         mimeType.append(c);
 328     }
 329
 330     if (mimeType.length() == length)
 331         return mediaType;
 332     return mimeType.toAtomicString();
 333 }
 334
 335 String extractCharsetFromMediaType(const String& mediaType)
 336 {
 337     unsigned pos, len;
 338     findCharsetInMediaType(mediaType, pos, len);
 339     return mediaType.substring(pos, len);
 340 }
 341
 342 void findCharsetInMediaType(const String& mediaType, unsigned& charsetPos, unsigned& charsetLen, unsigned start)
 343 {
 344     charsetPos = start;
 345     charsetLen = 0;
 346
 347     size_t pos = start;
 348     unsigned length = mediaType.length();
 349
 350     while (pos < length) {
 351         pos = mediaType.find("charset", pos, TextCaseInsensitive);
 352         if (pos == kNotFound || !pos) {
 353             charsetLen = 0;
 354             return;
 355         }
 356
 357         // is what we found a beginning of a word?
 358         if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
 359             pos += 7;
 360             continue;
 361         }
 362
 363         pos += 7;
 364
 365         // skip whitespace
 366         while (pos != length && mediaType[pos] <= ' ')
 367             ++pos;
 368
 369         if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
 370             continue;
 371
 372         while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
 373             ++pos;
 374
 375         // we don't handle spaces within quoted parameter values, because charset names cannot have any
 376         unsigned endpos = pos;
 377         while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
 378             ++endpos;
 379
 380         charsetPos = pos;
 381         charsetLen = endpos - pos;
 382         return;
 383     }
 384 }
 385
 386 ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
 387 {
 388     DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, ("expected 0 or 1"));
 389     DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, ("expected semicolon"));
 390     DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, ("expected equals sign"));
 391     DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, ("invalid mode directive"));
 392     DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, ("invalid report directive"));
 393     DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, ("duplicate mode directive"));
 394     DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, ("duplicate report directive"));
 395     DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, ("unrecognized directive"));
 396
 397     unsigned pos = 0;
 398
 399     if (!skipWhiteSpace(header, pos, false))
 400         return ReflectedXSSUnset;
 401
 402     if (header[pos] == '0')
 403         return AllowReflectedXSS;
 404
 405     if (header[pos++] != '1') {
 406         failureReason = failureReasonInvalidToggle;
 407         return ReflectedXSSInvalid;
 408     }
 409
 410     ReflectedXSSDisposition result = FilterReflectedXSS;
 411     bool modeDirectiveSeen = false;
 412     bool reportDirectiveSeen = false;
 413
 414     while (1) {
 415         // At end of previous directive: consume whitespace, semicolon, and whitespace.
 416         if (!skipWhiteSpace(header, pos, false))
 417             return result;
 418
 419         if (header[pos++] != ';') {
 420             failureReason = failureReasonInvalidSeparator;
 421             failurePosition = pos;
 422             return ReflectedXSSInvalid;
 423         }
 424
 425         if (!skipWhiteSpace(header, pos, false))
 426             return result;
 427
 428         // At start of next directive.
 429         if (skipToken(header, pos, "mode")) {
 430             if (modeDirectiveSeen) {
 431                 failureReason = failureReasonDuplicateMode;
 432                 failurePosition = pos;
 433                 return ReflectedXSSInvalid;
 434             }
 435             modeDirectiveSeen = true;
 436             if (!skipEquals(header, pos)) {
 437                 failureReason = failureReasonInvalidEquals;
 438                 failurePosition = pos;
 439                 return ReflectedXSSInvalid;
 440             }
 441             if (!skipToken(header, pos, "block")) {
 442                 failureReason = failureReasonInvalidMode;
 443                 failurePosition = pos;
 444                 return ReflectedXSSInvalid;
 445             }
 446             result = BlockReflectedXSS;
 447         } else if (skipToken(header, pos, "report")) {
 448             if (reportDirectiveSeen) {
 449                 failureReason = failureReasonDuplicateReport;
 450                 failurePosition = pos;
 451                 return ReflectedXSSInvalid;
 452             }
 453             reportDirectiveSeen = true;
 454             if (!skipEquals(header, pos)) {
 455                 failureReason = failureReasonInvalidEquals;
 456                 failurePosition = pos;
 457                 return ReflectedXSSInvalid;
 458             }
 459             size_t startPos = pos;
 460             if (!skipValue(header, pos)) {
 461                 failureReason = failureReasonInvalidReport;
 462                 failurePosition = pos;
 463                 return ReflectedXSSInvalid;
 464             }
 465             reportURL = header.substring(startPos, pos - startPos);
 466             failurePosition = startPos; // If later semantic check deems unacceptable.
 467         } else {
 468             failureReason = failureReasonInvalidDirective;
 469             failurePosition = pos;
 470             return ReflectedXSSInvalid;
 471         }
 472     }
 473 }
 474
 475 ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
 476 {
 477     if (header.stripWhiteSpace().lower() == "nosniff")
 478         return ContentTypeOptionsNosniff;
 479     return ContentTypeOptionsNone;
 480 }
 481
 482 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
 483 {
 484     size_t spacePos = statusLine.find(' ');
 485     // Remove status code from the status line.
 486     spacePos = statusLine.find(' ', spacePos + 1);
 487     return statusLine.substring(spacePos + 1);
 488 }
 489
 490 XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
 491 {
 492     XFrameOptionsDisposition result = XFrameOptionsNone;
 493
 494     if (header.isEmpty())
 495         return result;
 496
 497     Vector<String> headers;
 498     header.split(',', headers);
 499
 500     for (size_t i = 0; i < headers.size(); i++) {
 501         String currentHeader = headers[i].stripWhiteSpace();
 502         XFrameOptionsDisposition currentValue = XFrameOptionsNone;
 503         if (equalIgnoringCase(currentHeader, "deny"))
 504             currentValue = XFrameOptionsDeny;
 505         else if (equalIgnoringCase(currentHeader, "sameorigin"))
 506             currentValue = XFrameOptionsSameOrigin;
 507         else if (equalIgnoringCase(currentHeader, "allowall"))
 508             currentValue = XFrameOptionsAllowAll;
 509         else
 510             currentValue = XFrameOptionsInvalid;
 511
 512         if (result == XFrameOptionsNone)
 513             result = currentValue;
 514         else if (result != currentValue)
 515             return XFrameOptionsConflict;
 516     }
 517     return result;
 518 }
 519
 520 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
 521 {
 522     // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
 523     // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
 524     // We don't support multiple range requests.
 525
 526     rangeOffset = rangeEnd = rangeSuffixLength = -1;
 527
 528     // The "bytes" unit identifier should be present.
 529     static const char bytesStart[] = "bytes=";
 530     if (!range.startsWith(bytesStart, TextCaseInsensitive))
 531         return false;
 532     String byteRange = range.substring(sizeof(bytesStart) - 1);
 533
 534     // The '-' character needs to be present.
 535     int index = byteRange.find('-');
 536     if (index == -1)
 537         return false;
 538
 539     // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
 540     // Example:
 541     //     -500
 542     if (!index) {
 543         String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
 544         bool ok;
 545         long long value = suffixLengthString.toInt64Strict(&ok);
 546         if (ok)
 547             rangeSuffixLength = value;
 548         return true;
 549     }
 550
 551     // Otherwise, the first-byte-position and the last-byte-position are provied.
 552     // Examples:
 553     //     0-499
 554     //     500-
 555     String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
 556     bool ok;
 557     long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
 558     if (!ok)
 559         return false;
 560
 561     String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
 562     long long lastBytePos = -1;
 563     if (!lastBytePosStr.isEmpty()) {
 564         lastBytePos = lastBytePosStr.toInt64Strict(&ok);
 565         if (!ok)
 566             return false;
 567     }
 568
 569     if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
 570         return false;
 571
 572     rangeOffset = firstBytePos;
 573     rangeEnd = lastBytePos;
 574     return true;
 575 }
 576
 577 // HTTP/1.1 - RFC 2616
 578 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
 579 // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
 580 size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
 581 {
 582     method = String();
 583     url = String();
 584     httpVersion = Unknown;
 585
 586     const char* space1 = 0;
 587     const char* space2 = 0;
 588     const char* p;
 589     size_t consumedLength;
 590
 591     for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
 592         if (*p == ' ') {
 593             if (!space1)
 594                 space1 = p;
 595             else if (!space2)
 596                 space2 = p;
 597         } else if (*p == '\n') {
 598             break;
 599         }
 600     }
 601
 602     // Haven't finished header line.
 603     if (consumedLength == length) {
 604         failureReason = "Incomplete Request Line";
 605         return 0;
 606     }
 607
 608     // RequestLine does not contain 3 parts.
 609     if (!space1 || !space2) {
 610         failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
 611         return 0;
 612     }
 613
 614     // The line must end with "\r\n".
 615     const char* end = p + 1;
 616     if (*(end - 2) != '\r') {
 617         failureReason = "Request line does not end with CRLF";
 618         return 0;
 619     }
 620
 621     // Request Method.
 622     method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
 623
 624     // Request URI.
 625     url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.
 626
 627     // HTTP Version.
 628     String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
 629     if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
 630         httpVersion = Unknown;
 631     else if (httpVersionString[7] == '0')
 632         httpVersion = HTTP_1_0;
 633     else if (httpVersionString[7] == '1')
 634         httpVersion = HTTP_1_1;
 635     else
 636         httpVersion = Unknown;
 637
 638     return end - data;
 639 }
 640
 641 static bool parseHTTPHeaderName(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* name)
 642 {
 643     size_t nameBegin = start;
 644     for (size_t i = start; i < size; ++i) {
 645         switch (s[i]) {
 646         case '\r':
 647             failureReason = "Unexpected CR in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
 648             return false;
 649         case '\n':
 650             failureReason = "Unexpected LF in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
 651             return false;
 652         case ':':
 653             if (i == nameBegin) {
 654                 failureReason = "Header name is missing";
 655                 return false;
 656             }
 657             *name = AtomicString::fromUTF8(&s[nameBegin], i - nameBegin);
 658             if (name->isNull()) {
 659                 failureReason = "Invalid UTF-8 sequence in header name";
 660                 return false;
 661             }
 662             *position = i;
 663             return true;
 664         default:
 665             break;
 666         }
 667     }
 668     failureReason = "Unterminated header name";
 669     return false;
 670 }
 671
 672 static bool parseHTTPHeaderValue(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* value)
 673 {
 674     size_t i = start;
 675     for (; i < size && s[i] == ' '; ++i) {
 676     }
 677     size_t valueBegin = i;
 678
 679     for (; i < size && s[i] != '\r'; ++i) {
 680         if (s[i] == '\n') {
 681             failureReason = "Unexpected LF in value at " + trimInputSample(&s[valueBegin], i - valueBegin);
 682             return false;
 683         }
 684     }
 685     if (i == size) {
 686         failureReason = "Unterminated header value";
 687         return false;
 688     }
 689
 690     ASSERT(i < size && s[i] == '\r');
 691     if (i + 1 >= size || s[i + 1] != '\n') {
 692         failureReason = "LF doesn't follow CR after value at " + trimInputSample(&s[i + 1], size - i - 1);
 693         return false;
 694     }
 695
 696     *value = AtomicString::fromUTF8(&s[valueBegin], i - valueBegin);
 697     if (i != valueBegin && value->isNull()) {
 698         failureReason = "Invalid UTF-8 sequence in header value";
 699         return false;
 700     }
 701
 702     // 2 for strlen("\r\n")
 703     *position = i + 2;
 704     return true;
 705 }
 706
 707 // Note that the header is already parsed and re-formatted in chromium side.
 708 // We assume that the input is more restricted than RFC2616.
 709 size_t parseHTTPHeader(const char* s, size_t size, String& failureReason, AtomicString& name, AtomicString& value)
 710 {
 711     name = nullAtom;
 712     value = nullAtom;
 713     if (size >= 1 && s[0] == '\r') {
 714         if (size >= 2 && s[1] == '\n') {
 715             // Skip an empty line.
 716             return 2;
 717         }
 718         failureReason = "LF doesn't follow CR at " + trimInputSample(0, size);
 719         return 0;
 720     }
 721     size_t current = 0;
 722     if (!parseHTTPHeaderName(s, current, size, failureReason, &current, &name)) {
 723         return 0;
 724     }
 725     ASSERT(s[current] == ':');
 726     ++current;
 727
 728     if (!parseHTTPHeaderValue(s, current, size, failureReason, &current, &value)) {
 729         return 0;
 730     }
 731
 732     return current;
 733 }
 734
 735 size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
 736 {
 737     body.clear();
 738     body.append(data, length);
 739
 740     return length;
 741 }
 742
 743 static bool isCacheHeaderSeparator(UChar c)
 744 {
 745     // See RFC 2616, Section 2.2
 746     switch (c) {
 747     case '(':
 748     case ')':
 749     case '<':
 750     case '>':
 751     case '@':
 752     case ',':
 753     case ';':
 754     case ':':
 755     case '\\':
 756     case '"':
 757     case '/':
 758     case '[':
 759     case ']':
 760     case '?':
 761     case '=':
 762     case '{':
 763     case '}':
 764     case ' ':
 765     case '\t':
 766         return true;
 767     default:
 768         return false;
 769     }
 770 }
 771
 772 static bool isControlCharacter(UChar c)
 773 {
 774     return c < ' ' || c == 127;
 775 }
 776
 777 static inline String trimToNextSeparator(const String& str)
 778 {
 779     return str.substring(0, str.find(isCacheHeaderSeparator));
 780 }
 781
 782 static void parseCacheHeader(const String& header, Vector<pair<String, String>>& result)
 783 {
 784     const String safeHeader = header.removeCharacters(isControlCharacter);
 785     unsigned max = safeHeader.length();
 786     for (unsigned pos = 0; pos < max; /* pos incremented in loop */) {
 787         size_t nextCommaPosition = safeHeader.find(',', pos);
 788         size_t nextEqualSignPosition = safeHeader.find('=', pos);
 789         if (nextEqualSignPosition != kNotFound && (nextEqualSignPosition < nextCommaPosition || nextCommaPosition == kNotFound)) {
 790             // Get directive name, parse right hand side of equal sign, then add to map
 791             String directive = trimToNextSeparator(safeHeader.substring(pos, nextEqualSignPosition - pos).stripWhiteSpace());
 792             pos += nextEqualSignPosition - pos + 1;
 793
 794             String value = safeHeader.substring(pos, max - pos).stripWhiteSpace();
 795             if (value[0] == '"') {
 796                 // The value is a quoted string
 797                 size_t nextDoubleQuotePosition = value.find('"', 1);
 798                 if (nextDoubleQuotePosition != kNotFound) {
 799                     // Store the value as a quoted string without quotes
 800                     result.append(pair<String, String>(directive, value.substring(1, nextDoubleQuotePosition - 1).stripWhiteSpace()));
 801                     pos += (safeHeader.find('"', pos) - pos) + nextDoubleQuotePosition + 1;
 802                     // Move past next comma, if there is one
 803                     size_t nextCommaPosition2 = safeHeader.find(',', pos);
 804                     if (nextCommaPosition2 != kNotFound)
 805                         pos += nextCommaPosition2 - pos + 1;
 806                     else
 807                         return; // Parse error if there is anything left with no comma
 808                 } else {
 809                     // Parse error; just use the rest as the value
 810                     result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(1, value.length() - 1).stripWhiteSpace())));
 811                     return;
 812                 }
 813             } else {
 814                 // The value is a token until the next comma
 815                 size_t nextCommaPosition2 = value.find(',');
 816                 if (nextCommaPosition2 != kNotFound) {
 817                     // The value is delimited by the next comma
 818                     result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(0, nextCommaPosition2).stripWhiteSpace())));
 819                     pos += (safeHeader.find(',', pos) - pos) + 1;
 820                 } else {
 821                     // The rest is the value; no change to value needed
 822                     result.append(pair<String, String>(directive, trimToNextSeparator(value)));
 823                     return;
 824                 }
 825             }
 826         } else if (nextCommaPosition != kNotFound && (nextCommaPosition < nextEqualSignPosition || nextEqualSignPosition == kNotFound)) {
 827             // Add directive to map with empty string as value
 828             result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, nextCommaPosition - pos).stripWhiteSpace()), ""));
 829             pos += nextCommaPosition - pos + 1;
 830         } else {
 831             // Add last directive to map with empty string as value
 832             result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, max - pos).stripWhiteSpace()), ""));
 833             return;
 834         }
 835     }
 836 }
 837
 838 CacheControlHeader parseCacheControlDirectives(const AtomicString& cacheControlValue, const AtomicString& pragmaValue)
 839 {
 840     CacheControlHeader cacheControlHeader;
 841     cacheControlHeader.parsed = true;
 842     cacheControlHeader.maxAge = std::numeric_limits<double>::quiet_NaN();
 843     cacheControlHeader.staleWhileRevalidate = std::numeric_limits<double>::quiet_NaN();
 844
 845     DEFINE_STATIC_LOCAL(const AtomicString, noCacheDirective, ("no-cache", AtomicString::ConstructFromLiteral));
 846     DEFINE_STATIC_LOCAL(const AtomicString, noStoreDirective, ("no-store", AtomicString::ConstructFromLiteral));
 847     DEFINE_STATIC_LOCAL(const AtomicString, mustRevalidateDirective, ("must-revalidate", AtomicString::ConstructFromLiteral));
 848     DEFINE_STATIC_LOCAL(const AtomicString, maxAgeDirective, ("max-age", AtomicString::ConstructFromLiteral));
 849     DEFINE_STATIC_LOCAL(const AtomicString, staleWhileRevalidateDirective, ("stale-while-revalidate", AtomicString::ConstructFromLiteral));
 850
 851     if (!cacheControlValue.isEmpty()) {
 852         Vector<pair<String, String>> directives;
 853         parseCacheHeader(cacheControlValue, directives);
 854
 855         size_t directivesSize = directives.size();
 856         for (size_t i = 0; i < directivesSize; ++i) {
 857             // RFC2616 14.9.1: A no-cache directive with a value is only meaningful for proxy caches.
 858             // It should be ignored by a browser level cache.
 859             if (equalIgnoringCase(directives[i].first, noCacheDirective) && directives[i].second.isEmpty()) {
 860                 cacheControlHeader.containsNoCache = true;
 861             } else if (equalIgnoringCase(directives[i].first, noStoreDirective)) {
 862                 cacheControlHeader.containsNoStore = true;
 863             } else if (equalIgnoringCase(directives[i].first, mustRevalidateDirective)) {
 864                 cacheControlHeader.containsMustRevalidate = true;
 865             } else if (equalIgnoringCase(directives[i].first, maxAgeDirective)) {
 866                 if (!std::isnan(cacheControlHeader.maxAge)) {
 867                     // First max-age directive wins if there are multiple ones.
 868                     continue;
 869                 }
 870                 bool ok;
 871                 double maxAge = directives[i].second.toDouble(&ok);
 872                 if (ok)
 873                     cacheControlHeader.maxAge = maxAge;
 874             } else if (equalIgnoringCase(directives[i].first, staleWhileRevalidateDirective)) {
 875                 if (!std::isnan(cacheControlHeader.staleWhileRevalidate)) {
 876                     // First stale-while-revalidate directive wins if there are multiple ones.
 877                     continue;
 878                 }
 879                 bool ok;
 880                 double staleWhileRevalidate = directives[i].second.toDouble(&ok);
 881                 if (ok)
 882                     cacheControlHeader.staleWhileRevalidate = staleWhileRevalidate;
 883             }
 884         }
 885     }
 886
 887     if (!cacheControlHeader.containsNoCache) {
 888         // Handle Pragma: no-cache
 889         // This is deprecated and equivalent to Cache-control: no-cache
 890         // Don't bother tokenizing the value, it is not important
 891         cacheControlHeader.containsNoCache = pragmaValue.lower().contains(noCacheDirective);
 892     }
 893     return cacheControlHeader;
 894 }
 895
 896 void parseCommaDelimitedHeader(const String& headerValue, CommaDelimitedHeaderSet& headerSet)
 897 {
 898     Vector<String> results;
 899     headerValue.split(",", results);
 900     for (auto& value : results)
 901         headerSet.add(value.stripWhiteSpace(isWhitespace));
 902 }
 903
 904 }