dom/base/BodyUtil.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "BodyUtil.h"
   8
   9 #include "nsError.h"
  10 #include "nsString.h"
  11 #include "nsIGlobalObject.h"
  12 #include "mozilla/Encoding.h"
  13 #include "mozilla/dom/MimeType.h"
  14 #include "nsCRT.h"
  15 #include "nsCharSeparatedTokenizer.h"
  16 #include "nsDOMString.h"
  17 #include "nsNetUtil.h"
  18 #include "nsReadableUtils.h"
  19 #include "nsStreamUtils.h"
  20 #include "nsStringStream.h"
  21 #include "nsURLHelper.h"
  22
  23 #include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
  24 #include "js/JSON.h"
  25 #include "mozilla/ErrorResult.h"
  26 #include "mozilla/dom/Exceptions.h"
  27 #include "mozilla/dom/FetchUtil.h"
  28 #include "mozilla/dom/File.h"
  29 #include "mozilla/dom/FormData.h"
  30 #include "mozilla/dom/Headers.h"
  31 #include "mozilla/dom/Promise.h"
  32
  33 namespace mozilla::dom {
  34
  35 namespace {
  36
  37 // Reads over a CRLF and positions start after it.
  38 static bool PushOverLine(nsACString::const_iterator& aStart,
  39                          const nsACString::const_iterator& aEnd) {
  40   if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
  41     ++aStart;  // advance to after CRLF
  42     return true;
  43   }
  44
  45   return false;
  46 }
  47
  48 /**
  49  * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
  50  * This does not respect any encoding specified per entry, using UTF-8
  51  * throughout. This is as the Fetch spec states in the consume body algorithm.
  52  * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
  53  * unlike Necko we do not have to deal with receiving incomplete chunks of data.
  54  *
  55  * This parser will fail the entire parse on any invalid entry, so it will
  56  * never return a partially filled FormData.
  57  * The content-disposition header is used to figure out the name and filename
  58  * entries. The inclusion of the filename parameter decides if the entry is
  59  * inserted into the FormData as a string or a File.
  60  *
  61  * File blobs are copies of the underlying data string since we cannot adopt
  62  * char* chunks embedded within the larger body without significant effort.
  63  * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
  64  * friends to figure out if Fetch ends up copying big blobs to see if this is
  65  * worth optimizing.
  66  */
  67 class MOZ_STACK_CLASS FormDataParser {
  68  private:
  69   RefPtr<FormData> mFormData;
  70   nsCString mMimeType;
  71   nsCString mMixedCaseMimeType;
  72   nsCString mData;
  73
  74   // Entry state, reset in START_PART.
  75   nsCString mName;
  76   nsCString mFilename;
  77   nsCString mContentType;
  78
  79   enum {
  80     START_PART,
  81     PARSE_HEADER,
  82     PARSE_BODY,
  83   } mState;
  84
  85   nsIGlobalObject* mParentObject;
  86
  87   // Reads over a boundary and sets start to the position after the end of the
  88   // boundary. Returns false if no boundary is found immediately.
  89   bool PushOverBoundary(const nsACString& aBoundaryString,
  90                         nsACString::const_iterator& aStart,
  91                         nsACString::const_iterator& aEnd) {
  92     // We copy the end iterator to keep the original pointing to the real end
  93     // of the string.
  94     nsACString::const_iterator end(aEnd);
  95     const char* beginning = aStart.get();
  96     if (FindInReadable(aBoundaryString, aStart, end)) {
  97       // We either should find the body immediately, or after 2 chars with the
  98       // 2 chars being '-', everything else is failure.
  99       if ((aStart.get() - beginning) == 0) {
 100         aStart.advance(aBoundaryString.Length());
 101         return true;
 102       }
 103
 104       if ((aStart.get() - beginning) == 2) {
 105         if (*(--aStart) == '-' && *(--aStart) == '-') {
 106           aStart.advance(aBoundaryString.Length() + 2);
 107           return true;
 108         }
 109       }
 110     }
 111
 112     return false;
 113   }
 114
 115   bool ParseHeader(nsACString::const_iterator& aStart,
 116                    nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
 117     nsAutoCString headerName, headerValue;
 118     if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
 119                                   aWasEmptyHeader)) {
 120       return false;
 121     }
 122     if (*aWasEmptyHeader) {
 123       return true;
 124     }
 125
 126     if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
 127       bool seenFormData = false;
 128       for (const nsACString& token :
 129            nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
 130         if (token.IsEmpty()) {
 131           continue;
 132         }
 133
 134         if (token.EqualsLiteral("form-data")) {
 135           seenFormData = true;
 136           continue;
 137         }
 138
 139         if (seenFormData && StringBeginsWith(token, "name="_ns)) {
 140           mName = StringTail(token, token.Length() - 5);
 141           mName.Trim(" \"");
 142           continue;
 143         }
 144
 145         if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
 146           mFilename = StringTail(token, token.Length() - 9);
 147           mFilename.Trim(" \"");
 148           continue;
 149         }
 150       }
 151
 152       if (mName.IsVoid()) {
 153         // Could not parse a valid entry name.
 154         return false;
 155       }
 156     } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
 157       mContentType = headerValue;
 158     }
 159
 160     return true;
 161   }
 162
 163   // The end of a body is marked by a CRLF followed by the boundary. So the
 164   // CRLF is part of the boundary and not the body, but any prior CRLFs are
 165   // part of the body. This will position the iterator at the beginning of the
 166   // boundary (after the CRLF).
 167   bool ParseBody(const nsACString& aBoundaryString,
 168                  nsACString::const_iterator& aStart,
 169                  nsACString::const_iterator& aEnd) {
 170     const char* beginning = aStart.get();
 171
 172     // Find the boundary marking the end of the body.
 173     nsACString::const_iterator end(aEnd);
 174     if (!FindInReadable(aBoundaryString, aStart, end)) {
 175       return false;
 176     }
 177
 178     // We found a boundary, strip the just prior CRLF, and consider
 179     // everything else the body section.
 180     if (aStart.get() - beginning < 2) {
 181       // Only the first entry can have a boundary right at the beginning. Even
 182       // an empty body will have a CRLF before the boundary. So this is
 183       // a failure.
 184       return false;
 185     }
 186
 187     // Check that there is a CRLF right before the boundary.
 188     aStart.advance(-2);
 189
 190     // Skip optional hyphens.
 191     if (*aStart == '-' && *(aStart.get() + 1) == '-') {
 192       if (aStart.get() - beginning < 2) {
 193         return false;
 194       }
 195
 196       aStart.advance(-2);
 197     }
 198
 199     if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
 200       return false;
 201     }
 202
 203     nsAutoCString body(beginning, aStart.get() - beginning);
 204
 205     // Restore iterator to after the \r\n as we promised.
 206     // We do not need to handle the extra hyphens case since our boundary
 207     // parser in PushOverBoundary()
 208     aStart.advance(2);
 209
 210     if (!mFormData) {
 211       mFormData = new FormData();
 212     }
 213
 214     NS_ConvertUTF8toUTF16 name(mName);
 215
 216     if (mFilename.IsVoid()) {
 217       ErrorResult rv;
 218       mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
 219       MOZ_ASSERT(!rv.Failed());
 220     } else {
 221       // Unfortunately we've to copy the data first since all our strings are
 222       // going to free it. We also need fallible alloc, so we can't just use
 223       // ToNewCString().
 224       char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
 225       nsCString::const_iterator bodyIter, bodyEnd;
 226       body.BeginReading(bodyIter);
 227       body.EndReading(bodyEnd);
 228       char* p = copy;
 229       while (bodyIter != bodyEnd) {
 230         *p++ = *bodyIter++;
 231       }
 232       p = nullptr;
 233
 234       RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
 235           mParentObject, reinterpret_cast<void*>(copy), body.Length(),
 236           NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
 237           /* aLastModifiedDate */ 0);
 238       if (NS_WARN_IF(!file)) {
 239         return false;
 240       }
 241
 242       Optional<nsAString> dummy;
 243       ErrorResult rv;
 244       mFormData->Append(name, *file, dummy, rv);
 245       if (NS_WARN_IF(rv.Failed())) {
 246         rv.SuppressException();
 247         return false;
 248       }
 249     }
 250
 251     return true;
 252   }
 253
 254  public:
 255   FormDataParser(const nsACString& aMimeType,
 256                  const nsACString& aMixedCaseMimeType, const nsACString& aData,
 257                  nsIGlobalObject* aParent)
 258       : mMimeType(aMimeType),
 259         mMixedCaseMimeType(aMixedCaseMimeType),
 260         mData(aData),
 261         mState(START_PART),
 262         mParentObject(aParent) {}
 263
 264   bool Parse() {
 265     if (mData.IsEmpty()) {
 266       return false;
 267     }
 268
 269     // Determine boundary from mimetype.
 270     RefPtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType);
 271     if (!parsed) {
 272       return false;
 273     }
 274
 275     nsAutoCString boundaryString;
 276     if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) {
 277       return false;
 278     }
 279
 280     nsACString::const_iterator start, end;
 281     mData.BeginReading(start);
 282     // This should ALWAYS point to the end of data.
 283     // Helpers make copies.
 284     mData.EndReading(end);
 285
 286     while (start != end) {
 287       switch (mState) {
 288         case START_PART:
 289           mName.SetIsVoid(true);
 290           mFilename.SetIsVoid(true);
 291           mContentType = "text/plain"_ns;
 292
 293           while (start != end && NS_IsHTTPWhitespace(*start)) {
 294             ++start;
 295           }
 296
 297           // MUST start with boundary.
 298           if (!PushOverBoundary(boundaryString, start, end)) {
 299             return false;
 300           }
 301
 302           if (start != end && *start == '-') {
 303             // End of data.
 304             if (!mFormData) {
 305               mFormData = new FormData();
 306             }
 307             return true;
 308           }
 309
 310           if (!PushOverLine(start, end)) {
 311             return false;
 312           }
 313           mState = PARSE_HEADER;
 314           break;
 315
 316         case PARSE_HEADER:
 317           bool emptyHeader;
 318           if (!ParseHeader(start, end, &emptyHeader)) {
 319             return false;
 320           }
 321
 322           if (emptyHeader && !PushOverLine(start, end)) {
 323             return false;
 324           }
 325
 326           mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
 327           break;
 328
 329         case PARSE_BODY:
 330           if (mName.IsVoid()) {
 331             NS_WARNING(
 332                 "No content-disposition header with a valid name was "
 333                 "found. Failing at body parse.");
 334             return false;
 335           }
 336
 337           if (!ParseBody(boundaryString, start, end)) {
 338             return false;
 339           }
 340
 341           mState = START_PART;
 342           break;
 343
 344         default:
 345           MOZ_CRASH("Invalid case");
 346       }
 347     }
 348
 349     MOZ_ASSERT_UNREACHABLE("Should never reach here.");
 350     return false;
 351   }
 352
 353   already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
 354 };
 355 }  // namespace
 356
 357 // static
 358 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
 359                                   JS::MutableHandle<JSObject*> aValue,
 360                                   uint32_t aInputLength,
 361                                   UniquePtr<uint8_t[], JS::FreePolicy> aInput,
 362                                   ErrorResult& aRv) {
 363   aRv.MightThrowJSException();
 364
 365   JS::Rooted<JSObject*> arrayBuffer(aCx);
 366   arrayBuffer =
 367       JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput));
 368   if (!arrayBuffer) {
 369     aRv.StealExceptionFromJSContext(aCx);
 370     return;
 371   }
 372   aValue.set(arrayBuffer);
 373 }
 374
 375 // static
 376 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
 377                                              const nsString& aMimeType,
 378                                              uint32_t aInputLength,
 379                                              uint8_t* aInput,
 380                                              ErrorResult& aRv) {
 381   RefPtr<Blob> blob = Blob::CreateMemoryBlob(
 382       aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
 383
 384   if (!blob) {
 385     aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
 386     return nullptr;
 387   }
 388   return blob.forget();
 389 }
 390
 391 // static
 392 void BodyUtil::ConsumeBytes(JSContext* aCx, JS::MutableHandle<JSObject*> aValue,
 393                             uint32_t aInputLength,
 394                             UniquePtr<uint8_t[], JS::FreePolicy> aInput,
 395                             ErrorResult& aRv) {
 396   aRv.MightThrowJSException();
 397
 398   JS::Rooted<JSObject*> arrayBuffer(aCx);
 399   ConsumeArrayBuffer(aCx, &arrayBuffer, aInputLength, std::move(aInput), aRv);
 400   if (aRv.Failed()) {
 401     return;
 402   }
 403
 404   JS::Rooted<JSObject*> bytes(
 405       aCx, JS_NewUint8ArrayWithBuffer(aCx, arrayBuffer, 0, aInputLength));
 406   if (!bytes) {
 407     aRv.StealExceptionFromJSContext(aCx);
 408     return;
 409   }
 410   aValue.set(bytes);
 411 }
 412
 413 // static
 414 already_AddRefed<FormData> BodyUtil::ConsumeFormData(
 415     nsIGlobalObject* aParent, const nsCString& aMimeType,
 416     const nsACString& aMixedCaseMimeType, const nsCString& aStr,
 417     ErrorResult& aRv) {
 418   constexpr auto formDataMimeType = "multipart/form-data"_ns;
 419
 420   // Allow semicolon separated boundary/encoding suffix like
 421   // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
 422   bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
 423
 424   if (isValidFormDataMimeType &&
 425       aMimeType.Length() > formDataMimeType.Length()) {
 426     isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
 427   }
 428
 429   if (isValidFormDataMimeType) {
 430     FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent);
 431     if (!parser.Parse()) {
 432       aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
 433       return nullptr;
 434     }
 435
 436     RefPtr<FormData> fd = parser.GetFormData();
 437     MOZ_ASSERT(fd);
 438     return fd.forget();
 439   }
 440
 441   constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
 442   bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
 443
 444   if (isValidUrlEncodedMimeType &&
 445       aMimeType.Length() > urlDataMimeType.Length()) {
 446     isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
 447   }
 448
 449   if (isValidUrlEncodedMimeType) {
 450     RefPtr<FormData> fd = new FormData(aParent);
 451     DebugOnly<bool> status = URLParams::Parse(
 452         aStr, true, [&fd](const nsACString& aName, const nsACString& aValue) {
 453           IgnoredErrorResult rv;
 454           fd->Append(NS_ConvertUTF8toUTF16(aName),
 455                      NS_ConvertUTF8toUTF16(aValue), rv);
 456           MOZ_ASSERT(!rv.Failed());
 457           return true;
 458         });
 459     MOZ_ASSERT(status);
 460
 461     return fd.forget();
 462   }
 463
 464   aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
 465   return nullptr;
 466 }
 467
 468 // static
 469 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
 470                                nsString& aText) {
 471   nsresult rv =
 472       UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
 473   if (NS_FAILED(rv)) {
 474     return rv;
 475   }
 476   return NS_OK;
 477 }
 478
 479 // static
 480 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
 481                            const nsString& aStr, ErrorResult& aRv) {
 482   aRv.MightThrowJSException();
 483
 484   JS::Rooted<JS::Value> json(aCx);
 485   if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
 486     if (!JS_IsExceptionPending(aCx)) {
 487       aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
 488       return;
 489     }
 490
 491     JS::Rooted<JS::Value> exn(aCx);
 492     DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
 493     MOZ_ASSERT(gotException);
 494
 495     JS_ClearPendingException(aCx);
 496     aRv.ThrowJSException(aCx, exn);
 497     return;
 498   }
 499
 500   aValue.set(json);
 501 }
 502
 503 }  // namespace mozilla::dom