Backed out changeset 9d8b4c0b99ed (bug 1945683) for causing btime failures. CLOSED...
[gecko.git] / dom / base / BodyUtil.cpp
blobc6bb575a66450869245e14399a3e482413bfd3d9
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "BodyUtil.h"

#include <cstring>

#include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
#include "js/JSON.h"
#include "mozilla/Encoding.h"
#include "mozilla/ErrorResult.h"
#include "mozilla/dom/Exceptions.h"
#include "mozilla/dom/FetchUtil.h"
#include "mozilla/dom/File.h"
#include "mozilla/dom/FormData.h"
#include "mozilla/dom/Headers.h"
#include "mozilla/dom/MimeType.h"
#include "mozilla/dom/Promise.h"
#include "nsCRT.h"
#include "nsCharSeparatedTokenizer.h"
#include "nsDOMString.h"
#include "nsError.h"
#include "nsIGlobalObject.h"
#include "nsNetUtil.h"
#include "nsReadableUtils.h"
#include "nsStreamUtils.h"
#include "nsString.h"
#include "nsStringStream.h"
#include "nsURLHelper.h"
33 namespace mozilla::dom {
35 namespace {
37 // Reads over a CRLF and positions start after it.
38 static bool PushOverLine(nsACString::const_iterator& aStart,
39 const nsACString::const_iterator& aEnd) {
40 if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
41 ++aStart; // advance to after CRLF
42 return true;
45 return false;
48 /**
49 * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
50 * This does not respect any encoding specified per entry, using UTF-8
51 * throughout. This is as the Fetch spec states in the consume body algorithm.
52 * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
53 * unlike Necko we do not have to deal with receiving incomplete chunks of data.
55 * This parser will fail the entire parse on any invalid entry, so it will
56 * never return a partially filled FormData.
57 * The content-disposition header is used to figure out the name and filename
58 * entries. The inclusion of the filename parameter decides if the entry is
59 * inserted into the FormData as a string or a File.
61 * File blobs are copies of the underlying data string since we cannot adopt
62 * char* chunks embedded within the larger body without significant effort.
63 * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
64 * friends to figure out if Fetch ends up copying big blobs to see if this is
65 * worth optimizing.
67 class MOZ_STACK_CLASS FormDataParser {
68 private:
69 RefPtr<FormData> mFormData;
70 nsCString mMimeType;
71 nsCString mMixedCaseMimeType;
72 nsCString mData;
74 // Entry state, reset in START_PART.
75 nsCString mName;
76 nsCString mFilename;
77 nsCString mContentType;
79 enum {
80 START_PART,
81 PARSE_HEADER,
82 PARSE_BODY,
83 } mState;
85 nsIGlobalObject* mParentObject;
87 // Reads over a boundary and sets start to the position after the end of the
88 // boundary. Returns false if no boundary is found immediately.
89 bool PushOverBoundary(const nsACString& aBoundaryString,
90 nsACString::const_iterator& aStart,
91 nsACString::const_iterator& aEnd) {
92 // We copy the end iterator to keep the original pointing to the real end
93 // of the string.
94 nsACString::const_iterator end(aEnd);
95 const char* beginning = aStart.get();
96 if (FindInReadable(aBoundaryString, aStart, end)) {
97 // We either should find the body immediately, or after 2 chars with the
98 // 2 chars being '-', everything else is failure.
99 if ((aStart.get() - beginning) == 0) {
100 aStart.advance(aBoundaryString.Length());
101 return true;
104 if ((aStart.get() - beginning) == 2) {
105 if (*(--aStart) == '-' && *(--aStart) == '-') {
106 aStart.advance(aBoundaryString.Length() + 2);
107 return true;
112 return false;
115 bool ParseHeader(nsACString::const_iterator& aStart,
116 nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
117 nsAutoCString headerName, headerValue;
118 if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
119 aWasEmptyHeader)) {
120 return false;
122 if (*aWasEmptyHeader) {
123 return true;
126 if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
127 bool seenFormData = false;
128 for (const nsACString& token :
129 nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
130 if (token.IsEmpty()) {
131 continue;
134 if (token.EqualsLiteral("form-data")) {
135 seenFormData = true;
136 continue;
139 if (seenFormData && StringBeginsWith(token, "name="_ns)) {
140 mName = StringTail(token, token.Length() - 5);
141 mName.Trim(" \"");
142 continue;
145 if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
146 mFilename = StringTail(token, token.Length() - 9);
147 mFilename.Trim(" \"");
148 continue;
152 if (mName.IsVoid()) {
153 // Could not parse a valid entry name.
154 return false;
156 } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
157 mContentType = headerValue;
160 return true;
163 // The end of a body is marked by a CRLF followed by the boundary. So the
164 // CRLF is part of the boundary and not the body, but any prior CRLFs are
165 // part of the body. This will position the iterator at the beginning of the
166 // boundary (after the CRLF).
167 bool ParseBody(const nsACString& aBoundaryString,
168 nsACString::const_iterator& aStart,
169 nsACString::const_iterator& aEnd) {
170 const char* beginning = aStart.get();
172 // Find the boundary marking the end of the body.
173 nsACString::const_iterator end(aEnd);
174 if (!FindInReadable(aBoundaryString, aStart, end)) {
175 return false;
178 // We found a boundary, strip the just prior CRLF, and consider
179 // everything else the body section.
180 if (aStart.get() - beginning < 2) {
181 // Only the first entry can have a boundary right at the beginning. Even
182 // an empty body will have a CRLF before the boundary. So this is
183 // a failure.
184 return false;
187 // Check that there is a CRLF right before the boundary.
188 aStart.advance(-2);
190 // Skip optional hyphens.
191 if (*aStart == '-' && *(aStart.get() + 1) == '-') {
192 if (aStart.get() - beginning < 2) {
193 return false;
196 aStart.advance(-2);
199 if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
200 return false;
203 nsAutoCString body(beginning, aStart.get() - beginning);
205 // Restore iterator to after the \r\n as we promised.
206 // We do not need to handle the extra hyphens case since our boundary
207 // parser in PushOverBoundary()
208 aStart.advance(2);
210 if (!mFormData) {
211 mFormData = new FormData();
214 NS_ConvertUTF8toUTF16 name(mName);
216 if (mFilename.IsVoid()) {
217 ErrorResult rv;
218 mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
219 MOZ_ASSERT(!rv.Failed());
220 } else {
221 // Unfortunately we've to copy the data first since all our strings are
222 // going to free it. We also need fallible alloc, so we can't just use
223 // ToNewCString().
224 char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
225 nsCString::const_iterator bodyIter, bodyEnd;
226 body.BeginReading(bodyIter);
227 body.EndReading(bodyEnd);
228 char* p = copy;
229 while (bodyIter != bodyEnd) {
230 *p++ = *bodyIter++;
232 p = nullptr;
234 RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
235 mParentObject, reinterpret_cast<void*>(copy), body.Length(),
236 NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
237 /* aLastModifiedDate */ 0);
238 if (NS_WARN_IF(!file)) {
239 return false;
242 Optional<nsAString> dummy;
243 ErrorResult rv;
244 mFormData->Append(name, *file, dummy, rv);
245 if (NS_WARN_IF(rv.Failed())) {
246 rv.SuppressException();
247 return false;
251 return true;
254 public:
255 FormDataParser(const nsACString& aMimeType,
256 const nsACString& aMixedCaseMimeType, const nsACString& aData,
257 nsIGlobalObject* aParent)
258 : mMimeType(aMimeType),
259 mMixedCaseMimeType(aMixedCaseMimeType),
260 mData(aData),
261 mState(START_PART),
262 mParentObject(aParent) {}
264 bool Parse() {
265 if (mData.IsEmpty()) {
266 return false;
269 // Determine boundary from mimetype.
270 RefPtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType);
271 if (!parsed) {
272 return false;
275 nsAutoCString boundaryString;
276 if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) {
277 return false;
280 nsACString::const_iterator start, end;
281 mData.BeginReading(start);
282 // This should ALWAYS point to the end of data.
283 // Helpers make copies.
284 mData.EndReading(end);
286 while (start != end) {
287 switch (mState) {
288 case START_PART:
289 mName.SetIsVoid(true);
290 mFilename.SetIsVoid(true);
291 mContentType = "text/plain"_ns;
293 while (start != end && NS_IsHTTPWhitespace(*start)) {
294 ++start;
297 // MUST start with boundary.
298 if (!PushOverBoundary(boundaryString, start, end)) {
299 return false;
302 if (start != end && *start == '-') {
303 // End of data.
304 if (!mFormData) {
305 mFormData = new FormData();
307 return true;
310 if (!PushOverLine(start, end)) {
311 return false;
313 mState = PARSE_HEADER;
314 break;
316 case PARSE_HEADER:
317 bool emptyHeader;
318 if (!ParseHeader(start, end, &emptyHeader)) {
319 return false;
322 if (emptyHeader && !PushOverLine(start, end)) {
323 return false;
326 mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
327 break;
329 case PARSE_BODY:
330 if (mName.IsVoid()) {
331 NS_WARNING(
332 "No content-disposition header with a valid name was "
333 "found. Failing at body parse.");
334 return false;
337 if (!ParseBody(boundaryString, start, end)) {
338 return false;
341 mState = START_PART;
342 break;
344 default:
345 MOZ_CRASH("Invalid case");
349 MOZ_ASSERT_UNREACHABLE("Should never reach here.");
350 return false;
353 already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
355 } // namespace
357 // static
358 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
359 JS::MutableHandle<JSObject*> aValue,
360 uint32_t aInputLength,
361 UniquePtr<uint8_t[], JS::FreePolicy> aInput,
362 ErrorResult& aRv) {
363 aRv.MightThrowJSException();
365 JS::Rooted<JSObject*> arrayBuffer(aCx);
366 arrayBuffer =
367 JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput));
368 if (!arrayBuffer) {
369 aRv.StealExceptionFromJSContext(aCx);
370 return;
372 aValue.set(arrayBuffer);
375 // static
376 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
377 const nsString& aMimeType,
378 uint32_t aInputLength,
379 uint8_t* aInput,
380 ErrorResult& aRv) {
381 RefPtr<Blob> blob = Blob::CreateMemoryBlob(
382 aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
384 if (!blob) {
385 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
386 return nullptr;
388 return blob.forget();
391 // static
392 void BodyUtil::ConsumeBytes(JSContext* aCx, JS::MutableHandle<JSObject*> aValue,
393 uint32_t aInputLength,
394 UniquePtr<uint8_t[], JS::FreePolicy> aInput,
395 ErrorResult& aRv) {
396 aRv.MightThrowJSException();
398 JS::Rooted<JSObject*> arrayBuffer(aCx);
399 ConsumeArrayBuffer(aCx, &arrayBuffer, aInputLength, std::move(aInput), aRv);
400 if (aRv.Failed()) {
401 return;
404 JS::Rooted<JSObject*> bytes(
405 aCx, JS_NewUint8ArrayWithBuffer(aCx, arrayBuffer, 0, aInputLength));
406 if (!bytes) {
407 aRv.StealExceptionFromJSContext(aCx);
408 return;
410 aValue.set(bytes);
413 // static
414 already_AddRefed<FormData> BodyUtil::ConsumeFormData(
415 nsIGlobalObject* aParent, const nsCString& aMimeType,
416 const nsACString& aMixedCaseMimeType, const nsCString& aStr,
417 ErrorResult& aRv) {
418 constexpr auto formDataMimeType = "multipart/form-data"_ns;
420 // Allow semicolon separated boundary/encoding suffix like
421 // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
422 bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
424 if (isValidFormDataMimeType &&
425 aMimeType.Length() > formDataMimeType.Length()) {
426 isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
429 if (isValidFormDataMimeType) {
430 FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent);
431 if (!parser.Parse()) {
432 aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
433 return nullptr;
436 RefPtr<FormData> fd = parser.GetFormData();
437 MOZ_ASSERT(fd);
438 return fd.forget();
441 constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
442 bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
444 if (isValidUrlEncodedMimeType &&
445 aMimeType.Length() > urlDataMimeType.Length()) {
446 isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
449 if (isValidUrlEncodedMimeType) {
450 RefPtr<FormData> fd = new FormData(aParent);
451 DebugOnly<bool> status = URLParams::Parse(
452 aStr, true, [&fd](const nsACString& aName, const nsACString& aValue) {
453 IgnoredErrorResult rv;
454 fd->Append(NS_ConvertUTF8toUTF16(aName),
455 NS_ConvertUTF8toUTF16(aValue), rv);
456 MOZ_ASSERT(!rv.Failed());
457 return true;
459 MOZ_ASSERT(status);
461 return fd.forget();
464 aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
465 return nullptr;
468 // static
469 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
470 nsString& aText) {
471 nsresult rv =
472 UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
473 if (NS_FAILED(rv)) {
474 return rv;
476 return NS_OK;
479 // static
480 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
481 const nsString& aStr, ErrorResult& aRv) {
482 aRv.MightThrowJSException();
484 JS::Rooted<JS::Value> json(aCx);
485 if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
486 if (!JS_IsExceptionPending(aCx)) {
487 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
488 return;
491 JS::Rooted<JS::Value> exn(aCx);
492 DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
493 MOZ_ASSERT(gotException);
495 JS_ClearPendingException(aCx);
496 aRv.ThrowJSException(aCx, exn);
497 return;
500 aValue.set(json);
503 } // namespace mozilla::dom