1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11 #include "nsIGlobalObject.h"
12 #include "mozilla/Encoding.h"
13 #include "mozilla/dom/MimeType.h"
15 #include "nsCharSeparatedTokenizer.h"
16 #include "nsDOMString.h"
17 #include "nsNetUtil.h"
18 #include "nsReadableUtils.h"
19 #include "nsStreamUtils.h"
20 #include "nsStringStream.h"
21 #include "nsURLHelper.h"
23 #include "js/ArrayBuffer.h" // JS::NewArrayBufferWithContents
25 #include "mozilla/ErrorResult.h"
26 #include "mozilla/dom/Exceptions.h"
27 #include "mozilla/dom/FetchUtil.h"
28 #include "mozilla/dom/File.h"
29 #include "mozilla/dom/FormData.h"
30 #include "mozilla/dom/Headers.h"
31 #include "mozilla/dom/Promise.h"
33 namespace mozilla::dom
{
37 // Reads over a CRLF and positions start after it.
//
// aStart is the in/out read position; aEnd marks the end of the readable data.
// Review notes on the condition below:
//  - *aStart is dereferenced before the remaining length is checked.
//  - The pre-increment inside the condition moves aStart past the CR even when
//    the byte that follows turns out not to be LF, so on that path the caller
//    observes an iterator advanced by one.
// The return statements are not visible in this excerpt.
38 static bool PushOverLine(nsACString::const_iterator
& aStart
,
39 const nsACString::const_iterator
& aEnd
) {
40 if (*aStart
== nsCRT::CR
&& (aEnd
- aStart
> 1) && *(++aStart
) == nsCRT::LF
) {
41 ++aStart
; // advance to after CRLF
49 * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
50 * This does not respect any encoding specified per entry, using UTF-8
51 * throughout. This is as the Fetch spec states in the consume body algorithm.
52 * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
53 * unlike Necko we do not have to deal with receiving incomplete chunks of data.
55 * This parser will fail the entire parse on any invalid entry, so it will
56 * never return a partially filled FormData.
57 * The content-disposition header is used to figure out the name and filename
58 * entries. The inclusion of the filename parameter decides if the entry is
59 * inserted into the FormData as a string or a File.
61 * File blobs are copies of the underlying data string since we cannot adopt
62 * char* chunks embedded within the larger body without significant effort.
63 * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
64 * friends to figure out if Fetch ends up copying big blobs to see if this is
// Stack-only helper that parses a multipart/form-data body into a FormData.
// Only some of the member declarations are visible in this excerpt; other
// members used by the methods (mMimeType, mData, mName, mFilename, mState)
// are declared on lines not shown here.
67 class MOZ_STACK_CLASS FormDataParser
{
// Result object. Assigned with `new FormData()` in Parse() and ParseBody(),
// and handed to the caller by GetFormData().
69 RefPtr
<FormData
> mFormData
;
// Mime type string that Parse() feeds to CMimeType::Parse() in order to look
// up the "boundary" parameter.
71 nsCString mMixedCaseMimeType
;
74 // Entry state, reset in START_PART.
// Content type of the current part: Parse() resets it to "text/plain" and
// ParseHeader() overwrites it with the value of a content-type header.
77 nsCString mContentType
;
// Global passed through to File::CreateMemoryFileWithCustomLastModified() when
// a part is stored as a File.
85 nsIGlobalObject
* mParentObject
;
87 // Reads over a boundary and sets start to the position after the end of the
88 // boundary. Returns false if no boundary is found immediately.
//
// aBoundaryString: boundary text to look for.
// aStart: in/out; FindInReadable() below is given this iterator directly, and
//         it is then advanced past the boundary on the accepted paths.
// aEnd: real end of the data; only a copy is handed to FindInReadable().
89 bool PushOverBoundary(const nsACString
& aBoundaryString
,
90 nsACString::const_iterator
& aStart
,
91 nsACString::const_iterator
& aEnd
) {
92 // We copy the end iterator to keep the original pointing to the real end
94 nsACString::const_iterator
end(aEnd
);
// Remember where the search began so the offset of the match can be measured.
95 const char* beginning
= aStart
.get();
96 if (FindInReadable(aBoundaryString
, aStart
, end
)) {
97 // We should find the boundary either immediately, or after 2 chars with the
98 // 2 chars being '-', everything else is failure.
// Offset 0: the boundary starts right at the original position.
99 if ((aStart
.get() - beginning
) == 0) {
100 aStart
.advance(aBoundaryString
.Length());
// Offset 2: accept only if the two bytes before the match are both '-'.
104 if ((aStart
.get() - beginning
) == 2) {
// Each pre-decrement steps aStart back one byte before testing it, so after
// both tests succeed aStart is back at `beginning`. If a test fails, aStart
// has still been moved back by one or two bytes.
105 if (*(--aStart
) == '-' && *(--aStart
) == '-') {
// +2 accounts for the two hyphens that aStart was just moved back over.
106 aStart
.advance(aBoundaryString
.Length() + 2);
// Parses one header line of the current part via FetchUtil::ExtractHeader()
// and records the pieces this parser cares about:
//  - content-disposition: fills mName / mFilename from the "name=" and
//    "filename=" parameters, which are honoured only after a "form-data" token
//    has been seen.
//  - content-type: copies the header value into mContentType.
// aWasEmptyHeader is an out-flag; it is read right after ExtractHeader(), whose
// trailing arguments are not visible in this excerpt (presumably it is set
// there - confirm). The return statements are likewise not visible.
115 bool ParseHeader(nsACString::const_iterator
& aStart
,
116 nsACString::const_iterator
& aEnd
, bool* aWasEmptyHeader
) {
117 nsAutoCString headerName
, headerValue
;
118 if (!FetchUtil::ExtractHeader(aStart
, aEnd
, headerName
, headerValue
,
122 if (*aWasEmptyHeader
) {
// Header names are matched with LowerCaseEqualsLiteral(), whereas the
// parameter tokens below are matched exactly (EqualsLiteral/StringBeginsWith).
126 if (headerName
.LowerCaseEqualsLiteral("content-disposition")) {
127 bool seenFormData
= false;
// The header value is split on ';' and examined token by token.
128 for (const nsACString
& token
:
129 nsCCharSeparatedTokenizer(headerValue
, ';').ToRange()) {
130 if (token
.IsEmpty()) {
134 if (token
.EqualsLiteral("form-data")) {
139 if (seenFormData
&& StringBeginsWith(token
, "name="_ns
)) {
// 5 == length of "name="; keep everything after the prefix.
140 mName
= StringTail(token
, token
.Length() - 5);
145 if (seenFormData
&& StringBeginsWith(token
, "filename="_ns
)) {
// 9 == length of "filename="; keep everything after the prefix.
146 mFilename
= StringTail(token
, token
.Length() - 9);
// Trim() is given the character set {space, double quote}.
147 mFilename
.Trim(" \"");
152 if (mName
.IsVoid()) {
153 // Could not parse a valid entry name.
156 } else if (headerName
.LowerCaseEqualsLiteral("content-type")) {
157 mContentType
= headerValue
;
163 // The end of a body is marked by a CRLF followed by the boundary. So the
164 // CRLF is part of the boundary and not the body, but any prior CRLFs are
165 // part of the body. This will position the iterator at the beginning of the
166 // boundary (after the CRLF).
//
// The bytes between the starting position and the CRLF preceding the next
// boundary are copied out as the part body, and the part is appended to
// mFormData under mName: as a string when mFilename is void, otherwise as a
// File built from a copy of the body. Several statements of this method
// (returns, iterator adjustments, the copy loop body) are not visible in this
// excerpt.
167 bool ParseBody(const nsACString
& aBoundaryString
,
168 nsACString::const_iterator
& aStart
,
169 nsACString::const_iterator
& aEnd
) {
// Start of the body; also the reference point for the length checks below.
170 const char* beginning
= aStart
.get();
172 // Find the boundary marking the end of the body.
173 nsACString::const_iterator
end(aEnd
);
174 if (!FindInReadable(aBoundaryString
, aStart
, end
)) {
178 // We found a boundary, strip the just prior CRLF, and consider
179 // everything else the body section.
180 if (aStart
.get() - beginning
< 2) {
181 // Only the first entry can have a boundary right at the beginning. Even
182 // an empty body will have a CRLF before the boundary. So this is
187 // Check that there is a CRLF right before the boundary.
190 // Skip optional hyphens.
// Both this test and the CR/LF test below peek at the byte after aStart via
// raw pointer arithmetic on aStart.get().
191 if (*aStart
== '-' && *(aStart
.get() + 1) == '-') {
192 if (aStart
.get() - beginning
< 2) {
199 if (*aStart
!= nsCRT::CR
|| *(aStart
.get() + 1) != nsCRT::LF
) {
// Copy of the body bytes: [beginning, aStart).
203 nsAutoCString
body(beginning
, aStart
.get() - beginning
);
205 // Restore iterator to after the \r\n as we promised.
206 // We do not need to handle the extra hyphens case since our boundary
207 // parser in PushOverBoundary()
211 mFormData
= new FormData();
214 NS_ConvertUTF8toUTF16
name(mName
);
// No filename parameter was recorded: store the entry as a string value.
216 if (mFilename
.IsVoid()) {
218 mFormData
->Append(name
, NS_ConvertUTF8toUTF16(body
), rv
);
219 MOZ_ASSERT(!rv
.Failed());
221 // Unfortunately we've to copy the data first since all our strings are
222 // going to free it. We also need fallible alloc, so we can't just use
// NOTE(review): the comment above asks for a fallible allocation, but the call
// below is moz_xmalloc(), which to my knowledge is the infallible (abort on
// OOM) allocator - confirm which behaviour is intended.
224 char* copy
= static_cast<char*>(moz_xmalloc(body
.Length()));
225 nsCString::const_iterator bodyIter
, bodyEnd
;
226 body
.BeginReading(bodyIter
);
227 body
.EndReading(bodyEnd
);
// Walks the whole body; the loop body is not visible here (presumably it
// copies each byte into `copy` - confirm).
229 while (bodyIter
!= bodyEnd
) {
// The copied buffer, the file name and the content type are passed to the
// File factory; the last-modified date is fixed at 0.
234 RefPtr
<Blob
> file
= File::CreateMemoryFileWithCustomLastModified(
235 mParentObject
, reinterpret_cast<void*>(copy
), body
.Length(),
236 NS_ConvertUTF8toUTF16(mFilename
), NS_ConvertUTF8toUTF16(mContentType
),
237 /* aLastModifiedDate */ 0);
238 if (NS_WARN_IF(!file
)) {
// Empty Optional passed as the third argument of Append().
242 Optional
<nsAString
> dummy
;
244 mFormData
->Append(name
, *file
, dummy
, rv
);
// A failed Append() is warned about and its exception suppressed.
245 if (NS_WARN_IF(rv
.Failed())) {
246 rv
.SuppressException();
// Stores the inputs for a later Parse() call.
// aMimeType: copied into mMimeType.
// aMixedCaseMimeType: copied into mMixedCaseMimeType (used by Parse() to find
//                     the boundary parameter).
// aData: body to parse; its initializer is not visible in this excerpt
//        (presumably stored in mData - confirm).
// aParent: stored in mParentObject.
255 FormDataParser(const nsACString
& aMimeType
,
256 const nsACString
& aMixedCaseMimeType
, const nsACString
& aData
,
257 nsIGlobalObject
* aParent
)
258 : mMimeType(aMimeType
),
259 mMixedCaseMimeType(aMixedCaseMimeType
),
262 mParentObject(aParent
) {}
// Body of the parser's main routine (called as parser.Parse() from
// BodyUtil::ConsumeFormData; the signature line is not visible in this
// excerpt). It reads the boundary from the mime type and then walks mData,
// tracking progress in mState: after a boundary and its line ending it moves
// to PARSE_HEADER, and an empty header line moves it on to PARSE_BODY. The
// switch/case lines and return statements are not visible here.
265 if (mData
.IsEmpty()) {
269 // Determine boundary from mimetype.
270 RefPtr
<CMimeType
> parsed
= CMimeType::Parse(mMixedCaseMimeType
);
275 nsAutoCString boundaryString
;
276 if (!parsed
->GetParameterValue("boundary"_ns
, boundaryString
)) {
280 nsACString::const_iterator start
, end
;
281 mData
.BeginReading(start
);
282 // This should ALWAYS point to the end of data.
283 // Helpers make copies.
284 mData
.EndReading(end
);
286 while (start
!= end
) {
// Per-part reset: no name, no filename, default content type.
289 mName
.SetIsVoid(true);
290 mFilename
.SetIsVoid(true);
291 mContentType
= "text/plain"_ns
;
// Loops while the current byte is HTTP whitespace (loop body not visible).
293 while (start
!= end
&& NS_IsHTTPWhitespace(*start
)) {
297 // MUST start with boundary.
298 if (!PushOverBoundary(boundaryString
, start
, end
)) {
// A '-' directly after the boundary is handled specially: a FormData is
// created if none exists yet (presumably this is the closing delimiter and
// parsing ends here - confirm against the lines not shown).
302 if (start
!= end
&& *start
== '-') {
305 mFormData
= new FormData();
// The boundary must be followed by a CRLF.
310 if (!PushOverLine(start
, end
)) {
313 mState
= PARSE_HEADER
;
318 if (!ParseHeader(start
, end
, &emptyHeader
)) {
// An empty header line must itself be followed by a CRLF.
322 if (emptyHeader
&& !PushOverLine(start
, end
)) {
// Empty header line ends the header section; otherwise keep reading headers.
326 mState
= emptyHeader
? PARSE_BODY
: PARSE_HEADER
;
// A part without a usable name is reported before body parsing.
330 if (mName
.IsVoid()) {
332 "No content-disposition header with a valid name was "
333 "found. Failing at body parse.");
337 if (!ParseBody(boundaryString
, start
, end
)) {
345 MOZ_CRASH("Invalid case");
349 MOZ_ASSERT_UNREACHABLE("Should never reach here.");
// Hands the parsed FormData to the caller by calling forget() on mFormData,
// so the parser gives up its own reference in the process.
353 already_AddRefed
<FormData
> GetFormData() { return mFormData
.forget(); }
// Wraps the consumed body bytes in a JS ArrayBuffer and stores it in aValue.
// aCx: JS context used for the allocation and for exception reporting.
// aValue: out-handle that receives the array buffer object.
// aInputLength: byte length passed to JS::NewArrayBufferWithContents().
// aInput: owning buffer; it is moved into the NewArrayBufferWithContents() call.
// The final parameter (used below as aRv) and the failure check are on lines
// not visible in this excerpt.
358 void BodyUtil::ConsumeArrayBuffer(JSContext
* aCx
,
359 JS::MutableHandle
<JSObject
*> aValue
,
360 uint32_t aInputLength
,
361 UniquePtr
<uint8_t[], JS::FreePolicy
> aInput
,
// Declares up front that aRv may end up carrying a JS exception.
363 aRv
.MightThrowJSException();
365 JS::Rooted
<JSObject
*> arrayBuffer(aCx
);
367 JS::NewArrayBufferWithContents(aCx
, aInputLength
, std::move(aInput
));
// Moves the exception pending on aCx into aRv.
369 aRv
.StealExceptionFromJSContext(aCx
);
372 aValue
.set(arrayBuffer
);
// Builds a memory-backed Blob from the consumed body bytes.
// aParent: global passed to Blob::CreateMemoryBlob().
// aMimeType: type string given to the blob.
// aInputLength: number of bytes in the input buffer.
// The remaining parameters (used below as aInput and aRv) are declared on
// lines not visible in this excerpt.
// Returns the blob via forget(); NS_ERROR_DOM_UNKNOWN_ERR is thrown on aRv on a
// path whose condition is not visible here (presumably when blob creation
// fails - confirm).
376 already_AddRefed
<Blob
> BodyUtil::ConsumeBlob(nsIGlobalObject
* aParent
,
377 const nsString
& aMimeType
,
378 uint32_t aInputLength
,
381 RefPtr
<Blob
> blob
= Blob::CreateMemoryBlob(
382 aParent
, reinterpret_cast<void*>(aInput
), aInputLength
, aMimeType
);
385 aRv
.Throw(NS_ERROR_DOM_UNKNOWN_ERR
);
388 return blob
.forget();
// Produces a Uint8Array over the consumed body bytes.
// First delegates to ConsumeArrayBuffer() to obtain an ArrayBuffer that takes
// over aInput, then creates a Uint8Array view over it starting at offset 0
// with aInputLength elements. The error checks between the steps and the
// final store into aValue are on lines not visible in this excerpt.
392 void BodyUtil::ConsumeBytes(JSContext
* aCx
, JS::MutableHandle
<JSObject
*> aValue
,
393 uint32_t aInputLength
,
394 UniquePtr
<uint8_t[], JS::FreePolicy
> aInput
,
// Declares up front that aRv may end up carrying a JS exception.
396 aRv
.MightThrowJSException();
398 JS::Rooted
<JSObject
*> arrayBuffer(aCx
);
399 ConsumeArrayBuffer(aCx
, &arrayBuffer
, aInputLength
, std::move(aInput
), aRv
);
404 JS::Rooted
<JSObject
*> bytes(
405 aCx
, JS_NewUint8ArrayWithBuffer(aCx
, arrayBuffer
, 0, aInputLength
));
// Moves the exception pending on aCx into aRv.
407 aRv
.StealExceptionFromJSContext(aCx
);
// Converts a consumed body into a FormData, choosing the parser by mime type:
//  - "multipart/form-data" (optionally followed by ';...'): FormDataParser.
//  - "application/x-www-form-urlencoded" (optionally followed by ';...'):
//    URLParams::Parse(), appending every name/value pair as a string entry.
// A multipart body that fails to parse throws a MSG_BAD_FORMDATA TypeError on
// aRv; the same error is thrown at the end of the function (presumably for
// any other mime type - the control flow in between is not fully visible).
// aParent: global for the resulting objects.
// aMimeType: mime type used for the prefix checks below.
// aMixedCaseMimeType: forwarded to FormDataParser.
// aStr: raw body bytes.
// The last parameter (used below as aRv) is on a line not visible here.
414 already_AddRefed
<FormData
> BodyUtil::ConsumeFormData(
415 nsIGlobalObject
* aParent
, const nsCString
& aMimeType
,
416 const nsACString
& aMixedCaseMimeType
, const nsCString
& aStr
,
418 constexpr auto formDataMimeType
= "multipart/form-data"_ns
;
420 // Allow semicolon separated boundary/encoding suffix like
421 // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
422 bool isValidFormDataMimeType
= StringBeginsWith(aMimeType
, formDataMimeType
);
// If anything follows the prefix, the very next character must be ';'.
424 if (isValidFormDataMimeType
&&
425 aMimeType
.Length() > formDataMimeType
.Length()) {
426 isValidFormDataMimeType
= aMimeType
[formDataMimeType
.Length()] == ';';
429 if (isValidFormDataMimeType
) {
// NOTE(review): FormDataParser creates its FormData with `new FormData()`
// (no parent), while the urlencoded branch below passes aParent - confirm the
// difference is intended.
430 FormDataParser
parser(aMimeType
, aMixedCaseMimeType
, aStr
, aParent
);
431 if (!parser
.Parse()) {
432 aRv
.ThrowTypeError
<MSG_BAD_FORMDATA
>();
436 RefPtr
<FormData
> fd
= parser
.GetFormData();
441 constexpr auto urlDataMimeType
= "application/x-www-form-urlencoded"_ns
;
442 bool isValidUrlEncodedMimeType
= StringBeginsWith(aMimeType
, urlDataMimeType
);
// Same rule as above: a longer mime type must continue with ';'.
444 if (isValidUrlEncodedMimeType
&&
445 aMimeType
.Length() > urlDataMimeType
.Length()) {
446 isValidUrlEncodedMimeType
= aMimeType
[urlDataMimeType
.Length()] == ';';
449 if (isValidUrlEncodedMimeType
) {
450 RefPtr
<FormData
> fd
= new FormData(aParent
);
// The callback receives each parsed pair and appends it as UTF-16 strings.
// The parse result is kept only in a DebugOnly variable.
451 DebugOnly
<bool> status
= URLParams::Parse(
452 aStr
, true, [&fd
](const nsACString
& aName
, const nsACString
& aValue
) {
453 IgnoredErrorResult rv
;
454 fd
->Append(NS_ConvertUTF8toUTF16(aName
),
455 NS_ConvertUTF8toUTF16(aValue
), rv
);
456 MOZ_ASSERT(!rv
.Failed());
464 aRv
.ThrowTypeError
<MSG_BAD_FORMDATA
>();
// Decodes the consumed body bytes as UTF-8 into aText, using
// DecodeWithBOMRemoval() on UTF_8_ENCODING.
// aInputLength / aInput: length and start of the raw byte buffer, wrapped in a
// Span for the decoder.
// The output parameter (used below as aText), the handling of the decoder's
// result and the returned nsresult are on lines not visible in this excerpt.
469 nsresult
BodyUtil::ConsumeText(uint32_t aInputLength
, uint8_t* aInput
,
472 UTF_8_ENCODING
->DecodeWithBOMRemoval(Span(aInput
, aInputLength
), aText
);
// Parses aStr as JSON using JS_ParseJSON().
// aCx: JS context used for parsing and exception handling.
// aValue: out-handle for the parsed value (the store into it is on a line not
//         visible in this excerpt).
// aStr: UTF-16 JSON text.
// aRv: receives the failure. If parsing fails without a pending exception,
//      NS_ERROR_DOM_UNKNOWN_ERR is thrown; otherwise the pending JS exception
//      is fetched, cleared from aCx and rethrown through aRv.
480 void BodyUtil::ConsumeJson(JSContext
* aCx
, JS::MutableHandle
<JS::Value
> aValue
,
481 const nsString
& aStr
, ErrorResult
& aRv
) {
// Declares up front that aRv may end up carrying a JS exception.
482 aRv
.MightThrowJSException();
484 JS::Rooted
<JS::Value
> json(aCx
);
485 if (!JS_ParseJSON(aCx
, aStr
.get(), aStr
.Length(), &json
)) {
// Parse failure with nothing pending on the context: report a generic error.
486 if (!JS_IsExceptionPending(aCx
)) {
487 aRv
.Throw(NS_ERROR_DOM_UNKNOWN_ERR
);
// Otherwise move the pending exception from the context into aRv.
491 JS::Rooted
<JS::Value
> exn(aCx
);
492 DebugOnly
<bool> gotException
= JS_GetPendingException(aCx
, &exn
);
493 MOZ_ASSERT(gotException
);
495 JS_ClearPendingException(aCx
);
496 aRv
.ThrowJSException(aCx
, exn
);
503 } // namespace mozilla::dom