Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / WebKit / Source / core / html / parser / HTMLSrcsetParser.cpp
blob: b9910d859fe997daa7175c2d0744fe8affa8bbc6
1 /*
2 * Copyright (C) 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2013 Google Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
14 * distribution.
15 * * Neither the name of Google Inc. nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "config.h"
33 #include "core/html/parser/HTMLSrcsetParser.h"
35 #include "core/dom/Document.h"
36 #include "core/fetch/MemoryCache.h"
37 #include "core/fetch/ResourceFetcher.h"
38 #include "core/frame/FrameConsole.h"
39 #include "core/frame/LocalFrame.h"
40 #include "core/frame/UseCounter.h"
41 #include "core/html/parser/HTMLParserIdioms.h"
42 #include "core/inspector/ConsoleMessage.h"
43 #include "platform/ParsingUtilities.h"
45 namespace blink {
47 static bool compareByDensity(const ImageCandidate& first, const ImageCandidate& second)
49 return first.density() < second.density();
// States of the descriptor tokenizer implemented by tokenizeDescriptors().
enum DescriptorTokenizerState {
    // Scanning a descriptor token (or about to start one).
    TokenStart,
    // Inside a '(' ... ')' group.
    InParenthesis,
    // Skipping the whitespace that follows a completed token.
    AfterToken,
};
58 struct DescriptorToken {
59 unsigned start;
60 unsigned length;
62 DescriptorToken(unsigned start, unsigned length)
63 : start(start)
64 , length(length)
68 unsigned lastIndex()
70 return start + length - 1;
73 template<typename CharType>
74 int toInt(const CharType* attribute, bool& isValid)
76 unsigned position = 0;
77 // Make sure the integer is a valid non-negative integer
78 // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-non-negative-integer
79 unsigned lengthExcludingDescriptor = length - 1;
80 while (position < lengthExcludingDescriptor) {
81 if (!isASCIIDigit(*(attribute + start + position))) {
82 isValid = false;
83 return 0;
85 ++position;
87 return charactersToIntStrict(attribute + start, lengthExcludingDescriptor, &isValid);
90 template<typename CharType>
91 float toFloat(const CharType* attribute, bool& isValid)
93 // Make sure the is a valid floating point number
94 // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
95 unsigned lengthExcludingDescriptor = length - 1;
96 if (lengthExcludingDescriptor > 0 && *(attribute + start) == '+') {
97 isValid = false;
98 return 0;
100 Decimal result = parseToDecimalForNumberType(String(attribute + start, lengthExcludingDescriptor));
101 isValid = result.isFinite();
102 if (!isValid)
103 return 0;
104 return static_cast<float>(result.toDouble());
108 template<typename CharType>
109 static void appendDescriptorAndReset(const CharType* attributeStart, const CharType*& descriptorStart, const CharType* position, Vector<DescriptorToken>& descriptors)
111 if (position > descriptorStart)
112 descriptors.append(DescriptorToken(descriptorStart - attributeStart, position - descriptorStart));
113 descriptorStart = 0;
// The following is called appendCharacter to match the spec's terminology.
//
// Tokens are not copied out of the attribute, so "appending" a character only
// records where the current descriptor token starts: if |descriptorStart| is
// null it is set to |position|, otherwise it is left untouched.
//
// |descriptorStart| is taken by reference so that the recorded start is
// visible to the caller. Taking it by value would make the assignment a dead
// store and the whole function a no-op.
template<typename CharType>
static void appendCharacter(const CharType*& descriptorStart, const CharType* position)
{
    if (!descriptorStart)
        descriptorStart = position;
}
// True once |position| has reached or passed |end|.
template<typename CharType>
static bool isEOF(const CharType* position, const CharType* end)
{
    return !(position < end);
}
131 template<typename CharType>
132 static void tokenizeDescriptors(const CharType* attributeStart,
133 const CharType*& position,
134 const CharType* attributeEnd,
135 Vector<DescriptorToken>& descriptors)
137 DescriptorTokenizerState state = TokenStart;
138 const CharType* descriptorsStart = position;
139 const CharType* currentDescriptorStart = descriptorsStart;
140 while (true) {
141 switch (state) {
142 case TokenStart:
143 if (isEOF(position, attributeEnd)) {
144 appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors);
145 return;
147 if (isComma(*position)) {
148 appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors);
149 ++position;
150 return;
152 if (isHTMLSpace(*position)) {
153 appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors);
154 currentDescriptorStart = position + 1;
155 state = AfterToken;
156 } else if (*position == '(') {
157 appendCharacter(currentDescriptorStart, position);
158 state = InParenthesis;
159 } else {
160 appendCharacter(currentDescriptorStart, position);
162 break;
163 case InParenthesis:
164 if (isEOF(position, attributeEnd)) {
165 appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors);
166 return;
168 if (*position == ')') {
169 appendCharacter(currentDescriptorStart, position);
170 state = TokenStart;
171 } else {
172 appendCharacter(currentDescriptorStart, position);
174 break;
175 case AfterToken:
176 if (isEOF(position, attributeEnd))
177 return;
178 if (!isHTMLSpace(*position)) {
179 state = TokenStart;
180 currentDescriptorStart = position;
181 --position;
183 break;
185 ++position;
189 static void srcsetError(Document* document, String message)
191 if (document && document->frame()) {
192 StringBuilder errorMessage;
193 errorMessage.append("Failed parsing 'srcset' attribute value since ");
194 errorMessage.append(message);
195 document->frame()->console().addMessage(ConsoleMessage::create(OtherMessageSource, ErrorMessageLevel, errorMessage.toString()));
199 template<typename CharType>
200 static bool parseDescriptors(const CharType* attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result, Document* document)
202 for (DescriptorToken& descriptor : descriptors) {
203 if (descriptor.length == 0)
204 continue;
205 CharType c = attribute[descriptor.lastIndex()];
206 bool isValid = false;
207 if (c == 'w') {
208 if (result.hasDensity() || result.hasWidth()) {
209 srcsetError(document, "it has multiple 'w' descriptors or a mix of 'x' and 'w' descriptors.");
210 return false;
212 int resourceWidth = descriptor.toInt(attribute, isValid);
213 if (!isValid || resourceWidth <= 0) {
214 srcsetError(document, "its 'w' descriptor is invalid.");
215 return false;
217 result.setResourceWidth(resourceWidth);
218 } else if (c == 'h') {
219 // This is here only for future compat purposes.
220 // The value of the 'h' descriptor is not used.
221 if (result.hasDensity() || result.hasHeight()) {
222 srcsetError(document, "it has multiple 'h' descriptors or a mix of 'x' and 'h' descriptors.");
223 return false;
225 int resourceHeight = descriptor.toInt(attribute, isValid);
226 if (!isValid || resourceHeight <= 0) {
227 srcsetError(document, "its 'h' descriptor is invalid.");
228 return false;
230 result.setResourceHeight(resourceHeight);
231 } else if (c == 'x') {
232 if (result.hasDensity() || result.hasHeight() || result.hasWidth()) {
233 srcsetError(document, "it has multiple 'x' descriptors or a mix of 'x' and 'w'/'h' descriptors.");
234 return false;
236 float density = descriptor.toFloat(attribute, isValid);
237 if (!isValid || density < 0) {
238 srcsetError(document, "its 'x' descriptor is invalid.");
239 return false;
241 result.setDensity(density);
242 } else {
243 srcsetError(document, "it has an unknown descriptor.");
244 return false;
247 bool res = !result.hasHeight() || result.hasWidth();
248 if (!res)
249 srcsetError(document, "it has an 'h' descriptor and no 'w' descriptor.");
250 return res;
253 static bool parseDescriptors(const String& attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result, Document* document)
255 // FIXME: See if StringView can't be extended to replace DescriptorToken here.
256 if (attribute.is8Bit()) {
257 return parseDescriptors(attribute.characters8(), descriptors, result, document);
259 return parseDescriptors(attribute.characters16(), descriptors, result, document);
262 // http://picture.responsiveimages.org/#parse-srcset-attr
263 template<typename CharType>
264 static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, const CharType* attributeStart, unsigned length, Vector<ImageCandidate>& imageCandidates, Document* document)
266 const CharType* position = attributeStart;
267 const CharType* attributeEnd = position + length;
269 while (position < attributeEnd) {
270 // 4. Splitting loop: Collect a sequence of characters that are space characters or U+002C COMMA characters.
271 skipWhile<CharType, isHTMLSpaceOrComma<CharType>>(position, attributeEnd);
272 if (position == attributeEnd) {
273 // Contrary to spec language - descriptor parsing happens on each candidate, so when we reach the attributeEnd, we can exit.
274 break;
276 const CharType* imageURLStart = position;
278 // 6. Collect a sequence of characters that are not space characters, and let that be url.
279 skipUntil<CharType, isHTMLSpace<CharType>>(position, attributeEnd);
280 const CharType* imageURLEnd = position;
282 DescriptorParsingResult result;
284 // 8. If url ends with a U+002C COMMA character (,)
285 if (isComma(*(position - 1))) {
286 // Remove all trailing U+002C COMMA characters from url.
287 imageURLEnd = position - 1;
288 reverseSkipWhile<CharType, isComma>(imageURLEnd, imageURLStart);
289 ++imageURLEnd;
290 // If url is empty, then jump to the step labeled splitting loop.
291 if (imageURLStart == imageURLEnd)
292 continue;
293 } else {
294 skipWhile<CharType, isHTMLSpace<CharType>>(position, attributeEnd);
295 Vector<DescriptorToken> descriptorTokens;
296 tokenizeDescriptors(attributeStart, position, attributeEnd, descriptorTokens);
297 // Contrary to spec language - descriptor parsing happens on each candidate.
298 // This is a black-box equivalent, to avoid storing descriptor lists for each candidate.
299 if (!parseDescriptors(attribute, descriptorTokens, result, document)) {
300 if (document) {
301 UseCounter::count(document, UseCounter::SrcsetDroppedCandidate);
302 if (document->frame())
303 document->frame()->console().addMessage(ConsoleMessage::create(OtherMessageSource, ErrorMessageLevel, String("Dropped srcset candidate ") + String(imageURLStart, imageURLEnd - imageURLStart)));
305 continue;
309 ASSERT(imageURLEnd > attributeStart);
310 unsigned imageURLStartingPosition = imageURLStart - attributeStart;
311 ASSERT(imageURLEnd > imageURLStart);
312 unsigned imageURLLength = imageURLEnd - imageURLStart;
313 imageCandidates.append(ImageCandidate(attribute, imageURLStartingPosition, imageURLLength, result, ImageCandidate::SrcsetOrigin));
314 // 11. Return to the step labeled splitting loop.
318 static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, Vector<ImageCandidate>& imageCandidates, Document* document)
320 if (attribute.isNull())
321 return;
323 if (attribute.is8Bit())
324 parseImageCandidatesFromSrcsetAttribute<LChar>(attribute, attribute.characters8(), attribute.length(), imageCandidates, document);
325 else
326 parseImageCandidatesFromSrcsetAttribute<UChar>(attribute, attribute.characters16(), attribute.length(), imageCandidates, document);
329 static unsigned selectionLogic(Vector<ImageCandidate*>& imageCandidates, float deviceScaleFactor)
331 unsigned i = 0;
333 for (; i < imageCandidates.size() - 1; ++i) {
334 unsigned next = i + 1;
335 float nextDensity;
336 float currentDensity;
337 float geometricMean;
339 nextDensity = imageCandidates[next]->density();
340 if (nextDensity < deviceScaleFactor)
341 continue;
343 currentDensity = imageCandidates[i]->density();
344 geometricMean = sqrt(currentDensity * nextDensity);
345 if (((deviceScaleFactor <= 1.0) && (deviceScaleFactor > currentDensity)) || (deviceScaleFactor >= geometricMean))
346 return next;
347 break;
349 return i;
352 static unsigned avoidDownloadIfHigherDensityResourceIsInCache(Vector<ImageCandidate*>& imageCandidates, unsigned winner, Document* document)
354 if (!document)
355 return winner;
356 for (unsigned i = imageCandidates.size() - 1; i > winner; --i) {
357 KURL url = document->completeURL(stripLeadingAndTrailingHTMLSpaces(imageCandidates[i]->url()));
358 if (memoryCache()->resourceForURL(url, document->fetcher()->getCacheIdentifier()))
359 return i;
361 return winner;
364 static ImageCandidate pickBestImageCandidate(float deviceScaleFactor, float sourceSize, Vector<ImageCandidate>& imageCandidates, Document* document = nullptr)
366 const float defaultDensityValue = 1.0;
367 bool ignoreSrc = false;
368 if (imageCandidates.isEmpty())
369 return ImageCandidate();
371 // http://picture.responsiveimages.org/#normalize-source-densities
372 for (ImageCandidate& image : imageCandidates) {
373 if (image.resourceWidth() > 0) {
374 image.setDensity((float)image.resourceWidth() / sourceSize);
375 ignoreSrc = true;
376 } else if (image.density() < 0) {
377 image.setDensity(defaultDensityValue);
381 std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByDensity);
383 Vector<ImageCandidate*> deDupedImageCandidates;
384 float prevDensity = -1.0;
385 for (ImageCandidate& image : imageCandidates) {
386 if (image.density() != prevDensity && (!ignoreSrc || !image.srcOrigin()))
387 deDupedImageCandidates.append(&image);
388 prevDensity = image.density();
390 unsigned winner = selectionLogic(deDupedImageCandidates, deviceScaleFactor);
391 ASSERT(winner < deDupedImageCandidates.size());
392 winner = avoidDownloadIfHigherDensityResourceIsInCache(deDupedImageCandidates, winner, document);
394 float winningDensity = deDupedImageCandidates[winner]->density();
395 // 16. If an entry b in candidates has the same associated ... pixel density as an earlier entry a in candidates,
396 // then remove entry b
397 while ((winner > 0) && (deDupedImageCandidates[winner - 1]->density() == winningDensity))
398 --winner;
400 return *deDupedImageCandidates[winner];
403 ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, float sourceSize, const String& srcsetAttribute, Document* document)
405 Vector<ImageCandidate> imageCandidates;
407 parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates, document);
409 return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates, document);
412 ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, float sourceSize, const String& srcAttribute, const String& srcsetAttribute, Document* document)
414 if (srcsetAttribute.isNull()) {
415 if (srcAttribute.isNull())
416 return ImageCandidate();
417 return ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin);
420 Vector<ImageCandidate> imageCandidates;
422 parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates, document);
424 if (!srcAttribute.isEmpty())
425 imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin));
427 return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates, document);
430 String bestFitSourceForImageAttributes(float deviceScaleFactor, float sourceSize, const String& srcAttribute, ImageCandidate& srcsetImageCandidate)
432 if (srcsetImageCandidate.isEmpty())
433 return srcAttribute;
435 Vector<ImageCandidate> imageCandidates;
436 imageCandidates.append(srcsetImageCandidate);
438 if (!srcAttribute.isEmpty())
439 imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin));
441 return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates).toString();