When Retrier succeeds, record errors it encountered.
[chromium-blink-merge.git] / webkit / appcache / manifest_parser.cc
blob0e76e933348948b00b7eea8f6b0fdb8a9ba784e7
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 #include "webkit/appcache/manifest_parser.h"
34 #include "base/command_line.h"
35 #include "base/i18n/icu_string_conversions.h"
36 #include "base/logging.h"
37 #include "base/utf_string_conversions.h"
38 #include "googleurl/src/gurl.h"
40 namespace appcache {
namespace {

// Helper function used to identify 'isPattern' annotations.
//
// |line_p| points just past the last URL token of a manifest line and
// |line_end| points one past the line's final character. Returns true only
// when the text left after skipping leading spaces/tabs is exactly
// "isPattern" (case-sensitive, with nothing following it).
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  // Skip whitespace separating the resource url from the annotation.
  // Note: trailing whitespace has already been trimmed from the line.
  while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
    ++line_p;

  // Nothing follows the URL, so there is no annotation at all.
  if (line_p == line_end)
    return false;

  // The whole remainder must match; "isPattern extra" is not an annotation.
  std::wstring annotation(line_p, line_end - line_p);
  return annotation == L"isPattern";
}

}  // namespace
// Identifies which manifest section the parser is currently reading.
enum Mode {
  EXPLICIT,          // "CACHE:" section; also the mode parsing starts in.
  INTERCEPT,         // "CHROMIUM-INTERCEPT:" section.
  FALLBACK,          // "FALLBACK:" section.
  ONLINE_WHITELIST,  // "NETWORK:" section.
  UNKNOWN_MODE,      // Any other "<name>:" header; its lines are ignored.
};
// The <intercept_type> token of a line in the CHROMIUM-INTERCEPT section.
enum InterceptVerb {
  RETURN,        // The "return" type.
  EXECUTE,       // The "execute" type.
  UNKNOWN_VERB,  // Any type that is not recognized; the line is skipped.
};
73 Manifest::Manifest() : online_whitelist_all(false) {}
75 Manifest::~Manifest() {}
77 bool ParseManifest(const GURL& manifest_url, const char* data, int length,
78 Manifest& manifest) {
79 // This is an implementation of the parsing algorithm specified in
80 // the HTML5 offline web application docs:
81 // http://www.w3.org/TR/html5/offline.html
82 // Do not modify it without consulting those docs.
83 // Though you might be tempted to convert these wstrings to UTF-8 or
84 // base::string16, this implementation seems simpler given the constraints.
86 const wchar_t kSignature[] = L"CACHE MANIFEST";
87 const size_t kSignatureLength = arraysize(kSignature) - 1;
88 const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST";
89 const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1;
91 DCHECK(manifest.explicit_urls.empty());
92 DCHECK(manifest.fallback_namespaces.empty());
93 DCHECK(manifest.online_whitelist_namespaces.empty());
94 DCHECK(!manifest.online_whitelist_all);
96 Mode mode = EXPLICIT;
98 std::wstring data_string;
99 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
100 // until UTF8ToWide uses 0xFFFD Unicode replacement character.
101 base::CodepageToWide(std::string(data, length), base::kCodepageUTF8,
102 base::OnStringConversionError::SUBSTITUTE, &data_string);
103 const wchar_t* p = data_string.c_str();
104 const wchar_t* end = p + data_string.length();
106 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
107 // Example: "CACHE MANIFEST #comment" is a valid signature.
108 // Example: "CACHE MANIFEST;V2" is not.
110 // When the input data starts with a UTF-8 Byte-Order-Mark
111 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a
112 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
113 int bom_offset = 0;
114 if (!data_string.empty() && data_string[0] == 0xFEFF) {
115 bom_offset = 1;
116 ++p;
119 if (p >= end)
120 return false;
122 // Check for a supported signature and skip p past it.
123 if (0 == data_string.compare(bom_offset, kSignatureLength,
124 kSignature)) {
125 p += kSignatureLength;
126 } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength,
127 kChromiumSignature)) {
128 p += kChromiumSignatureLength;
129 } else {
130 return false;
133 // Character after "CACHE MANIFEST" must be whitespace.
134 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
135 return false;
137 // Skip to the end of the line.
138 while (p < end && *p != '\r' && *p != '\n')
139 ++p;
141 while (1) {
142 // Skip whitespace
143 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
144 ++p;
146 if (p == end)
147 break;
149 const wchar_t* line_start = p;
151 // Find the end of the line
152 while (p < end && *p != '\r' && *p != '\n')
153 ++p;
155 // Check if we have a comment
156 if (*line_start == '#')
157 continue;
159 // Get rid of trailing whitespace
160 const wchar_t* tmp = p - 1;
161 while (tmp > line_start && (*tmp == ' ' || *tmp == '\t'))
162 --tmp;
164 std::wstring line(line_start, tmp - line_start + 1);
166 if (line == L"CACHE:") {
167 mode = EXPLICIT;
168 } else if (line == L"FALLBACK:") {
169 mode = FALLBACK;
170 } else if (line == L"NETWORK:") {
171 mode = ONLINE_WHITELIST;
172 } else if (line == L"CHROMIUM-INTERCEPT:") {
173 mode = INTERCEPT;
174 } else if (*(line.end() - 1) == ':') {
175 mode = UNKNOWN_MODE;
176 } else if (mode == UNKNOWN_MODE) {
177 continue;
178 } else if (line == L"*" && mode == ONLINE_WHITELIST) {
179 manifest.online_whitelist_all = true;
180 continue;
181 } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) {
182 const wchar_t *line_p = line.c_str();
183 const wchar_t *line_end = line_p + line.length();
185 // Look for whitespace separating the URL from subsequent ignored tokens.
186 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
187 ++line_p;
189 base::string16 url16;
190 WideToUTF16(line.c_str(), line_p - line.c_str(), &url16);
191 GURL url = manifest_url.Resolve(url16);
192 if (!url.is_valid())
193 continue;
194 if (url.has_ref()) {
195 GURL::Replacements replacements;
196 replacements.ClearRef();
197 url = url.ReplaceComponents(replacements);
200 // Scheme component must be the same as the manifest URL's.
201 if (url.scheme() != manifest_url.scheme()) {
202 continue;
205 // See http://code.google.com/p/chromium/issues/detail?id=69594
206 // We willfully violate the HTML5 spec at this point in order
207 // to support the appcaching of cross-origin HTTPS resources.
208 // Per the spec, EXPLICIT cross-origin HTTS resources should be
209 // ignored here. We've opted for a milder constraint and allow
210 // caching unless the resource has a "no-store" header. That
211 // condition is enforced in AppCacheUpdateJob.
213 if (mode == EXPLICIT) {
214 manifest.explicit_urls.insert(url.spec());
215 } else {
216 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
217 manifest.online_whitelist_namespaces.push_back(
218 Namespace(NETWORK_NAMESPACE, url, GURL(), is_pattern));
220 } else if (mode == INTERCEPT) {
221 // Lines of the form,
222 // <urlnamespace> <intercept_type> <targeturl>
223 const wchar_t* line_p = line.c_str();
224 const wchar_t* line_end = line_p + line.length();
226 // Look for first whitespace separating the url namespace from
227 // the intercept type.
228 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
229 ++line_p;
231 if (line_p == line_end)
232 continue; // There was no whitespace separating the URLs.
234 base::string16 namespace_url16;
235 WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
236 GURL namespace_url = manifest_url.Resolve(namespace_url16);
237 if (!namespace_url.is_valid())
238 continue;
239 if (namespace_url.has_ref()) {
240 GURL::Replacements replacements;
241 replacements.ClearRef();
242 namespace_url = namespace_url.ReplaceComponents(replacements);
245 // The namespace URL must have the same scheme, host and port
246 // as the manifest's URL.
247 if (manifest_url.GetOrigin() != namespace_url.GetOrigin())
248 continue;
250 // Skip whitespace separating namespace from the type.
251 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
252 ++line_p;
254 // Look for whitespace separating the type from the target url.
255 const wchar_t* type_start = line_p;
256 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
257 ++line_p;
259 // Look for a type value we understand, otherwise skip the line.
260 InterceptVerb verb = UNKNOWN_VERB;
261 std::wstring type(type_start, line_p - type_start);
262 if (type == L"return") {
263 verb = RETURN;
264 } else if (type == L"execute" &&
265 CommandLine::ForCurrentProcess()->HasSwitch(
266 kEnableExecutableHandlers)) {
267 verb = EXECUTE;
269 if (verb == UNKNOWN_VERB)
270 continue;
272 // Skip whitespace separating type from the target_url.
273 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
274 ++line_p;
276 // Look for whitespace separating the URL from subsequent ignored tokens.
277 const wchar_t* target_url_start = line_p;
278 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
279 ++line_p;
281 base::string16 target_url16;
282 WideToUTF16(target_url_start, line_p - target_url_start, &target_url16);
283 GURL target_url = manifest_url.Resolve(target_url16);
284 if (!target_url.is_valid())
285 continue;
287 if (target_url.has_ref()) {
288 GURL::Replacements replacements;
289 replacements.ClearRef();
290 target_url = target_url.ReplaceComponents(replacements);
292 if (manifest_url.GetOrigin() != target_url.GetOrigin())
293 continue;
295 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
296 manifest.intercept_namespaces.push_back(
297 Namespace(INTERCEPT_NAMESPACE, namespace_url,
298 target_url, is_pattern, verb == EXECUTE));
299 } else if (mode == FALLBACK) {
300 const wchar_t* line_p = line.c_str();
301 const wchar_t* line_end = line_p + line.length();
303 // Look for whitespace separating the two URLs
304 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
305 ++line_p;
307 if (line_p == line_end) {
308 // There was no whitespace separating the URLs.
309 continue;
312 base::string16 namespace_url16;
313 WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
314 GURL namespace_url = manifest_url.Resolve(namespace_url16);
315 if (!namespace_url.is_valid())
316 continue;
317 if (namespace_url.has_ref()) {
318 GURL::Replacements replacements;
319 replacements.ClearRef();
320 namespace_url = namespace_url.ReplaceComponents(replacements);
323 // Fallback namespace URL must have the same scheme, host and port
324 // as the manifest's URL.
325 if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) {
326 continue;
329 // Skip whitespace separating fallback namespace from URL.
330 while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
331 ++line_p;
333 // Look for whitespace separating the URL from subsequent ignored tokens.
334 const wchar_t* fallback_start = line_p;
335 while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
336 ++line_p;
338 base::string16 fallback_url16;
339 WideToUTF16(fallback_start, line_p - fallback_start, &fallback_url16);
340 GURL fallback_url = manifest_url.Resolve(fallback_url16);
341 if (!fallback_url.is_valid())
342 continue;
343 if (fallback_url.has_ref()) {
344 GURL::Replacements replacements;
345 replacements.ClearRef();
346 fallback_url = fallback_url.ReplaceComponents(replacements);
349 // Fallback entry URL must have the same scheme, host and port
350 // as the manifest's URL.
351 if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) {
352 continue;
355 bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
357 // Store regardless of duplicate namespace URL. Only first match
358 // will ever be used.
359 manifest.fallback_namespaces.push_back(
360 Namespace(FALLBACK_NAMESPACE, namespace_url,
361 fallback_url, is_pattern));
362 } else {
363 NOTREACHED();
367 return true;
370 } // namespace appcache