1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
8 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
27 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "content/browser/appcache/appcache_manifest_parser.h"
34 #include "base/command_line.h"
35 #include "base/i18n/icu_string_conversions.h"
36 #include "base/logging.h"
37 #include "base/strings/utf_string_conversions.h"
// Helper function used to identify 'isPattern' annotations.
//
// |line_p| points at the first character following the last URL token that
// was consumed from a manifest line, and |line_end| points one past the final
// character of that line. The range [line_p, line_end) is only read, never
// modified.
//
// Returns true only when, after skipping leading spaces and tabs, the rest of
// the range is exactly the text "isPattern" (case-sensitive, with nothing
// following it). Returns false for an empty or whitespace-only range, and for
// any other trailing text.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  // Skip whitespace separating the resource url from the annotation.
  // Note: trailing whitespace has already been trimmed from the line.
  while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
    ++line_p;

  // Nothing follows the url, so there is no annotation. Using '>=' rather
  // than '==' also rejects an inverted range, which would otherwise turn into
  // a negative (i.e. enormous unsigned) length below.
  if (line_p >= line_end)
    return false;

  const std::wstring annotation(line_p, line_end - line_p);
  return annotation == L"isPattern";
}
73 AppCacheManifest::AppCacheManifest()
74 : online_whitelist_all(false),
75 did_ignore_intercept_namespaces(false) {
78 AppCacheManifest::~AppCacheManifest() {}
80 bool ParseManifest(const GURL
& manifest_url
, const char* data
, int length
,
81 ParseMode parse_mode
, AppCacheManifest
& manifest
) {
82 // This is an implementation of the parsing algorithm specified in
83 // the HTML5 offline web application docs:
84 // http://www.w3.org/TR/html5/offline.html
85 // Do not modify it without consulting those docs.
86 // Though you might be tempted to convert these wstrings to UTF-8 or
87 // base::string16, this implementation seems simpler given the constraints.
89 const wchar_t kSignature
[] = L
"CACHE MANIFEST";
90 const size_t kSignatureLength
= arraysize(kSignature
) - 1;
91 const wchar_t kChromiumSignature
[] = L
"CHROMIUM CACHE MANIFEST";
92 const size_t kChromiumSignatureLength
= arraysize(kChromiumSignature
) - 1;
94 DCHECK(manifest
.explicit_urls
.empty());
95 DCHECK(manifest
.fallback_namespaces
.empty());
96 DCHECK(manifest
.online_whitelist_namespaces
.empty());
97 DCHECK(!manifest
.online_whitelist_all
);
98 DCHECK(!manifest
.did_ignore_intercept_namespaces
);
100 Mode mode
= EXPLICIT
;
102 std::wstring data_string
;
103 // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
104 // until UTF8ToWide uses 0xFFFD Unicode replacement character.
105 base::CodepageToWide(std::string(data
, length
), base::kCodepageUTF8
,
106 base::OnStringConversionError::SUBSTITUTE
, &data_string
);
107 const wchar_t* p
= data_string
.c_str();
108 const wchar_t* end
= p
+ data_string
.length();
110 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
111 // Example: "CACHE MANIFEST #comment" is a valid signature.
112 // Example: "CACHE MANIFEST;V2" is not.
114 // When the input data starts with a UTF-8 Byte-Order-Mark
115 // (0xEF, 0xBB, 0xBF), the base::CodepageToWide() call above converts it to a
116 // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
118 if (!data_string
.empty() && data_string
[0] == 0xFEFF) {
126 // Check for a supported signature and skip p past it.
127 if (0 == data_string
.compare(bom_offset
, kSignatureLength
,
129 p
+= kSignatureLength
;
130 } else if (0 == data_string
.compare(bom_offset
, kChromiumSignatureLength
,
131 kChromiumSignature
)) {
132 p
+= kChromiumSignatureLength
;
137 // Character after "CACHE MANIFEST" must be whitespace.
138 if (p
< end
&& *p
!= ' ' && *p
!= '\t' && *p
!= '\n' && *p
!= '\r')
141 // Skip to the end of the line.
142 while (p
< end
&& *p
!= '\r' && *p
!= '\n')
147 while (p
< end
&& (*p
== '\n' || *p
== '\r' || *p
== ' ' || *p
== '\t'))
153 const wchar_t* line_start
= p
;
155 // Find the end of the line
156 while (p
< end
&& *p
!= '\r' && *p
!= '\n')
159 // Check if we have a comment
160 if (*line_start
== '#')
163 // Get rid of trailing whitespace
164 const wchar_t* tmp
= p
- 1;
165 while (tmp
> line_start
&& (*tmp
== ' ' || *tmp
== '\t'))
168 std::wstring
line(line_start
, tmp
- line_start
+ 1);
170 if (line
== L
"CACHE:") {
172 } else if (line
== L
"FALLBACK:") {
174 } else if (line
== L
"NETWORK:") {
175 mode
= ONLINE_WHITELIST
;
176 } else if (line
== L
"CHROMIUM-INTERCEPT:") {
178 } else if (*(line
.end() - 1) == ':') {
180 } else if (mode
== UNKNOWN_MODE
) {
182 } else if (line
== L
"*" && mode
== ONLINE_WHITELIST
) {
183 manifest
.online_whitelist_all
= true;
185 } else if (mode
== EXPLICIT
|| mode
== ONLINE_WHITELIST
) {
186 const wchar_t *line_p
= line
.c_str();
187 const wchar_t *line_end
= line_p
+ line
.length();
189 // Look for whitespace separating the URL from subsequent ignored tokens.
190 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
193 base::string16 url16
;
194 base::WideToUTF16(line
.c_str(), line_p
- line
.c_str(), &url16
);
195 GURL url
= manifest_url
.Resolve(url16
);
199 GURL::Replacements replacements
;
200 replacements
.ClearRef();
201 url
= url
.ReplaceComponents(replacements
);
204 // Scheme component must be the same as the manifest URL's.
205 if (url
.scheme() != manifest_url
.scheme()) {
209 // See http://code.google.com/p/chromium/issues/detail?id=69594
210 // We willfully violate the HTML5 spec at this point in order
211 // to support the appcaching of cross-origin HTTPS resources.
212 // Per the spec, EXPLICIT cross-origin HTTPS resources should be
213 // ignored here. We've opted for a milder constraint and allow
214 // caching unless the resource has a "no-store" header. That
215 // condition is enforced in AppCacheUpdateJob.
217 if (mode
== EXPLICIT
) {
218 manifest
.explicit_urls
.insert(url
.spec());
220 bool is_pattern
= HasPatternMatchingAnnotation(line_p
, line_end
);
221 manifest
.online_whitelist_namespaces
.push_back(
222 AppCacheNamespace(APPCACHE_NETWORK_NAMESPACE
, url
, GURL(),
225 } else if (mode
== INTERCEPT
) {
226 if (parse_mode
!= PARSE_MANIFEST_ALLOWING_INTERCEPTS
) {
227 manifest
.did_ignore_intercept_namespaces
= true;
231 // Lines of the form,
232 // <urlnamespace> <intercept_type> <targeturl>
233 const wchar_t* line_p
= line
.c_str();
234 const wchar_t* line_end
= line_p
+ line
.length();
236 // Look for first whitespace separating the url namespace from
237 // the intercept type.
238 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
241 if (line_p
== line_end
)
242 continue; // There was no whitespace separating the URLs.
244 base::string16 namespace_url16
;
245 base::WideToUTF16(line
.c_str(), line_p
- line
.c_str(), &namespace_url16
);
246 GURL namespace_url
= manifest_url
.Resolve(namespace_url16
);
247 if (!namespace_url
.is_valid())
249 if (namespace_url
.has_ref()) {
250 GURL::Replacements replacements
;
251 replacements
.ClearRef();
252 namespace_url
= namespace_url
.ReplaceComponents(replacements
);
255 // The namespace URL must have the same scheme, host and port
256 // as the manifest's URL.
257 if (manifest_url
.GetOrigin() != namespace_url
.GetOrigin())
260 // Skip whitespace separating namespace from the type.
261 while (line_p
< line_end
&& (*line_p
== '\t' || *line_p
== ' '))
264 // Look for whitespace separating the type from the target url.
265 const wchar_t* type_start
= line_p
;
266 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
269 // Look for a type value we understand, otherwise skip the line.
270 InterceptVerb verb
= UNKNOWN_VERB
;
271 std::wstring
type(type_start
, line_p
- type_start
);
272 if (type
== L
"return") {
274 } else if (type
== L
"execute" &&
275 base::CommandLine::ForCurrentProcess()->HasSwitch(
276 kEnableExecutableHandlers
)) {
279 if (verb
== UNKNOWN_VERB
)
282 // Skip whitespace separating type from the target_url.
283 while (line_p
< line_end
&& (*line_p
== '\t' || *line_p
== ' '))
286 // Look for whitespace separating the URL from subsequent ignored tokens.
287 const wchar_t* target_url_start
= line_p
;
288 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
291 base::string16 target_url16
;
292 base::WideToUTF16(target_url_start
, line_p
- target_url_start
,
294 GURL target_url
= manifest_url
.Resolve(target_url16
);
295 if (!target_url
.is_valid())
298 if (target_url
.has_ref()) {
299 GURL::Replacements replacements
;
300 replacements
.ClearRef();
301 target_url
= target_url
.ReplaceComponents(replacements
);
303 if (manifest_url
.GetOrigin() != target_url
.GetOrigin())
306 bool is_pattern
= HasPatternMatchingAnnotation(line_p
, line_end
);
307 manifest
.intercept_namespaces
.push_back(
308 AppCacheNamespace(APPCACHE_INTERCEPT_NAMESPACE
, namespace_url
,
309 target_url
, is_pattern
, verb
== EXECUTE
));
310 } else if (mode
== FALLBACK
) {
311 const wchar_t* line_p
= line
.c_str();
312 const wchar_t* line_end
= line_p
+ line
.length();
314 // Look for whitespace separating the two URLs
315 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
318 if (line_p
== line_end
) {
319 // There was no whitespace separating the URLs.
323 base::string16 namespace_url16
;
324 base::WideToUTF16(line
.c_str(), line_p
- line
.c_str(), &namespace_url16
);
325 GURL namespace_url
= manifest_url
.Resolve(namespace_url16
);
326 if (!namespace_url
.is_valid())
328 if (namespace_url
.has_ref()) {
329 GURL::Replacements replacements
;
330 replacements
.ClearRef();
331 namespace_url
= namespace_url
.ReplaceComponents(replacements
);
334 // Fallback namespace URL must have the same scheme, host and port
335 // as the manifest's URL.
336 if (manifest_url
.GetOrigin() != namespace_url
.GetOrigin()) {
340 // Skip whitespace separating fallback namespace from URL.
341 while (line_p
< line_end
&& (*line_p
== '\t' || *line_p
== ' '))
344 // Look for whitespace separating the URL from subsequent ignored tokens.
345 const wchar_t* fallback_start
= line_p
;
346 while (line_p
< line_end
&& *line_p
!= '\t' && *line_p
!= ' ')
349 base::string16 fallback_url16
;
350 base::WideToUTF16(fallback_start
, line_p
- fallback_start
,
352 GURL fallback_url
= manifest_url
.Resolve(fallback_url16
);
353 if (!fallback_url
.is_valid())
355 if (fallback_url
.has_ref()) {
356 GURL::Replacements replacements
;
357 replacements
.ClearRef();
358 fallback_url
= fallback_url
.ReplaceComponents(replacements
);
361 // Fallback entry URL must have the same scheme, host and port
362 // as the manifest's URL.
363 if (manifest_url
.GetOrigin() != fallback_url
.GetOrigin()) {
367 bool is_pattern
= HasPatternMatchingAnnotation(line_p
, line_end
);
369 // Store regardless of duplicate namespace URL. Only first match
370 // will ever be used.
371 manifest
.fallback_namespaces
.push_back(
372 AppCacheNamespace(APPCACHE_FALLBACK_NAMESPACE
, namespace_url
,
373 fallback_url
, is_pattern
));
382 } // namespace content