IndexedDBFactory now ForceCloses databases.
[chromium-blink-merge.git] / content / renderer / dom_serializer_browsertest.cc
blob43d0b5c1d66cf26823d00469e27ecdbfd0ed1a35
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/bind.h"
6 #include "base/command_line.h"
7 #include "base/compiler_specific.h"
8 #include "base/containers/hash_tables.h"
9 #include "base/file_util.h"
10 #include "base/files/file_path.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/renderer/render_view.h"
15 #include "content/public/renderer/render_view_observer.h"
16 #include "content/public/test/test_utils.h"
17 #include "content/renderer/savable_resources.h"
18 #include "content/shell/browser/shell.h"
19 #include "content/test/content_browser_test.h"
20 #include "content/test/content_browser_test_utils.h"
21 #include "net/base/net_util.h"
22 #include "net/url_request/url_request_context.h"
23 #include "third_party/WebKit/public/platform/WebCString.h"
24 #include "third_party/WebKit/public/platform/WebData.h"
25 #include "third_party/WebKit/public/platform/WebString.h"
26 #include "third_party/WebKit/public/platform/WebURL.h"
27 #include "third_party/WebKit/public/platform/WebVector.h"
28 #include "third_party/WebKit/public/web/WebDocument.h"
29 #include "third_party/WebKit/public/web/WebElement.h"
30 #include "third_party/WebKit/public/web/WebFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeCollection.h"
33 #include "third_party/WebKit/public/web/WebNodeList.h"
34 #include "third_party/WebKit/public/web/WebPageSerializer.h"
35 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
36 #include "third_party/WebKit/public/web/WebView.h"
38 using blink::WebCString;
39 using blink::WebData;
40 using blink::WebDocument;
41 using blink::WebElement;
42 using blink::WebFrame;
43 using blink::WebNode;
44 using blink::WebNodeCollection;
45 using blink::WebNodeList;
46 using blink::WebPageSerializer;
47 using blink::WebPageSerializerClient;
48 using blink::WebNode;
49 using blink::WebString;
50 using blink::WebURL;
51 using blink::WebView;
52 using blink::WebVector;
54 namespace {
56 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
// GetRenderView() passes this value to RenderView::FromRoutingID() instead of
// asking the UI thread for the WebContents' routing ID.
57 const int kRenderViewRoutingId = 2;
61 namespace content {
63 // Iterate recursively over sub-frames to find one with with a given url.
64 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) {
65 if (!web_view->mainFrame())
66 return NULL;
68 std::vector<WebFrame*> stack;
69 stack.push_back(web_view->mainFrame());
71 while (!stack.empty()) {
72 WebFrame* current_frame = stack.back();
73 stack.pop_back();
74 if (GURL(current_frame->document().url()) == url)
75 return current_frame;
76 WebNodeCollection all = current_frame->document().all();
77 for (WebNode node = all.firstItem();
78 !node.isNull(); node = all.nextItem()) {
79 if (!node.isElementNode())
80 continue;
81 // Check frame tag and iframe tag
82 WebElement element = node.to<WebElement>();
83 if (!element.hasTagName("frame") && !element.hasTagName("iframe"))
84 continue;
85 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element);
86 if (sub_frame)
87 stack.push_back(sub_frame);
90 return NULL;
93 // Helper function that test whether the first node in the doc is a doc type
94 // node.
95 bool HasDocType(const WebDocument& doc) {
96 WebNode node = doc.firstChild();
97 if (node.isNull())
98 return false;
99 return node.nodeType() == WebNode::DocumentTypeNode;
102 // Helper function for checking whether input node is META tag. Return true
103 // means it is META element, otherwise return false. The parameter charset_info
104 // return actual charset info if the META tag has charset declaration.
105 bool IsMetaElement(const WebNode& node, std::string& charset_info) {
106 if (!node.isElementNode())
107 return false;
108 const WebElement meta = node.toConst<WebElement>();
109 if (!meta.hasTagName("meta"))
110 return false;
111 charset_info.erase(0, charset_info.length());
112 // Check the META charset declaration.
113 WebString httpEquiv = meta.getAttribute("http-equiv");
114 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) {
115 std::string content = meta.getAttribute("content").utf8();
116 int pos = content.find("charset", 0);
117 if (pos > -1) {
118 // Add a dummy charset declaration to charset_info, which indicates this
119 // META tag has charset declaration although we do not get correct value
120 // yet.
121 charset_info.append("has-charset-declaration");
122 int remaining_length = content.length() - pos - 7;
123 if (!remaining_length)
124 return true;
125 int start_pos = pos + 7;
126 // Find "=" symbol.
127 while (remaining_length--)
128 if (content[start_pos++] == L'=')
129 break;
130 // Skip beginning space.
131 while (remaining_length) {
132 if (content[start_pos] > 0x0020)
133 break;
134 ++start_pos;
135 --remaining_length;
137 if (!remaining_length)
138 return true;
139 int end_pos = start_pos;
140 // Now we find out the start point of charset info. Search the end point.
141 while (remaining_length--) {
142 if (content[end_pos] <= 0x0020 || content[end_pos] == L';')
143 break;
144 ++end_pos;
146 // Get actual charset info.
147 charset_info = content.substr(start_pos, end_pos - start_pos);
148 return true;
151 return true;
154 class LoadObserver : public RenderViewObserver {
155 public:
156 LoadObserver(RenderView* render_view, const base::Closure& quit_closure)
157 : RenderViewObserver(render_view),
158 quit_closure_(quit_closure) {}
160 virtual void DidFinishLoad(blink::WebFrame* frame) OVERRIDE {
161 if (frame == render_view()->GetWebView()->mainFrame())
162 quit_closure_.Run();
165 private:
166 base::Closure quit_closure_;
169 class DomSerializerTests : public ContentBrowserTest,
170 public WebPageSerializerClient {
171 public:
// Starts with serialization marked as not finished and uses the relative dummy
// path "./dummy_files/" as the local directory name that SerializeDomForURL()
// hands to the serializer.
172 DomSerializerTests()
173 : serialized_(false),
174 local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) {}
// Appends the switches these tests run with: always switches::kSingleProcess
// and, on Windows Aura builds only, switches::kDisableAcceleratedCompositing.
// NOTE(review): single-process mode is presumably what lets the tests post
// work to the renderer via PostTaskToInProcessRendererAndWait() - confirm.
176 virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
177 command_line->AppendSwitch(switches::kSingleProcess);
178 #if defined(OS_WIN) && defined(USE_AURA)
179 // Don't want to try to create a GPU process.
180 command_line->AppendSwitch(switches::kDisableAcceleratedCompositing);
181 #endif
184 // DomSerializerDelegate.
185 virtual void didSerializeDataForFrame(const WebURL& frame_web_url,
186 const WebCString& data,
187 PageSerializationStatus status) {
189 GURL frame_url(frame_web_url);
190 // If the all frames are finished saving, check all finish status
191 if (status == WebPageSerializerClient::AllFramesAreFinished) {
192 SerializationFinishStatusMap::iterator it =
193 serialization_finish_status_.begin();
194 for (; it != serialization_finish_status_.end(); ++it)
195 ASSERT_TRUE(it->second);
196 serialized_ = true;
197 return;
200 // Check finish status of current frame.
201 SerializationFinishStatusMap::iterator it =
202 serialization_finish_status_.find(frame_url.spec());
203 // New frame, set initial status as false.
204 if (it == serialization_finish_status_.end())
205 serialization_finish_status_[frame_url.spec()] = false;
207 it = serialization_finish_status_.find(frame_url.spec());
208 ASSERT_TRUE(it != serialization_finish_status_.end());
209 // In process frame, finish status should be false.
210 ASSERT_FALSE(it->second);
212 // Add data to corresponding frame's content.
213 serialized_frame_map_[frame_url.spec()] += data.data();
215 // Current frame is completed saving, change the finish status.
216 if (status == WebPageSerializerClient::CurrentFrameIsFinished)
217 it->second = true;
220 bool HasSerializedFrame(const GURL& frame_url) {
221 return serialized_frame_map_.find(frame_url.spec()) !=
222 serialized_frame_map_.end();
225 const std::string& GetSerializedContentForFrame(
226 const GURL& frame_url) {
227 return serialized_frame_map_[frame_url.spec()];
230 RenderView* GetRenderView() {
231 // We could have the test on the UI thread get the WebContent's routing ID,
232 // but we know this will be the first RV so skip that and just hardcode it.
233 return RenderView::FromRoutingID(kRenderViewRoutingId);
236 WebView* GetWebView() {
237 return GetRenderView()->GetWebView();
240 WebFrame* GetMainFrame() {
241 return GetWebView()->mainFrame();
244 // Load web page according to input content and relative URLs within
245 // the document.
246 void LoadContents(const std::string& contents,
247 const GURL& base_url,
248 const WebString encoding_info) {
249 scoped_refptr<MessageLoopRunner> runner = new MessageLoopRunner;
250 LoadObserver observer(GetRenderView(), runner->QuitClosure());
252 // If input encoding is empty, use UTF-8 as default encoding.
253 if (encoding_info.isEmpty()) {
254 GetMainFrame()->loadHTMLString(contents, base_url);
255 } else {
256 WebData data(contents.data(), contents.length());
258 // Do not use WebFrame.LoadHTMLString because it assumes that input
259 // html contents use UTF-8 encoding.
260 // TODO(darin): This should use WebFrame::loadData.
261 WebFrame* web_frame = GetMainFrame();
263 ASSERT_TRUE(web_frame != NULL);
265 web_frame->loadData(data, "text/html", encoding_info, base_url);
268 runner->Run();
271 // Serialize page DOM according to specific page URL. The parameter
272 // recursive_serialization indicates whether we will serialize all
273 // sub-frames.
274 void SerializeDomForURL(const GURL& page_url,
275 bool recursive_serialization) {
276 // Find corresponding WebFrame according to page_url.
277 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), page_url);
278 ASSERT_TRUE(web_frame != NULL);
279 WebVector<WebURL> links;
280 links.assign(&page_url, 1);
281 WebString file_path =
282 base::FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")).AsUTF16Unsafe();
283 WebVector<WebString> local_paths;
284 local_paths.assign(&file_path, 1);
285 // Start serializing DOM.
286 bool result = WebPageSerializer::serialize(web_frame,
287 recursive_serialization,
288 static_cast<WebPageSerializerClient*>(this),
289 links,
290 local_paths,
291 local_directory_name_.AsUTF16Unsafe());
292 ASSERT_TRUE(result);
293 ASSERT_TRUE(serialized_);
296 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) {
297 // Make sure original contents have document type.
298 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
299 ASSERT_TRUE(web_frame != NULL);
300 WebDocument doc = web_frame->document();
301 ASSERT_TRUE(HasDocType(doc));
302 // Do serialization.
303 SerializeDomForURL(file_url, false);
304 // Load the serialized contents.
305 ASSERT_TRUE(HasSerializedFrame(file_url));
306 const std::string& serialized_contents =
307 GetSerializedContentForFrame(file_url);
308 LoadContents(serialized_contents, file_url,
309 web_frame->document().encoding());
310 // Make sure serialized contents still have document type.
311 web_frame = GetMainFrame();
312 doc = web_frame->document();
313 ASSERT_TRUE(HasDocType(doc));
316 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) {
317 // Make sure original contents do not have document type.
318 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
319 ASSERT_TRUE(web_frame != NULL);
320 WebDocument doc = web_frame->document();
321 ASSERT_TRUE(!HasDocType(doc));
322 // Do serialization.
323 SerializeDomForURL(file_url, false);
324 // Load the serialized contents.
325 ASSERT_TRUE(HasSerializedFrame(file_url));
326 const std::string& serialized_contents =
327 GetSerializedContentForFrame(file_url);
328 LoadContents(serialized_contents, file_url,
329 web_frame->document().encoding());
330 // Make sure serialized contents do not have document type.
331 web_frame = GetMainFrame();
332 doc = web_frame->document();
333 ASSERT_TRUE(!HasDocType(doc));
336 void SerializeXMLDocWithBuiltInEntitiesOnRenderer(
337 const GURL& xml_file_url, const std::string& original_contents) {
338 // Do serialization.
339 SerializeDomForURL(xml_file_url, false);
340 // Compare the serialized contents with original contents.
341 ASSERT_TRUE(HasSerializedFrame(xml_file_url));
342 const std::string& serialized_contents =
343 GetSerializedContentForFrame(xml_file_url);
344 ASSERT_EQ(original_contents, serialized_contents);
347 void SerializeHTMLDOMWithAddingMOTWOnRenderer(
348 const GURL& file_url, const std::string& original_contents) {
349 // Make sure original contents does not have MOTW;
350 std::string motw_declaration =
351 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
352 ASSERT_FALSE(motw_declaration.empty());
353 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
354 // declaration to ASCII and search whether original contents has it or not.
355 ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration));
357 // Do serialization.
358 SerializeDomForURL(file_url, false);
359 // Make sure the serialized contents have MOTW ;
360 ASSERT_TRUE(HasSerializedFrame(file_url));
361 const std::string& serialized_contents =
362 GetSerializedContentForFrame(file_url);
363 ASSERT_FALSE(std::string::npos ==
364 serialized_contents.find(motw_declaration));
367 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer(
368 const GURL& file_url) {
369 // Make sure there is no META charset declaration in original document.
370 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
371 ASSERT_TRUE(web_frame != NULL);
372 WebDocument doc = web_frame->document();
373 ASSERT_TRUE(doc.isHTMLDocument());
374 WebElement head_element = doc.head();
375 ASSERT_TRUE(!head_element.isNull());
376 // Go through all children of HEAD element.
377 for (WebNode child = head_element.firstChild(); !child.isNull();
378 child = child.nextSibling()) {
379 std::string charset_info;
380 if (IsMetaElement(child, charset_info))
381 ASSERT_TRUE(charset_info.empty());
383 // Do serialization.
384 SerializeDomForURL(file_url, false);
386 // Load the serialized contents.
387 ASSERT_TRUE(HasSerializedFrame(file_url));
388 const std::string& serialized_contents =
389 GetSerializedContentForFrame(file_url);
390 LoadContents(serialized_contents, file_url,
391 web_frame->document().encoding());
392 // Make sure the first child of HEAD element is META which has charset
393 // declaration in serialized contents.
394 web_frame = GetMainFrame();
395 ASSERT_TRUE(web_frame != NULL);
396 doc = web_frame->document();
397 ASSERT_TRUE(doc.isHTMLDocument());
398 head_element = doc.head();
399 ASSERT_TRUE(!head_element.isNull());
400 WebNode meta_node = head_element.firstChild();
401 ASSERT_TRUE(!meta_node.isNull());
402 // Get meta charset info.
403 std::string charset_info2;
404 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
405 ASSERT_TRUE(!charset_info2.empty());
406 ASSERT_EQ(charset_info2,
407 std::string(web_frame->document().encoding().utf8()));
409 // Make sure no more additional META tags which have charset declaration.
410 for (WebNode child = meta_node.nextSibling(); !child.isNull();
411 child = child.nextSibling()) {
412 std::string charset_info;
413 if (IsMetaElement(child, charset_info))
414 ASSERT_TRUE(charset_info.empty());
418 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer(
419 const GURL& file_url) {
420 // Make sure there are multiple META charset declarations in original
421 // document.
422 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
423 ASSERT_TRUE(web_frame != NULL);
424 WebDocument doc = web_frame->document();
425 ASSERT_TRUE(doc.isHTMLDocument());
426 WebElement head_ele = doc.head();
427 ASSERT_TRUE(!head_ele.isNull());
428 // Go through all children of HEAD element.
429 int charset_declaration_count = 0;
430 for (WebNode child = head_ele.firstChild(); !child.isNull();
431 child = child.nextSibling()) {
432 std::string charset_info;
433 if (IsMetaElement(child, charset_info) && !charset_info.empty())
434 charset_declaration_count++;
436 // The original doc has more than META tags which have charset declaration.
437 ASSERT_TRUE(charset_declaration_count > 1);
439 // Do serialization.
440 SerializeDomForURL(file_url, false);
442 // Load the serialized contents.
443 ASSERT_TRUE(HasSerializedFrame(file_url));
444 const std::string& serialized_contents =
445 GetSerializedContentForFrame(file_url);
446 LoadContents(serialized_contents, file_url,
447 web_frame->document().encoding());
448 // Make sure only first child of HEAD element is META which has charset
449 // declaration in serialized contents.
450 web_frame = GetMainFrame();
451 ASSERT_TRUE(web_frame != NULL);
452 doc = web_frame->document();
453 ASSERT_TRUE(doc.isHTMLDocument());
454 head_ele = doc.head();
455 ASSERT_TRUE(!head_ele.isNull());
456 WebNode meta_node = head_ele.firstChild();
457 ASSERT_TRUE(!meta_node.isNull());
458 // Get meta charset info.
459 std::string charset_info2;
460 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
461 ASSERT_TRUE(!charset_info2.empty());
462 ASSERT_EQ(charset_info2,
463 std::string(web_frame->document().encoding().utf8()));
465 // Make sure no more additional META tags which have charset declaration.
466 for (WebNode child = meta_node.nextSibling(); !child.isNull();
467 child = child.nextSibling()) {
468 std::string charset_info;
469 if (IsMetaElement(child, charset_info))
470 ASSERT_TRUE(charset_info.empty());
474 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() {
475 base::FilePath page_file_path = GetTestFilePath(
476 "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
477 // Get file URL. The URL is dummy URL to identify the following loading
478 // actions. The test content is in constant:original_contents.
479 GURL file_url = net::FilePathToFileURL(page_file_path);
480 ASSERT_TRUE(file_url.SchemeIsFile());
481 // Test contents.
482 static const char* const original_contents =
483 "<html><body>&amp;&lt;&gt;\"\'</body></html>";
484 // Load the test contents.
485 LoadContents(original_contents, file_url, WebString());
487 // Get BODY's text content in DOM.
488 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
489 ASSERT_TRUE(web_frame != NULL);
490 WebDocument doc = web_frame->document();
491 ASSERT_TRUE(doc.isHTMLDocument());
492 WebElement body_ele = doc.body();
493 ASSERT_TRUE(!body_ele.isNull());
494 WebNode text_node = body_ele.firstChild();
495 ASSERT_TRUE(text_node.isTextNode());
496 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) ==
497 "&amp;&lt;&gt;\"\'");
498 // Do serialization.
499 SerializeDomForURL(file_url, false);
500 // Compare the serialized contents with original contents.
501 ASSERT_TRUE(HasSerializedFrame(file_url));
502 const std::string& serialized_contents =
503 GetSerializedContentForFrame(file_url);
504 // Compare the serialized contents with original contents to make sure
505 // they are same.
506 // Because we add MOTW when serializing DOM, so before comparison, we also
507 // need to add MOTW to original_contents.
508 std::string original_str =
509 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
510 original_str += original_contents;
511 // Since WebCore now inserts a new HEAD element if there is no HEAD element
512 // when creating BODY element. (Please see
513 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
514 // corresponding META content if we find WebCore-generated HEAD element.
515 if (!doc.head().isNull()) {
516 WebString encoding = web_frame->document().encoding();
517 std::string htmlTag("<html>");
518 std::string::size_type pos = original_str.find(htmlTag);
519 ASSERT_NE(std::string::npos, pos);
520 pos += htmlTag.length();
521 std::string head_part("<head>");
522 head_part +=
523 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
524 head_part += "</head>";
525 original_str.insert(pos, head_part);
527 ASSERT_EQ(original_str, serialized_contents);
530 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() {
531 base::FilePath page_file_path = GetTestFilePath(
532 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
533 // Get file URL. The URL is dummy URL to identify the following loading
534 // actions. The test content is in constant:original_contents.
535 GURL file_url = net::FilePathToFileURL(page_file_path);
536 ASSERT_TRUE(file_url.SchemeIsFile());
537 // Test contents.
538 static const char* const original_contents =
539 "<html><body title=\"&amp;&lt;&gt;&quot;&#39;\"></body></html>";
540 // Load the test contents.
541 LoadContents(original_contents, file_url, WebString());
542 // Get value of BODY's title attribute in DOM.
543 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
544 ASSERT_TRUE(web_frame != NULL);
545 WebDocument doc = web_frame->document();
546 ASSERT_TRUE(doc.isHTMLDocument());
547 WebElement body_ele = doc.body();
548 ASSERT_TRUE(!body_ele.isNull());
549 WebString value = body_ele.getAttribute("title");
550 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'");
551 // Do serialization.
552 SerializeDomForURL(file_url, false);
553 // Compare the serialized contents with original contents.
554 ASSERT_TRUE(HasSerializedFrame(file_url));
555 const std::string& serialized_contents =
556 GetSerializedContentForFrame(file_url);
557 // Compare the serialized contents with original contents to make sure
558 // they are same.
559 std::string original_str =
560 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
561 original_str += original_contents;
562 if (!doc.isNull()) {
563 WebString encoding = web_frame->document().encoding();
564 std::string htmlTag("<html>");
565 std::string::size_type pos = original_str.find(htmlTag);
566 ASSERT_NE(std::string::npos, pos);
567 pos += htmlTag.length();
568 std::string head_part("<head>");
569 head_part +=
570 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
571 head_part += "</head>";
572 original_str.insert(pos, head_part);
574 ASSERT_EQ(original_str, serialized_contents);
577 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) {
578 // Get value of BODY's title attribute in DOM.
579 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
580 WebDocument doc = web_frame->document();
581 ASSERT_TRUE(doc.isHTMLDocument());
582 WebElement body_element = doc.body();
583 // Unescaped string for "&percnt;&nsup;&sup1;&apos;".
584 static const wchar_t parsed_value[] = {
585 '%', 0x2285, 0x00b9, '\'', 0
587 WebString value = body_element.getAttribute("title");
588 ASSERT_TRUE(base::UTF16ToWide(value) == parsed_value);
589 ASSERT_TRUE(base::UTF16ToWide(body_element.innerText()) == parsed_value);
591 // Do serialization.
592 SerializeDomForURL(file_url, false);
593 // Check the serialized string.
594 ASSERT_TRUE(HasSerializedFrame(file_url));
595 const std::string& serialized_contents =
596 GetSerializedContentForFrame(file_url);
597 // Confirm that the serialized string has no non-standard HTML entities.
598 ASSERT_EQ(std::string::npos, serialized_contents.find("&percnt;"));
599 ASSERT_EQ(std::string::npos, serialized_contents.find("&nsup;"));
600 ASSERT_EQ(std::string::npos, serialized_contents.find("&sup1;"));
601 ASSERT_EQ(std::string::npos, serialized_contents.find("&apos;"));
604 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url,
605 const GURL& path_dir_url) {
606 // There are total 2 available base tags in this test file.
607 const int kTotalBaseTagCountInTestFile = 2;
609 // Since for this test, we assume there is no savable sub-resource links for
610 // this test file, also all links are relative URLs in this test file, so we
611 // need to check those relative URLs and make sure document has BASE tag.
612 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
613 ASSERT_TRUE(web_frame != NULL);
614 WebDocument doc = web_frame->document();
615 ASSERT_TRUE(doc.isHTMLDocument());
616 // Go through all descent nodes.
617 WebNodeCollection all = doc.all();
618 int original_base_tag_count = 0;
619 for (WebNode node = all.firstItem(); !node.isNull();
620 node = all.nextItem()) {
621 if (!node.isElementNode())
622 continue;
623 WebElement element = node.to<WebElement>();
624 if (element.hasTagName("base")) {
625 original_base_tag_count++;
626 } else {
627 // Get link.
628 WebString value = GetSubResourceLinkFromElement(element);
629 if (value.isNull() && element.hasTagName("a")) {
630 value = element.getAttribute("href");
631 if (value.isEmpty())
632 value = WebString();
634 // Each link is relative link.
635 if (!value.isNull()) {
636 GURL link(value.utf8());
637 ASSERT_TRUE(link.scheme().empty());
641 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
642 // Make sure in original document, the base URL is not equal with the
643 // |path_dir_url|.
644 GURL original_base_url(doc.baseURL());
645 ASSERT_NE(original_base_url, path_dir_url);
647 // Do serialization.
648 SerializeDomForURL(file_url, false);
650 // Load the serialized contents.
651 ASSERT_TRUE(HasSerializedFrame(file_url));
652 const std::string& serialized_contents =
653 GetSerializedContentForFrame(file_url);
654 LoadContents(serialized_contents, file_url,
655 web_frame->document().encoding());
657 // Make sure all links are absolute URLs and doc there are some number of
658 // BASE tags in serialized HTML data. Each of those BASE tags have same base
659 // URL which is as same as URL of current test file.
660 web_frame = GetMainFrame();
661 ASSERT_TRUE(web_frame != NULL);
662 doc = web_frame->document();
663 ASSERT_TRUE(doc.isHTMLDocument());
664 // Go through all descent nodes.
665 all = doc.all();
666 int new_base_tag_count = 0;
667 for (WebNode node = all.firstItem(); !node.isNull();
668 node = all.nextItem()) {
669 if (!node.isElementNode())
670 continue;
671 WebElement element = node.to<WebElement>();
672 if (element.hasTagName("base")) {
673 new_base_tag_count++;
674 } else {
675 // Get link.
676 WebString value = GetSubResourceLinkFromElement(element);
677 if (value.isNull() && element.hasTagName("a")) {
678 value = element.getAttribute("href");
679 if (value.isEmpty())
680 value = WebString();
682 // Each link is absolute link.
683 if (!value.isNull()) {
684 GURL link(std::string(value.utf8()));
685 ASSERT_FALSE(link.scheme().empty());
689 // We have one more added BASE tag which is generated by JavaScript.
690 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1);
691 // Make sure in new document, the base URL is equal with the |path_dir_url|.
692 GURL new_base_url(doc.baseURL());
693 ASSERT_EQ(new_base_url, path_dir_url);
696 void SerializeHTMLDOMWithEmptyHeadOnRenderer() {
697 base::FilePath page_file_path = GetTestFilePath(
698 "dom_serializer", "empty_head.htm");
699 GURL file_url = net::FilePathToFileURL(page_file_path);
700 ASSERT_TRUE(file_url.SchemeIsFile());
702 // Load the test html content.
703 static const char* const empty_head_contents =
704 "<html><head></head><body>hello world</body></html>";
705 LoadContents(empty_head_contents, file_url, WebString());
707 // Make sure the head tag is empty.
708 WebFrame* web_frame = GetMainFrame();
709 ASSERT_TRUE(web_frame != NULL);
710 WebDocument doc = web_frame->document();
711 ASSERT_TRUE(doc.isHTMLDocument());
712 WebElement head_element = doc.head();
713 ASSERT_TRUE(!head_element.isNull());
714 ASSERT_TRUE(!head_element.hasChildNodes());
715 ASSERT_TRUE(head_element.childNodes().length() == 0);
717 // Do serialization.
718 SerializeDomForURL(file_url, false);
719 // Make sure the serialized contents have META ;
720 ASSERT_TRUE(HasSerializedFrame(file_url));
721 const std::string& serialized_contents =
722 GetSerializedContentForFrame(file_url);
724 // Reload serialized contents and make sure there is only one META tag.
725 LoadContents(serialized_contents, file_url,
726 web_frame->document().encoding());
727 web_frame = GetMainFrame();
728 ASSERT_TRUE(web_frame != NULL);
729 doc = web_frame->document();
730 ASSERT_TRUE(doc.isHTMLDocument());
731 head_element = doc.head();
732 ASSERT_TRUE(!head_element.isNull());
733 ASSERT_TRUE(head_element.hasChildNodes());
734 ASSERT_TRUE(head_element.childNodes().length() == 1);
735 WebNode meta_node = head_element.firstChild();
736 ASSERT_TRUE(!meta_node.isNull());
737 // Get meta charset info.
738 std::string charset_info;
739 ASSERT_TRUE(IsMetaElement(meta_node, charset_info));
740 ASSERT_TRUE(!charset_info.empty());
741 ASSERT_EQ(charset_info,
742 std::string(web_frame->document().encoding().utf8()));
744 // Check the body's first node is text node and its contents are
745 // "hello world"
746 WebElement body_element = doc.body();
747 ASSERT_TRUE(!body_element.isNull());
748 WebNode text_node = body_element.firstChild();
749 ASSERT_TRUE(text_node.isTextNode());
750 WebString text_node_contents = text_node.nodeValue();
751 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world");
754 void SerializeDocumentWithDownloadedIFrameOnRenderer(const GURL& file_url) {
755 // Do a recursive serialization. We pass if we don't crash.
756 SerializeDomForURL(file_url, true);
759 void SubResourceForElementsInNonHTMLNamespaceOnRenderer(
760 const GURL& file_url) {
761 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
762 ASSERT_TRUE(web_frame != NULL);
763 WebDocument doc = web_frame->document();
764 WebNode lastNodeInBody = doc.body().lastChild();
765 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType());
766 WebString uri = GetSubResourceLinkFromElement(
767 lastNodeInBody.to<WebElement>());
768 EXPECT_TRUE(uri.isNull());
771 private:
772 // Map frame_url to corresponding serialized_content.
// Keys are GURL::spec() strings; didSerializeDataForFrame() appends each
// received chunk to the entry of its frame.
773 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap;
774 SerializedFrameContentMap serialized_frame_map_;
775 // Map frame_url to corresponding status of serialization finish.
// An entry starts as false when a frame is first seen and becomes true when
// the frame reports CurrentFrameIsFinished.
776 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap;
777 SerializationFinishStatusMap serialization_finish_status_;
778 // Flag indicates whether the process of serializing DOM is finished or not.
// Set when AllFramesAreFinished is received; asserted by SerializeDomForURL().
779 bool serialized_;
780 // The local_directory_name_ is dummy relative path of directory which
781 // contain all saved auxiliary files included all sub frames and resources.
782 const base::FilePath local_directory_name_;
785 // If original contents have document type, the serialized contents also have
786 // document type.
787 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) {
788 base::FilePath page_file_path =
789 GetTestFilePath("dom_serializer", "youtube_1.htm");
790 GURL file_url = net::FilePathToFileURL(page_file_path);
791 ASSERT_TRUE(file_url.SchemeIsFile());
792 // Load the test file.
793 NavigateToURL(shell(), file_url);
795 PostTaskToInProcessRendererAndWait(
796 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer,
797 base::Unretained(this), file_url));
800 // If original contents do not have document type, the serialized contents
801 // also do not have document type.
802 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) {
803 base::FilePath page_file_path =
804 GetTestFilePath("dom_serializer", "youtube_2.htm");
805 GURL file_url = net::FilePathToFileURL(page_file_path);
806 ASSERT_TRUE(file_url.SchemeIsFile());
807 // Load the test file.
808 NavigateToURL(shell(), file_url);
810 PostTaskToInProcessRendererAndWait(
811 base::Bind(
812 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer,
813 base::Unretained(this), file_url));
816 // Serialize XML document which has all 5 built-in entities. After
817 // finishing serialization, the serialized contents should be same
818 // with original XML document.
820 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
821 // XML headers are handled differently in the merged serializer.
822 // Bug: http://crbug.com/328354
823 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
824 DISABLED_SerializeXMLDocWithBuiltInEntities) {
825 base::FilePath page_file_path =
826 GetTestFilePath("dom_serializer", "note.html");
827 base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml");
828 // Read original contents for later comparison.
829 std::string original_contents;
830 ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents));
831 // Get file URL.
832 GURL file_url = net::FilePathToFileURL(page_file_path);
833 GURL xml_file_url = net::FilePathToFileURL(xml_file_path);
834 ASSERT_TRUE(file_url.SchemeIsFile());
835 // Load the test file.
836 NavigateToURL(shell(), file_url);
838 PostTaskToInProcessRendererAndWait(
839 base::Bind(
840 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer,
841 base::Unretained(this), xml_file_url, original_contents));
844 // When serializing DOM, we add MOTW declaration before html tag.
845 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) {
846 base::FilePath page_file_path =
847 GetTestFilePath("dom_serializer", "youtube_2.htm");
848 // Read original contents for later comparison .
849 std::string original_contents;
850 ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
851 // Get file URL.
852 GURL file_url = net::FilePathToFileURL(page_file_path);
853 ASSERT_TRUE(file_url.SchemeIsFile());
855 // Load the test file.
856 NavigateToURL(shell(), file_url);
858 PostTaskToInProcessRendererAndWait(
859 base::Bind(
860 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer,
861 base::Unretained(this), file_url, original_contents));
864 // When serializing DOM, we will add the META which have correct charset
865 // declaration as first child of HEAD element for resolving WebKit bug:
866 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document
867 // does not have META charset declaration.
868 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
869 SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
870 base::FilePath page_file_path =
871 GetTestFilePath("dom_serializer", "youtube_1.htm");
872 // Get file URL.
873 GURL file_url = net::FilePathToFileURL(page_file_path);
874 ASSERT_TRUE(file_url.SchemeIsFile());
875 // Load the test file.
876 NavigateToURL(shell(), file_url);
878 PostTaskToInProcessRendererAndWait(
879 base::Bind(
880 &DomSerializerTests::
881 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer,
882 base::Unretained(this), file_url));
885 // When serializing DOM, if the original document has multiple META charset
886 // declaration, we will add the META which have correct charset declaration
887 // as first child of HEAD element and remove all original META charset
888 // declarations.
889 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
890 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
891 base::FilePath page_file_path =
892 GetTestFilePath("dom_serializer", "youtube_2.htm");
893 // Get file URL.
894 GURL file_url = net::FilePathToFileURL(page_file_path);
895 ASSERT_TRUE(file_url.SchemeIsFile());
896 // Load the test file.
897 NavigateToURL(shell(), file_url);
899 PostTaskToInProcessRendererAndWait(
900 base::Bind(
901 &DomSerializerTests::
902 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer,
903 base::Unretained(this), file_url));
906 // Test situation of html entities in text when serializing HTML DOM.
907 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) {
908 // Need to spin up the renderer and also navigate to a file url so that the
909 // renderer code doesn't attempt a fork when it sees a load to file scheme
910 // from non-file scheme.
911 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
913 PostTaskToInProcessRendererAndWait(
914 base::Bind(
915 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer,
916 base::Unretained(this)));
919 // Test situation of html entities in attribute value when serializing
920 // HTML DOM.
921 // This test started to fail at WebKit r65388. See http://crbug.com/52279.
923 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
924 // Some attributes are handled differently in the merged serializer.
925 // Bug: http://crbug.com/328354
926 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
927 DISABLED_SerializeHTMLDOMWithEntitiesInAttributeValue) {
928 // Need to spin up the renderer and also navigate to a file url so that the
929 // renderer code doesn't attempt a fork when it sees a load to file scheme
930 // from non-file scheme.
931 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
933 PostTaskToInProcessRendererAndWait(
934 base::Bind(
935 &DomSerializerTests::
936 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer,
937 base::Unretained(this)));
940 // Test situation of non-standard HTML entities when serializing HTML DOM.
941 // This test started to fail at WebKit r65351. See http://crbug.com/52279.
942 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
943 SerializeHTMLDOMWithNonStandardEntities) {
944 // Make a test file URL and load it.
945 base::FilePath page_file_path = GetTestFilePath(
946 "dom_serializer", "nonstandard_htmlentities.htm");
947 GURL file_url = net::FilePathToFileURL(page_file_path);
948 NavigateToURL(shell(), file_url);
950 PostTaskToInProcessRendererAndWait(
951 base::Bind(
952 &DomSerializerTests::
953 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer,
954 base::Unretained(this), file_url));
957 // Test situation of BASE tag in original document when serializing HTML DOM.
958 // When serializing, we should comment the BASE tag, append a new BASE tag.
959 // rewrite all the savable URLs to relative local path, and change other URLs
960 // to absolute URLs.
962 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
963 // Base tags are handled a bit different in merged version.
964 // Bug: http://crbug.com/328354
965 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
966 DISABLED_SerializeHTMLDOMWithBaseTag) {
967 base::FilePath page_file_path = GetTestFilePath(
968 "dom_serializer", "html_doc_has_base_tag.htm");
970 // Get page dir URL which is base URL of this file.
971 base::FilePath dir_name = page_file_path.DirName();
972 dir_name = dir_name.Append(
973 base::FilePath::StringType(base::FilePath::kSeparators[0], 1));
974 GURL path_dir_url = net::FilePathToFileURL(dir_name);
976 // Get file URL.
977 GURL file_url = net::FilePathToFileURL(page_file_path);
978 ASSERT_TRUE(file_url.SchemeIsFile());
979 // Load the test file.
980 NavigateToURL(shell(), file_url);
982 PostTaskToInProcessRendererAndWait(
983 base::Bind(
984 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer,
985 base::Unretained(this), file_url, path_dir_url));
988 // Serializing page which has an empty HEAD tag.
989 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) {
990 // Need to spin up the renderer and also navigate to a file url so that the
991 // renderer code doesn't attempt a fork when it sees a load to file scheme
992 // from non-file scheme.
993 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
995 PostTaskToInProcessRendererAndWait(
996 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer,
997 base::Unretained(this)));
1000 // Test that we don't crash when the page contains an iframe that
1001 // was handled as a download (http://crbug.com/42212).
1002 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
1003 SerializeDocumentWithDownloadedIFrame) {
1004 base::FilePath page_file_path = GetTestFilePath(
1005 "dom_serializer", "iframe-src-is-exe.htm");
1006 GURL file_url = net::FilePathToFileURL(page_file_path);
1007 ASSERT_TRUE(file_url.SchemeIsFile());
1008 // Load the test file.
1009 NavigateToURL(shell(), file_url);
1011 PostTaskToInProcessRendererAndWait(
1012 base::Bind(
1013 &DomSerializerTests::
1014 SerializeDocumentWithDownloadedIFrameOnRenderer,
1015 base::Unretained(this), file_url));
1018 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
1019 SubResourceForElementsInNonHTMLNamespace) {
1020 base::FilePath page_file_path = GetTestFilePath(
1021 "dom_serializer", "non_html_namespace.htm");
1022 GURL file_url = net::FilePathToFileURL(page_file_path);
1023 NavigateToURL(shell(), file_url);
1025 PostTaskToInProcessRendererAndWait(
1026 base::Bind(
1027 &DomSerializerTests::
1028 SubResourceForElementsInNonHTMLNamespaceOnRenderer,
1029 base::Unretained(this), file_url));
1032 } // namespace content