Add ICU message format support
[chromium-blink-merge.git] / content / renderer / dom_serializer_browsertest.cc
blob78dbe9103e68b0325b3763ba1c9e80331b2b05a3
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/bind.h"
6 #include "base/command_line.h"
7 #include "base/compiler_specific.h"
8 #include "base/containers/hash_tables.h"
9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/renderer/render_view.h"
15 #include "content/public/renderer/render_view_observer.h"
16 #include "content/public/test/content_browser_test.h"
17 #include "content/public/test/content_browser_test_utils.h"
18 #include "content/public/test/test_utils.h"
19 #include "content/renderer/savable_resources.h"
20 #include "content/shell/browser/shell.h"
21 #include "net/base/filename_util.h"
22 #include "net/url_request/url_request_context.h"
23 #include "third_party/WebKit/public/platform/WebCString.h"
24 #include "third_party/WebKit/public/platform/WebData.h"
25 #include "third_party/WebKit/public/platform/WebString.h"
26 #include "third_party/WebKit/public/platform/WebURL.h"
27 #include "third_party/WebKit/public/platform/WebVector.h"
28 #include "third_party/WebKit/public/web/WebDocument.h"
29 #include "third_party/WebKit/public/web/WebElement.h"
30 #include "third_party/WebKit/public/web/WebElementCollection.h"
31 #include "third_party/WebKit/public/web/WebLocalFrame.h"
32 #include "third_party/WebKit/public/web/WebNode.h"
33 #include "third_party/WebKit/public/web/WebNodeList.h"
34 #include "third_party/WebKit/public/web/WebPageSerializer.h"
35 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
36 #include "third_party/WebKit/public/web/WebView.h"
38 using blink::WebCString;
39 using blink::WebData;
40 using blink::WebDocument;
41 using blink::WebElement;
42 using blink::WebElementCollection;
43 using blink::WebFrame;
44 using blink::WebLocalFrame;
45 using blink::WebNode;
46 using blink::WebNodeList;
47 using blink::WebPageSerializer;
48 using blink::WebPageSerializerClient;
49 using blink::WebString;
50 using blink::WebURL;
51 using blink::WebView;
52 using blink::WebVector;
namespace {

// Routing ID used to look up the RenderView under test. The first RenderFrame
// is assigned routing ID 1 and the first RenderView is assigned 2.
const int kRenderViewRoutingId = 2;

}  // namespace
61 namespace content {
63 // Iterate recursively over sub-frames to find one with with a given url.
64 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) {
65 if (!web_view->mainFrame())
66 return NULL;
68 std::vector<WebFrame*> stack;
69 stack.push_back(web_view->mainFrame());
71 while (!stack.empty()) {
72 WebFrame* current_frame = stack.back();
73 stack.pop_back();
74 if (GURL(current_frame->document().url()) == url)
75 return current_frame;
76 WebElementCollection all = current_frame->document().all();
77 for (WebElement element = all.firstItem();
78 !element.isNull(); element = all.nextItem()) {
79 // Check frame tag and iframe tag
80 if (!element.hasHTMLTagName("frame") && !element.hasHTMLTagName("iframe"))
81 continue;
82 WebFrame* sub_frame = WebLocalFrame::fromFrameOwnerElement(element);
83 if (sub_frame)
84 stack.push_back(sub_frame);
87 return NULL;
90 // Helper function that test whether the first node in the doc is a doc type
91 // node.
92 bool HasDocType(const WebDocument& doc) {
93 WebNode node = doc.firstChild();
94 if (node.isNull())
95 return false;
96 return node.nodeType() == WebNode::DocumentTypeNode;
99 // Helper function for checking whether input node is META tag. Return true
100 // means it is META element, otherwise return false. The parameter charset_info
101 // return actual charset info if the META tag has charset declaration.
102 bool IsMetaElement(const WebNode& node, std::string& charset_info) {
103 if (!node.isElementNode())
104 return false;
105 const WebElement meta = node.toConst<WebElement>();
106 if (!meta.hasHTMLTagName("meta"))
107 return false;
108 charset_info.erase(0, charset_info.length());
109 // Check the META charset declaration.
110 WebString httpEquiv = meta.getAttribute("http-equiv");
111 if (base::LowerCaseEqualsASCII(base::StringPiece16(httpEquiv),
112 "content-type")) {
113 std::string content = meta.getAttribute("content").utf8();
114 int pos = content.find("charset", 0);
115 if (pos > -1) {
116 // Add a dummy charset declaration to charset_info, which indicates this
117 // META tag has charset declaration although we do not get correct value
118 // yet.
119 charset_info.append("has-charset-declaration");
120 int remaining_length = content.length() - pos - 7;
121 if (!remaining_length)
122 return true;
123 int start_pos = pos + 7;
124 // Find "=" symbol.
125 while (remaining_length--)
126 if (content[start_pos++] == L'=')
127 break;
128 // Skip beginning space.
129 while (remaining_length) {
130 if (content[start_pos] > 0x0020)
131 break;
132 ++start_pos;
133 --remaining_length;
135 if (!remaining_length)
136 return true;
137 int end_pos = start_pos;
138 // Now we find out the start point of charset info. Search the end point.
139 while (remaining_length--) {
140 if (content[end_pos] <= 0x0020 || content[end_pos] == L';')
141 break;
142 ++end_pos;
144 // Get actual charset info.
145 charset_info = content.substr(start_pos, end_pos - start_pos);
146 return true;
149 return true;
152 class LoadObserver : public RenderViewObserver {
153 public:
154 LoadObserver(RenderView* render_view, const base::Closure& quit_closure)
155 : RenderViewObserver(render_view),
156 quit_closure_(quit_closure) {}
158 void DidFinishLoad(blink::WebLocalFrame* frame) override {
159 if (frame == render_view()->GetWebView()->mainFrame())
160 quit_closure_.Run();
163 private:
164 base::Closure quit_closure_;
167 class DomSerializerTests : public ContentBrowserTest,
168 public WebPageSerializerClient {
169 public:
170 DomSerializerTests()
171 : serialized_(false),
172 local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) {}
174 void SetUpCommandLine(base::CommandLine* command_line) override {
175 command_line->AppendSwitch(switches::kSingleProcess);
176 #if defined(OS_WIN)
177 // Don't want to try to create a GPU process.
178 command_line->AppendSwitch(switches::kDisableGpu);
179 #endif
182 // DomSerializerDelegate.
183 virtual void didSerializeDataForFrame(const WebURL& frame_web_url,
184 const WebCString& data,
185 PageSerializationStatus status) {
187 GURL frame_url(frame_web_url);
188 // If the all frames are finished saving, check all finish status
189 if (status == WebPageSerializerClient::AllFramesAreFinished) {
190 SerializationFinishStatusMap::iterator it =
191 serialization_finish_status_.begin();
192 for (; it != serialization_finish_status_.end(); ++it)
193 ASSERT_TRUE(it->second);
194 serialized_ = true;
195 return;
198 // Check finish status of current frame.
199 SerializationFinishStatusMap::iterator it =
200 serialization_finish_status_.find(frame_url.spec());
201 // New frame, set initial status as false.
202 if (it == serialization_finish_status_.end())
203 serialization_finish_status_[frame_url.spec()] = false;
205 it = serialization_finish_status_.find(frame_url.spec());
206 ASSERT_TRUE(it != serialization_finish_status_.end());
207 // In process frame, finish status should be false.
208 ASSERT_FALSE(it->second);
210 // Add data to corresponding frame's content.
211 serialized_frame_map_[frame_url.spec()] += data.data();
213 // Current frame is completed saving, change the finish status.
214 if (status == WebPageSerializerClient::CurrentFrameIsFinished)
215 it->second = true;
218 bool HasSerializedFrame(const GURL& frame_url) {
219 return serialized_frame_map_.find(frame_url.spec()) !=
220 serialized_frame_map_.end();
223 const std::string& GetSerializedContentForFrame(
224 const GURL& frame_url) {
225 return serialized_frame_map_[frame_url.spec()];
228 RenderView* GetRenderView() {
229 // We could have the test on the UI thread get the WebContent's routing ID,
230 // but we know this will be the first RV so skip that and just hardcode it.
231 return RenderView::FromRoutingID(kRenderViewRoutingId);
234 WebView* GetWebView() {
235 return GetRenderView()->GetWebView();
238 WebFrame* GetMainFrame() {
239 return GetWebView()->mainFrame();
242 // Load web page according to input content and relative URLs within
243 // the document.
244 void LoadContents(const std::string& contents,
245 const GURL& base_url,
246 const WebString encoding_info) {
247 scoped_refptr<MessageLoopRunner> runner = new MessageLoopRunner;
248 LoadObserver observer(GetRenderView(), runner->QuitClosure());
250 // If input encoding is empty, use UTF-8 as default encoding.
251 if (encoding_info.isEmpty()) {
252 GetMainFrame()->loadHTMLString(contents, base_url);
253 } else {
254 WebData data(contents.data(), contents.length());
256 // Do not use WebFrame.LoadHTMLString because it assumes that input
257 // html contents use UTF-8 encoding.
258 // TODO(darin): This should use WebFrame::loadData.
259 WebFrame* web_frame = GetMainFrame();
261 ASSERT_TRUE(web_frame != NULL);
263 web_frame->loadData(data, "text/html", encoding_info, base_url);
266 runner->Run();
269 // Serialize page DOM according to specific page URL. The parameter
270 // recursive_serialization indicates whether we will serialize all
271 // sub-frames.
272 void SerializeDomForURL(const GURL& page_url,
273 bool recursive_serialization) {
274 // Find corresponding WebFrame according to page_url.
275 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), page_url);
276 ASSERT_TRUE(web_frame != NULL);
277 WebVector<WebURL> links;
278 links.assign(&page_url, 1);
279 WebString file_path =
280 base::FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")).AsUTF16Unsafe();
281 WebVector<WebString> local_paths;
282 local_paths.assign(&file_path, 1);
283 // Start serializing DOM.
284 bool result = WebPageSerializer::serialize(web_frame->toWebLocalFrame(),
285 recursive_serialization,
286 static_cast<WebPageSerializerClient*>(this),
287 links,
288 local_paths,
289 local_directory_name_.AsUTF16Unsafe());
290 ASSERT_TRUE(result);
291 ASSERT_TRUE(serialized_);
294 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) {
295 // Make sure original contents have document type.
296 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
297 ASSERT_TRUE(web_frame != NULL);
298 WebDocument doc = web_frame->document();
299 ASSERT_TRUE(HasDocType(doc));
300 // Do serialization.
301 SerializeDomForURL(file_url, false);
302 // Load the serialized contents.
303 ASSERT_TRUE(HasSerializedFrame(file_url));
304 const std::string& serialized_contents =
305 GetSerializedContentForFrame(file_url);
306 LoadContents(serialized_contents, file_url,
307 web_frame->document().encoding());
308 // Make sure serialized contents still have document type.
309 web_frame = GetMainFrame();
310 doc = web_frame->document();
311 ASSERT_TRUE(HasDocType(doc));
314 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) {
315 // Make sure original contents do not have document type.
316 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
317 ASSERT_TRUE(web_frame != NULL);
318 WebDocument doc = web_frame->document();
319 ASSERT_TRUE(!HasDocType(doc));
320 // Do serialization.
321 SerializeDomForURL(file_url, false);
322 // Load the serialized contents.
323 ASSERT_TRUE(HasSerializedFrame(file_url));
324 const std::string& serialized_contents =
325 GetSerializedContentForFrame(file_url);
326 LoadContents(serialized_contents, file_url,
327 web_frame->document().encoding());
328 // Make sure serialized contents do not have document type.
329 web_frame = GetMainFrame();
330 doc = web_frame->document();
331 ASSERT_TRUE(!HasDocType(doc));
334 void SerializeXMLDocWithBuiltInEntitiesOnRenderer(
335 const GURL& xml_file_url, const std::string& original_contents) {
336 // Do serialization.
337 SerializeDomForURL(xml_file_url, false);
338 // Compare the serialized contents with original contents.
339 ASSERT_TRUE(HasSerializedFrame(xml_file_url));
340 const std::string& serialized_contents =
341 GetSerializedContentForFrame(xml_file_url);
342 ASSERT_EQ(original_contents, serialized_contents);
345 void SerializeHTMLDOMWithAddingMOTWOnRenderer(
346 const GURL& file_url, const std::string& original_contents) {
347 // Make sure original contents does not have MOTW;
348 std::string motw_declaration =
349 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
350 ASSERT_FALSE(motw_declaration.empty());
351 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
352 // declaration to ASCII and search whether original contents has it or not.
353 ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration));
355 // Do serialization.
356 SerializeDomForURL(file_url, false);
357 // Make sure the serialized contents have MOTW ;
358 ASSERT_TRUE(HasSerializedFrame(file_url));
359 const std::string& serialized_contents =
360 GetSerializedContentForFrame(file_url);
361 ASSERT_FALSE(std::string::npos ==
362 serialized_contents.find(motw_declaration));
365 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer(
366 const GURL& file_url) {
367 // Make sure there is no META charset declaration in original document.
368 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
369 ASSERT_TRUE(web_frame != NULL);
370 WebDocument doc = web_frame->document();
371 ASSERT_TRUE(doc.isHTMLDocument());
372 WebElement head_element = doc.head();
373 ASSERT_TRUE(!head_element.isNull());
374 // Go through all children of HEAD element.
375 for (WebNode child = head_element.firstChild(); !child.isNull();
376 child = child.nextSibling()) {
377 std::string charset_info;
378 if (IsMetaElement(child, charset_info))
379 ASSERT_TRUE(charset_info.empty());
381 // Do serialization.
382 SerializeDomForURL(file_url, false);
384 // Load the serialized contents.
385 ASSERT_TRUE(HasSerializedFrame(file_url));
386 const std::string& serialized_contents =
387 GetSerializedContentForFrame(file_url);
388 LoadContents(serialized_contents, file_url,
389 web_frame->document().encoding());
390 // Make sure the first child of HEAD element is META which has charset
391 // declaration in serialized contents.
392 web_frame = GetMainFrame();
393 ASSERT_TRUE(web_frame != NULL);
394 doc = web_frame->document();
395 ASSERT_TRUE(doc.isHTMLDocument());
396 head_element = doc.head();
397 ASSERT_TRUE(!head_element.isNull());
398 WebNode meta_node = head_element.firstChild();
399 ASSERT_TRUE(!meta_node.isNull());
400 // Get meta charset info.
401 std::string charset_info2;
402 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
403 ASSERT_TRUE(!charset_info2.empty());
404 ASSERT_EQ(charset_info2,
405 std::string(web_frame->document().encoding().utf8()));
407 // Make sure no more additional META tags which have charset declaration.
408 for (WebNode child = meta_node.nextSibling(); !child.isNull();
409 child = child.nextSibling()) {
410 std::string charset_info;
411 if (IsMetaElement(child, charset_info))
412 ASSERT_TRUE(charset_info.empty());
416 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer(
417 const GURL& file_url) {
418 // Make sure there are multiple META charset declarations in original
419 // document.
420 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
421 ASSERT_TRUE(web_frame != NULL);
422 WebDocument doc = web_frame->document();
423 ASSERT_TRUE(doc.isHTMLDocument());
424 WebElement head_ele = doc.head();
425 ASSERT_TRUE(!head_ele.isNull());
426 // Go through all children of HEAD element.
427 int charset_declaration_count = 0;
428 for (WebNode child = head_ele.firstChild(); !child.isNull();
429 child = child.nextSibling()) {
430 std::string charset_info;
431 if (IsMetaElement(child, charset_info) && !charset_info.empty())
432 charset_declaration_count++;
434 // The original doc has more than META tags which have charset declaration.
435 ASSERT_TRUE(charset_declaration_count > 1);
437 // Do serialization.
438 SerializeDomForURL(file_url, false);
440 // Load the serialized contents.
441 ASSERT_TRUE(HasSerializedFrame(file_url));
442 const std::string& serialized_contents =
443 GetSerializedContentForFrame(file_url);
444 LoadContents(serialized_contents, file_url,
445 web_frame->document().encoding());
446 // Make sure only first child of HEAD element is META which has charset
447 // declaration in serialized contents.
448 web_frame = GetMainFrame();
449 ASSERT_TRUE(web_frame != NULL);
450 doc = web_frame->document();
451 ASSERT_TRUE(doc.isHTMLDocument());
452 head_ele = doc.head();
453 ASSERT_TRUE(!head_ele.isNull());
454 WebNode meta_node = head_ele.firstChild();
455 ASSERT_TRUE(!meta_node.isNull());
456 // Get meta charset info.
457 std::string charset_info2;
458 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
459 ASSERT_TRUE(!charset_info2.empty());
460 ASSERT_EQ(charset_info2,
461 std::string(web_frame->document().encoding().utf8()));
463 // Make sure no more additional META tags which have charset declaration.
464 for (WebNode child = meta_node.nextSibling(); !child.isNull();
465 child = child.nextSibling()) {
466 std::string charset_info;
467 if (IsMetaElement(child, charset_info))
468 ASSERT_TRUE(charset_info.empty());
472 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() {
473 base::FilePath page_file_path = GetTestFilePath(
474 "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
475 // Get file URL. The URL is dummy URL to identify the following loading
476 // actions. The test content is in constant:original_contents.
477 GURL file_url = net::FilePathToFileURL(page_file_path);
478 ASSERT_TRUE(file_url.SchemeIsFile());
479 // Test contents.
480 static const char* const original_contents =
481 "<html><body>&amp;&lt;&gt;\"\'</body></html>";
482 // Load the test contents.
483 LoadContents(original_contents, file_url, WebString());
485 // Get BODY's text content in DOM.
486 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
487 ASSERT_TRUE(web_frame != NULL);
488 WebDocument doc = web_frame->document();
489 ASSERT_TRUE(doc.isHTMLDocument());
490 WebElement body_ele = doc.body();
491 ASSERT_TRUE(!body_ele.isNull());
492 WebNode text_node = body_ele.firstChild();
493 ASSERT_TRUE(text_node.isTextNode());
494 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) ==
495 "&amp;&lt;&gt;\"\'");
496 // Do serialization.
497 SerializeDomForURL(file_url, false);
498 // Compare the serialized contents with original contents.
499 ASSERT_TRUE(HasSerializedFrame(file_url));
500 const std::string& serialized_contents =
501 GetSerializedContentForFrame(file_url);
502 // Compare the serialized contents with original contents to make sure
503 // they are same.
504 // Because we add MOTW when serializing DOM, so before comparison, we also
505 // need to add MOTW to original_contents.
506 std::string original_str =
507 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
508 original_str += original_contents;
509 // Since WebCore now inserts a new HEAD element if there is no HEAD element
510 // when creating BODY element. (Please see
511 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
512 // corresponding META content if we find WebCore-generated HEAD element.
513 if (!doc.head().isNull()) {
514 WebString encoding = web_frame->document().encoding();
515 std::string htmlTag("<html>");
516 std::string::size_type pos = original_str.find(htmlTag);
517 ASSERT_NE(std::string::npos, pos);
518 pos += htmlTag.length();
519 std::string head_part("<head>");
520 head_part +=
521 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
522 head_part += "</head>";
523 original_str.insert(pos, head_part);
525 ASSERT_EQ(original_str, serialized_contents);
528 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() {
529 base::FilePath page_file_path = GetTestFilePath(
530 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
531 // Get file URL. The URL is dummy URL to identify the following loading
532 // actions. The test content is in constant:original_contents.
533 GURL file_url = net::FilePathToFileURL(page_file_path);
534 ASSERT_TRUE(file_url.SchemeIsFile());
535 // Test contents.
536 static const char* const original_contents =
537 "<html><body title=\"&amp;<>&quot;'\"></body></html>";
538 // Load the test contents.
539 LoadContents(original_contents, file_url, WebString());
540 // Get value of BODY's title attribute in DOM.
541 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
542 ASSERT_TRUE(web_frame != NULL);
543 WebDocument doc = web_frame->document();
544 ASSERT_TRUE(doc.isHTMLDocument());
545 WebElement body_ele = doc.body();
546 ASSERT_TRUE(!body_ele.isNull());
547 WebString value = body_ele.getAttribute("title");
548 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'");
549 // Do serialization.
550 SerializeDomForURL(file_url, false);
551 // Compare the serialized contents with original contents.
552 ASSERT_TRUE(HasSerializedFrame(file_url));
553 const std::string& serialized_contents =
554 GetSerializedContentForFrame(file_url);
555 // Compare the serialized contents with original contents to make sure
556 // they are same.
557 std::string original_str =
558 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
559 original_str += original_contents;
560 if (!doc.isNull()) {
561 WebString encoding = web_frame->document().encoding();
562 std::string htmlTag("<html>");
563 std::string::size_type pos = original_str.find(htmlTag);
564 ASSERT_NE(std::string::npos, pos);
565 pos += htmlTag.length();
566 std::string head_part("<head>");
567 head_part +=
568 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
569 head_part += "</head>";
570 original_str.insert(pos, head_part);
572 ASSERT_EQ(original_str, serialized_contents);
575 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) {
576 // Get value of BODY's title attribute in DOM.
577 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
578 WebDocument doc = web_frame->document();
579 ASSERT_TRUE(doc.isHTMLDocument());
580 WebElement body_element = doc.body();
581 // Unescaped string for "&percnt;&nsup;&sup1;&apos;".
582 static const wchar_t parsed_value[] = {
583 '%', 0x2285, 0x00b9, '\'', 0
585 WebString value = body_element.getAttribute("title");
586 WebString content = doc.contentAsTextForTesting();
587 ASSERT_TRUE(base::UTF16ToWide(value) == parsed_value);
588 ASSERT_TRUE(base::UTF16ToWide(content) == parsed_value);
590 // Do serialization.
591 SerializeDomForURL(file_url, false);
592 // Check the serialized string.
593 ASSERT_TRUE(HasSerializedFrame(file_url));
594 const std::string& serialized_contents =
595 GetSerializedContentForFrame(file_url);
596 // Confirm that the serialized string has no non-standard HTML entities.
597 ASSERT_EQ(std::string::npos, serialized_contents.find("&percnt;"));
598 ASSERT_EQ(std::string::npos, serialized_contents.find("&nsup;"));
599 ASSERT_EQ(std::string::npos, serialized_contents.find("&sup1;"));
600 ASSERT_EQ(std::string::npos, serialized_contents.find("&apos;"));
603 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url,
604 const GURL& path_dir_url) {
605 // There are total 2 available base tags in this test file.
606 const int kTotalBaseTagCountInTestFile = 2;
608 // Since for this test, we assume there is no savable sub-resource links for
609 // this test file, also all links are relative URLs in this test file, so we
610 // need to check those relative URLs and make sure document has BASE tag.
611 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
612 ASSERT_TRUE(web_frame != NULL);
613 WebDocument doc = web_frame->document();
614 ASSERT_TRUE(doc.isHTMLDocument());
615 // Go through all descent nodes.
616 WebElementCollection all = doc.all();
617 int original_base_tag_count = 0;
618 for (WebElement element = all.firstItem(); !element.isNull();
619 element = all.nextItem()) {
620 if (element.hasHTMLTagName("base")) {
621 original_base_tag_count++;
622 } else {
623 // Get link.
624 WebString value = GetSubResourceLinkFromElement(element);
625 if (value.isNull() && element.hasHTMLTagName("a")) {
626 value = element.getAttribute("href");
627 if (value.isEmpty())
628 value = WebString();
630 // Each link is relative link.
631 if (!value.isNull()) {
632 GURL link(value.utf8());
633 ASSERT_TRUE(link.scheme().empty());
637 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
638 // Make sure in original document, the base URL is not equal with the
639 // |path_dir_url|.
640 GURL original_base_url(doc.baseURL());
641 ASSERT_NE(original_base_url, path_dir_url);
643 // Do serialization.
644 SerializeDomForURL(file_url, false);
646 // Load the serialized contents.
647 ASSERT_TRUE(HasSerializedFrame(file_url));
648 const std::string& serialized_contents =
649 GetSerializedContentForFrame(file_url);
650 LoadContents(serialized_contents, file_url,
651 web_frame->document().encoding());
653 // Make sure all links are absolute URLs and doc there are some number of
654 // BASE tags in serialized HTML data. Each of those BASE tags have same base
655 // URL which is as same as URL of current test file.
656 web_frame = GetMainFrame();
657 ASSERT_TRUE(web_frame != NULL);
658 doc = web_frame->document();
659 ASSERT_TRUE(doc.isHTMLDocument());
660 // Go through all descent nodes.
661 all = doc.all();
662 int new_base_tag_count = 0;
663 for (WebNode node = all.firstItem(); !node.isNull();
664 node = all.nextItem()) {
665 if (!node.isElementNode())
666 continue;
667 WebElement element = node.to<WebElement>();
668 if (element.hasHTMLTagName("base")) {
669 new_base_tag_count++;
670 } else {
671 // Get link.
672 WebString value = GetSubResourceLinkFromElement(element);
673 if (value.isNull() && element.hasHTMLTagName("a")) {
674 value = element.getAttribute("href");
675 if (value.isEmpty())
676 value = WebString();
678 // Each link is absolute link.
679 if (!value.isNull()) {
680 GURL link(std::string(value.utf8()));
681 ASSERT_FALSE(link.scheme().empty());
685 // We should have the same amount of base tags
686 ASSERT_EQ(new_base_tag_count, original_base_tag_count);
687 // Make sure in new document, the base URL is equal with the |path_dir_url|.
688 GURL new_base_url(doc.baseURL());
689 ASSERT_EQ(new_base_url, path_dir_url);
692 void SerializeHTMLDOMWithEmptyHeadOnRenderer() {
693 base::FilePath page_file_path = GetTestFilePath(
694 "dom_serializer", "empty_head.htm");
695 GURL file_url = net::FilePathToFileURL(page_file_path);
696 ASSERT_TRUE(file_url.SchemeIsFile());
698 // Load the test html content.
699 static const char* const empty_head_contents =
700 "<html><head></head><body>hello world</body></html>";
701 LoadContents(empty_head_contents, file_url, WebString());
703 // Make sure the head tag is empty.
704 WebFrame* web_frame = GetMainFrame();
705 ASSERT_TRUE(web_frame != NULL);
706 WebDocument doc = web_frame->document();
707 ASSERT_TRUE(doc.isHTMLDocument());
708 WebElement head_element = doc.head();
709 ASSERT_TRUE(!head_element.isNull());
710 ASSERT_TRUE(!head_element.hasChildNodes());
711 ASSERT_TRUE(head_element.childNodes().length() == 0);
713 // Do serialization.
714 SerializeDomForURL(file_url, false);
715 // Make sure the serialized contents have META ;
716 ASSERT_TRUE(HasSerializedFrame(file_url));
717 const std::string& serialized_contents =
718 GetSerializedContentForFrame(file_url);
720 // Reload serialized contents and make sure there is only one META tag.
721 LoadContents(serialized_contents, file_url,
722 web_frame->document().encoding());
723 web_frame = GetMainFrame();
724 ASSERT_TRUE(web_frame != NULL);
725 doc = web_frame->document();
726 ASSERT_TRUE(doc.isHTMLDocument());
727 head_element = doc.head();
728 ASSERT_TRUE(!head_element.isNull());
729 ASSERT_TRUE(head_element.hasChildNodes());
730 ASSERT_TRUE(head_element.childNodes().length() == 1);
731 WebNode meta_node = head_element.firstChild();
732 ASSERT_TRUE(!meta_node.isNull());
733 // Get meta charset info.
734 std::string charset_info;
735 ASSERT_TRUE(IsMetaElement(meta_node, charset_info));
736 ASSERT_TRUE(!charset_info.empty());
737 ASSERT_EQ(charset_info,
738 std::string(web_frame->document().encoding().utf8()));
740 // Check the body's first node is text node and its contents are
741 // "hello world"
742 WebElement body_element = doc.body();
743 ASSERT_TRUE(!body_element.isNull());
744 WebNode text_node = body_element.firstChild();
745 ASSERT_TRUE(text_node.isTextNode());
746 WebString text_node_contents = text_node.nodeValue();
747 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world");
750 void SerializeDocumentWithDownloadedIFrameOnRenderer(const GURL& file_url) {
751 // Do a recursive serialization. We pass if we don't crash.
752 SerializeDomForURL(file_url, true);
755 void SubResourceForElementsInNonHTMLNamespaceOnRenderer(
756 const GURL& file_url) {
757 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
758 ASSERT_TRUE(web_frame != NULL);
759 WebDocument doc = web_frame->document();
760 WebNode lastNodeInBody = doc.body().lastChild();
761 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType());
762 WebString uri = GetSubResourceLinkFromElement(
763 lastNodeInBody.to<WebElement>());
764 EXPECT_TRUE(uri.isNull());
767 private:
768 // Map frame_url to corresponding serialized_content.
769 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap;
770 SerializedFrameContentMap serialized_frame_map_;
771 // Map frame_url to corresponding status of serialization finish.
772 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap;
773 SerializationFinishStatusMap serialization_finish_status_;
774 // Flag indicates whether the process of serializing DOM is finished or not.
775 bool serialized_;
776 // The local_directory_name_ is dummy relative path of directory which
777 // contain all saved auxiliary files included all sub frames and resources.
778 const base::FilePath local_directory_name_;
781 // If original contents have document type, the serialized contents also have
782 // document type.
783 // Disabled by ellyjones@ on 2015-05-18, see https://crbug.com/488495.
784 #if defined(OS_MACOSX)
785 #define MAYBE_SerializeHTMLDOMWithDocType DISABLED_SerializeHTMLDOMWithDocType
786 #else
787 #define MAYBE_SerializeHTMLDOMWithDocType SerializeHTMLDOMWithDocType
788 #endif
790 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
791 MAYBE_SerializeHTMLDOMWithDocType) {
792 base::FilePath page_file_path =
793 GetTestFilePath("dom_serializer", "youtube_1.htm");
794 GURL file_url = net::FilePathToFileURL(page_file_path);
795 ASSERT_TRUE(file_url.SchemeIsFile());
796 // Load the test file.
797 NavigateToURL(shell(), file_url);
799 PostTaskToInProcessRendererAndWait(
800 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer,
801 base::Unretained(this), file_url));
804 // If original contents do not have document type, the serialized contents
805 // also do not have document type.
806 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) {
807 base::FilePath page_file_path =
808 GetTestFilePath("dom_serializer", "youtube_2.htm");
809 GURL file_url = net::FilePathToFileURL(page_file_path);
810 ASSERT_TRUE(file_url.SchemeIsFile());
811 // Load the test file.
812 NavigateToURL(shell(), file_url);
814 PostTaskToInProcessRendererAndWait(
815 base::Bind(
816 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer,
817 base::Unretained(this), file_url));
820 // Serialize XML document which has all 5 built-in entities. After
821 // finishing serialization, the serialized contents should be same
822 // with original XML document.
823 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) {
824 base::FilePath page_file_path =
825 GetTestFilePath("dom_serializer", "note.html");
826 base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml");
827 // Read original contents for later comparison.
828 std::string original_contents;
829 ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents));
830 // Get file URL.
831 GURL file_url = net::FilePathToFileURL(page_file_path);
832 GURL xml_file_url = net::FilePathToFileURL(xml_file_path);
833 ASSERT_TRUE(file_url.SchemeIsFile());
834 // Load the test file.
835 NavigateToURL(shell(), file_url);
837 PostTaskToInProcessRendererAndWait(
838 base::Bind(
839 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer,
840 base::Unretained(this), xml_file_url, original_contents));
843 // When serializing DOM, we add MOTW declaration before html tag.
844 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) {
845 base::FilePath page_file_path =
846 GetTestFilePath("dom_serializer", "youtube_2.htm");
847 // Read original contents for later comparison .
848 std::string original_contents;
849 ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
850 // Get file URL.
851 GURL file_url = net::FilePathToFileURL(page_file_path);
852 ASSERT_TRUE(file_url.SchemeIsFile());
854 // Load the test file.
855 NavigateToURL(shell(), file_url);
857 PostTaskToInProcessRendererAndWait(
858 base::Bind(
859 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer,
860 base::Unretained(this), file_url, original_contents));
863 // When serializing DOM, we will add the META which have correct charset
864 // declaration as first child of HEAD element for resolving WebKit bug:
865 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document
866 // does not have META charset declaration.
867 // Disabled by battre@ on 2015-05-21, see https://crbug.com/488495.
868 #if defined(OS_MACOSX)
869 #define MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc \
870 DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
871 #else
872 #define MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc \
873 SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
874 #endif
875 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
876 MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
877 base::FilePath page_file_path =
878 GetTestFilePath("dom_serializer", "youtube_1.htm");
879 // Get file URL.
880 GURL file_url = net::FilePathToFileURL(page_file_path);
881 ASSERT_TRUE(file_url.SchemeIsFile());
882 // Load the test file.
883 NavigateToURL(shell(), file_url);
885 PostTaskToInProcessRendererAndWait(
886 base::Bind(
887 &DomSerializerTests::
888 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer,
889 base::Unretained(this), file_url));
892 // When serializing DOM, if the original document has multiple META charset
893 // declaration, we will add the META which have correct charset declaration
894 // as first child of HEAD element and remove all original META charset
895 // declarations.
896 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
897 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
898 base::FilePath page_file_path =
899 GetTestFilePath("dom_serializer", "youtube_2.htm");
900 // Get file URL.
901 GURL file_url = net::FilePathToFileURL(page_file_path);
902 ASSERT_TRUE(file_url.SchemeIsFile());
903 // Load the test file.
904 NavigateToURL(shell(), file_url);
906 PostTaskToInProcessRendererAndWait(
907 base::Bind(
908 &DomSerializerTests::
909 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer,
910 base::Unretained(this), file_url));
913 // Test situation of html entities in text when serializing HTML DOM.
914 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) {
915 // Need to spin up the renderer and also navigate to a file url so that the
916 // renderer code doesn't attempt a fork when it sees a load to file scheme
917 // from non-file scheme.
918 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
920 PostTaskToInProcessRendererAndWait(
921 base::Bind(
922 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer,
923 base::Unretained(this)));
926 // Test situation of html entities in attribute value when serializing
927 // HTML DOM.
928 // This test started to fail at WebKit r65388. See http://crbug.com/52279.
929 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
930 SerializeHTMLDOMWithEntitiesInAttributeValue) {
931 // Need to spin up the renderer and also navigate to a file url so that the
932 // renderer code doesn't attempt a fork when it sees a load to file scheme
933 // from non-file scheme.
934 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
936 PostTaskToInProcessRendererAndWait(
937 base::Bind(
938 &DomSerializerTests::
939 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer,
940 base::Unretained(this)));
943 // Test situation of non-standard HTML entities when serializing HTML DOM.
944 // This test started to fail at WebKit r65351. See http://crbug.com/52279.
945 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
946 SerializeHTMLDOMWithNonStandardEntities) {
947 // Make a test file URL and load it.
948 base::FilePath page_file_path = GetTestFilePath(
949 "dom_serializer", "nonstandard_htmlentities.htm");
950 GURL file_url = net::FilePathToFileURL(page_file_path);
951 NavigateToURL(shell(), file_url);
953 PostTaskToInProcessRendererAndWait(
954 base::Bind(
955 &DomSerializerTests::
956 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer,
957 base::Unretained(this), file_url));
960 // Test situation of BASE tag in original document when serializing HTML DOM.
961 // When serializing, we should comment the BASE tag, append a new BASE tag.
962 // rewrite all the savable URLs to relative local path, and change other URLs
963 // to absolute URLs.
964 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) {
965 base::FilePath page_file_path = GetTestFilePath(
966 "dom_serializer", "html_doc_has_base_tag.htm");
968 // Get page dir URL which is base URL of this file.
969 base::FilePath dir_name = page_file_path.DirName();
970 dir_name = dir_name.Append(
971 base::FilePath::StringType(base::FilePath::kSeparators[0], 1));
972 GURL path_dir_url = net::FilePathToFileURL(dir_name);
974 // Get file URL.
975 GURL file_url = net::FilePathToFileURL(page_file_path);
976 ASSERT_TRUE(file_url.SchemeIsFile());
977 // Load the test file.
978 NavigateToURL(shell(), file_url);
980 PostTaskToInProcessRendererAndWait(
981 base::Bind(
982 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer,
983 base::Unretained(this), file_url, path_dir_url));
986 // Serializing page which has an empty HEAD tag.
987 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) {
988 // Need to spin up the renderer and also navigate to a file url so that the
989 // renderer code doesn't attempt a fork when it sees a load to file scheme
990 // from non-file scheme.
991 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
993 PostTaskToInProcessRendererAndWait(
994 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer,
995 base::Unretained(this)));
998 // Test that we don't crash when the page contains an iframe that
999 // was handled as a download (http://crbug.com/42212).
1000 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
1001 SerializeDocumentWithDownloadedIFrame) {
1002 base::FilePath page_file_path = GetTestFilePath(
1003 "dom_serializer", "iframe-src-is-exe.htm");
1004 GURL file_url = net::FilePathToFileURL(page_file_path);
1005 ASSERT_TRUE(file_url.SchemeIsFile());
1006 // Load the test file.
1007 NavigateToURL(shell(), file_url);
1009 PostTaskToInProcessRendererAndWait(
1010 base::Bind(
1011 &DomSerializerTests::
1012 SerializeDocumentWithDownloadedIFrameOnRenderer,
1013 base::Unretained(this), file_url));
1016 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
1017 SubResourceForElementsInNonHTMLNamespace) {
1018 base::FilePath page_file_path = GetTestFilePath(
1019 "dom_serializer", "non_html_namespace.htm");
1020 GURL file_url = net::FilePathToFileURL(page_file_path);
1021 NavigateToURL(shell(), file_url);
1023 PostTaskToInProcessRendererAndWait(
1024 base::Bind(
1025 &DomSerializerTests::
1026 SubResourceForElementsInNonHTMLNamespaceOnRenderer,
1027 base::Unretained(this), file_url));
1030 } // namespace content