1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 #include "base/command_line.h"
7 #include "base/compiler_specific.h"
8 #include "base/containers/hash_tables.h"
9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "content/public/browser/render_view_host.h"
14 #include "content/public/browser/web_contents.h"
15 #include "content/public/common/content_switches.h"
16 #include "content/public/renderer/render_view.h"
17 #include "content/public/renderer/render_view_observer.h"
18 #include "content/public/test/content_browser_test.h"
19 #include "content/public/test/content_browser_test_utils.h"
20 #include "content/public/test/test_utils.h"
21 #include "content/renderer/savable_resources.h"
22 #include "content/shell/browser/shell.h"
23 #include "net/base/filename_util.h"
24 #include "net/url_request/url_request_context.h"
25 #include "third_party/WebKit/public/platform/WebCString.h"
26 #include "third_party/WebKit/public/platform/WebData.h"
27 #include "third_party/WebKit/public/platform/WebString.h"
28 #include "third_party/WebKit/public/platform/WebURL.h"
29 #include "third_party/WebKit/public/platform/WebVector.h"
30 #include "third_party/WebKit/public/web/WebDocument.h"
31 #include "third_party/WebKit/public/web/WebDocumentType.h"
32 #include "third_party/WebKit/public/web/WebElement.h"
33 #include "third_party/WebKit/public/web/WebElementCollection.h"
34 #include "third_party/WebKit/public/web/WebLocalFrame.h"
35 #include "third_party/WebKit/public/web/WebNode.h"
36 #include "third_party/WebKit/public/web/WebNodeList.h"
37 #include "third_party/WebKit/public/web/WebPageSerializer.h"
38 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
39 #include "third_party/WebKit/public/web/WebView.h"
41 using blink::WebCString
;
43 using blink::WebDocument
;
44 using blink::WebElement
;
45 using blink::WebElementCollection
;
46 using blink::WebFrame
;
47 using blink::WebLocalFrame
;
49 using blink::WebNodeList
;
50 using blink::WebPageSerializer
;
51 using blink::WebPageSerializerClient
;
52 using blink::WebString
;
55 using blink::WebVector
;
59 // Iterate over the main frame and its sub-frames to find one with a given url.
// The traversal is driven by a local std::vector used as a stack: it is seeded
// with web_view->mainFrame(), the frame on top is compared against |url| via
// its document URL, and frames obtained from
// WebLocalFrame::fromFrameOwnerElement() for <frame>/<iframe> elements are
// pushed for later inspection.
// NOTE(review): the statements guarded by the `if` checks below, the removal
// of the current frame from the stack and the final return are not visible in
// this extract; confirm the result on match / no match against the full file.
60 WebFrame
* FindSubFrameByURL(WebView
* web_view
, const GURL
& url
) {
61 if (!web_view
->mainFrame())
64 std::vector
<WebFrame
*> stack
;
65 stack
.push_back(web_view
->mainFrame());
67 while (!stack
.empty()) {
68 WebFrame
* current_frame
= stack
.back();
// Compare the URL of the frame's document with the requested one.
70 if (GURL(current_frame
->document().url()) == url
)
// Scan every element of the current document looking for frame owners.
72 WebElementCollection all
= current_frame
->document().all();
73 for (WebElement element
= all
.firstItem();
74 !element
.isNull(); element
= all
.nextItem()) {
75 // Check frame tag and iframe tag
76 if (!element
.hasHTMLTagName("frame") && !element
.hasHTMLTagName("iframe"))
78 WebFrame
* sub_frame
= WebLocalFrame::fromFrameOwnerElement(element
);
80 stack
.push_back(sub_frame
);
// Returns true when |doc| has a document type node, i.e. when doc.doctype()
// is not null.
86 bool HasDocType(const WebDocument
& doc
) {
87 return !doc
.doctype().isNull();
90 // Helper function for checking whether the input node is a META tag. Returning
91 // true means it is a META element; otherwise it returns false. The parameter
92 // charset_info receives the charset info if the META tag has a charset declaration.
// NOTE(review): the return statements and several guard bodies of this
// function are not visible in this extract; the notes below describe only the
// statements that are shown.
93 bool IsMetaElement(const WebNode
& node
, std::string
& charset_info
) {
94 if (!node
.isElementNode())
96 const WebElement meta
= node
.toConst
<WebElement
>();
97 if (!meta
.hasHTMLTagName("meta"))
// Clear whatever the caller left in the out-parameter.
99 charset_info
.erase(0, charset_info
.length());
100 // Check the META charset declaration.
101 WebString httpEquiv
= meta
.getAttribute("http-equiv");
102 if (base::LowerCaseEqualsASCII(base::StringPiece16(httpEquiv
),
104 std::string content
= meta
.getAttribute("content").utf8();
// |pos| is the offset of the "charset" keyword inside the content attribute.
// NOTE(review): std::string::find() returns a size_t (npos when absent) that
// is stored in an int here; the not-found handling is not visible in this
// extract.
105 int pos
= content
.find("charset", 0);
107 // Add a dummy charset declaration to charset_info, which indicates this
108 // META tag has charset declaration although we do not get correct value
110 charset_info
.append("has-charset-declaration");
// 7 is the length of the "charset" keyword, so this is the number of
// characters that follow the keyword.
111 int remaining_length
= content
.length() - pos
- 7;
112 if (!remaining_length
)
114 int start_pos
= pos
+ 7;
// Scan forward for the '=' that follows the keyword.
// NOTE(review): |content| is a narrow std::string but is compared against
// wide-character literals (L'=' here, L';' below).
116 while (remaining_length
--)
117 if (content
[start_pos
++] == L
'=')
119 // Skip beginning space.
120 while (remaining_length
) {
121 if (content
[start_pos
] > 0x0020)
126 if (!remaining_length
)
128 int end_pos
= start_pos
;
129 // Now we find out the start point of charset info. Search the end point.
// The value is delimited by the first character <= 0x0020 or by a ';'.
130 while (remaining_length
--) {
131 if (content
[end_pos
] <= 0x0020 || content
[end_pos
] == L
';')
135 // Get actual charset info.
136 charset_info
= content
.substr(start_pos
, end_pos
- start_pos
);
// RenderViewObserver used by the tests to learn when a load has finished. It
// stores |quit_closure| at construction and, in DidFinishLoad, checks whether
// the finished frame is the main frame of the observed render view.
// NOTE(review): the statement guarded by that check is not visible in this
// extract - presumably it runs quit_closure_; confirm against the full file.
143 class LoadObserver
: public RenderViewObserver
{
145 LoadObserver(RenderView
* render_view
, const base::Closure
& quit_closure
)
146 : RenderViewObserver(render_view
),
147 quit_closure_(quit_closure
) {}
// Only a load finishing in the main frame is of interest here.
149 void DidFinishLoad(blink::WebLocalFrame
* frame
) override
{
150 if (frame
== render_view()->GetWebView()->mainFrame())
// Closure supplied by the creator of this observer.
155 base::Closure quit_closure_
;
// Browser-test fixture for the page serializer. It derives from
// ContentBrowserTest and also from WebPageSerializerClient, so the fixture
// itself receives the serialized data through didSerializeDataForFrame().
// The initializer list below starts with serialized_ == false and sets
// local_directory_name_ to the relative path "./dummy_files/".
// NOTE(review): the constructor declaration line that precedes the
// initializer list is not visible in this extract.
158 class DomSerializerTests
: public ContentBrowserTest
,
159 public WebPageSerializerClient
{
162 : serialized_(false),
163 local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) {}
// Appends the switches these tests run with: switches::kSingleProcess and
// switches::kDisableGpu.
// NOTE(review): single-process mode is presumably what lets the fixture reach
// RenderView/WebView objects directly (see GetRenderView()); confirm.
165 void SetUpCommandLine(base::CommandLine
* command_line
) override
{
166 command_line
->AppendSwitch(switches::kSingleProcess
);
168 // Don't want to try to create a GPU process.
169 command_line
->AppendSwitch(switches::kDisableGpu
);
// Caches the routing id of the shell's RenderViewHost in
// render_view_routing_id_; GetRenderView() later resolves it back to a
// RenderView.
173 void SetUpOnMainThread() override
{
174 render_view_routing_id_
=
175 shell()->web_contents()->GetRenderViewHost()->GetRoutingID();
178 // DomSerializerDelegate.
// Serializer callback. |frame_web_url| identifies the frame the chunk belongs
// to, |data| is the serialized chunk and |status| reports progress.
// Bookkeeping done here:
//  - serialization_finish_status_ maps a frame URL spec to "finished" (bool);
//  - serialized_frame_map_ accumulates the serialized text per frame URL spec.
// NOTE(review): the statements that end the AllFramesAreFinished branch and
// the body of the final CurrentFrameIsFinished check are not visible in this
// extract (presumably they set serialized_ and flip the per-frame status).
179 virtual void didSerializeDataForFrame(const WebURL
& frame_web_url
,
180 const WebCString
& data
,
181 PageSerializationStatus status
) {
183 GURL
frame_url(frame_web_url
);
184 // If all frames have finished saving, check the finish status of every frame.
185 if (status
== WebPageSerializerClient::AllFramesAreFinished
) {
186 SerializationFinishStatusMap::iterator it
=
187 serialization_finish_status_
.begin();
188 for (; it
!= serialization_finish_status_
.end(); ++it
)
189 ASSERT_TRUE(it
->second
);
194 // Check finish status of current frame.
195 SerializationFinishStatusMap::iterator it
=
196 serialization_finish_status_
.find(frame_url
.spec());
197 // New frame, set initial status as false.
198 if (it
== serialization_finish_status_
.end())
199 serialization_finish_status_
[frame_url
.spec()] = false;
// Look the entry up again; after the insertion above it must exist.
201 it
= serialization_finish_status_
.find(frame_url
.spec());
202 ASSERT_TRUE(it
!= serialization_finish_status_
.end());
203 // In process frame, finish status should be false.
204 ASSERT_FALSE(it
->second
);
206 // Add data to corresponding frame's content.
207 serialized_frame_map_
[frame_url
.spec()] += data
.data();
209 // Current frame is completed saving, change the finish status.
210 if (status
== WebPageSerializerClient::CurrentFrameIsFinished
)
// Returns true when serialized_frame_map_ holds an entry keyed by the spec of
// |frame_url|, i.e. when serialized data was recorded for that frame.
214 bool HasSerializedFrame(const GURL
& frame_url
) {
215 return serialized_frame_map_
.find(frame_url
.spec()) !=
216 serialized_frame_map_
.end();
// Returns a reference to the serialized text stored for |frame_url|.
// NOTE(review): the lookup uses operator[] on the map, which presumably
// inserts an empty entry when the key is absent; the callers in this file
// check HasSerializedFrame() first.
219 const std::string
& GetSerializedContentForFrame(
220 const GURL
& frame_url
) {
221 return serialized_frame_map_
[frame_url
.spec()];
// Resolves the routing id cached in SetUpOnMainThread() to a RenderView via
// RenderView::FromRoutingID().
224 RenderView
* GetRenderView() {
225 return RenderView::FromRoutingID(render_view_routing_id_
);
// Returns the WebView of the render view returned by GetRenderView().
228 WebView
* GetWebView() {
229 return GetRenderView()->GetWebView();
// Returns the main frame of the WebView returned by GetWebView().
232 WebFrame
* GetMainFrame() {
233 return GetWebView()->mainFrame();
236 // Load a web page from the given |contents| into the main frame.
// |base_url| is handed to the loader in both branches (presumably it is the
// base for resolving relative URLs in the content). |encoding_info| selects
// the branch: when empty, loadHTMLString() is used; the other visible path
// wraps the bytes in a WebData and calls loadData() with the "text/html" MIME
// type and the given encoding.
// A LoadObserver is created with the runner's quit closure before loading.
// NOTE(review): the `else` keyword, closing braces and the wait on |runner|
// are not visible in this extract.
238 void LoadContents(const std::string
& contents
,
239 const GURL
& base_url
,
240 const WebString encoding_info
) {
241 scoped_refptr
<MessageLoopRunner
> runner
= new MessageLoopRunner
;
242 LoadObserver
observer(GetRenderView(), runner
->QuitClosure());
244 // If input encoding is empty, use UTF-8 as default encoding.
245 if (encoding_info
.isEmpty()) {
246 GetMainFrame()->loadHTMLString(contents
, base_url
);
248 WebData
data(contents
.data(), contents
.length());
250 // Do not use WebFrame.LoadHTMLString because it assumes that input
251 // html contents use UTF-8 encoding.
252 // TODO(darin): This should use WebFrame::loadData.
253 WebFrame
* web_frame
= GetMainFrame();
255 ASSERT_TRUE(web_frame
!= NULL
);
257 web_frame
->loadData(data
, "text/html", encoding_info
, base_url
);
263 // Serialize page DOM according to specific page URL. The parameter
264 // recursive_serialization is forwarded to WebPageSerializer::serialize().
// The frame to serialize is located with FindSubFrameByURL(). Two one-element
// vectors are prepared: |links| holding |page_url| and |local_paths| holding
// a dummy file path; the fixture itself is passed as the serializer client
// and local_directory_name_ as the local directory.
// Finally the function asserts that serialized_ has been set.
// NOTE(review): some arguments of the serialize() call and any check on
// |result| are not visible in this extract.
266 void SerializeDomForURL(const GURL
& page_url
,
267 bool recursive_serialization
) {
268 // Find corresponding WebFrame according to page_url.
269 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), page_url
);
270 ASSERT_TRUE(web_frame
!= NULL
);
271 WebVector
<WebURL
> links
;
272 links
.assign(&page_url
, 1);
// Dummy local file name that |page_url| is associated with.
273 WebString file_path
=
274 base::FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")).AsUTF16Unsafe();
275 WebVector
<WebString
> local_paths
;
276 local_paths
.assign(&file_path
, 1);
277 // Start serializing DOM.
278 bool result
= WebPageSerializer::serialize(web_frame
->toWebLocalFrame(),
279 recursive_serialization
,
280 static_cast<WebPageSerializerClient
*>(this),
283 local_directory_name_
.AsUTF16Unsafe());
285 ASSERT_TRUE(serialized_
);
// Checks that a document type survives a serialize/reload round trip:
// asserts the frame for |file_url| has a doctype, serializes it
// (non-recursively), reloads the serialized text with the original document
// encoding and asserts the reloaded main-frame document still has a doctype.
288 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL
& file_url
) {
289 // Make sure original contents have document type.
290 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
291 ASSERT_TRUE(web_frame
!= NULL
);
292 WebDocument doc
= web_frame
->document();
293 ASSERT_TRUE(HasDocType(doc
));
295 SerializeDomForURL(file_url
, false);
296 // Load the serialized contents.
297 ASSERT_TRUE(HasSerializedFrame(file_url
));
298 const std::string
& serialized_contents
=
299 GetSerializedContentForFrame(file_url
);
300 LoadContents(serialized_contents
, file_url
,
301 web_frame
->document().encoding());
302 // Make sure serialized contents still have document type.
303 web_frame
= GetMainFrame();
304 doc
= web_frame
->document();
305 ASSERT_TRUE(HasDocType(doc
));
// Counterpart of the doctype test: asserts the frame for |file_url| has no
// doctype, serializes it (non-recursively), reloads the serialized text with
// the original document encoding and asserts that the reloaded main-frame
// document still has no doctype.
308 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL
& file_url
) {
309 // Make sure original contents do not have document type.
310 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
311 ASSERT_TRUE(web_frame
!= NULL
);
312 WebDocument doc
= web_frame
->document();
313 ASSERT_TRUE(!HasDocType(doc
));
315 SerializeDomForURL(file_url
, false);
316 // Load the serialized contents.
317 ASSERT_TRUE(HasSerializedFrame(file_url
));
318 const std::string
& serialized_contents
=
319 GetSerializedContentForFrame(file_url
);
320 LoadContents(serialized_contents
, file_url
,
321 web_frame
->document().encoding());
322 // Make sure serialized contents do not have document type.
323 web_frame
= GetMainFrame();
324 doc
= web_frame
->document();
325 ASSERT_TRUE(!HasDocType(doc
));
// Serializes the frame showing |xml_file_url| (non-recursively) and asserts
// that the serialized text is exactly equal to |original_contents|.
328 void SerializeXMLDocWithBuiltInEntitiesOnRenderer(
329 const GURL
& xml_file_url
, const std::string
& original_contents
) {
331 SerializeDomForURL(xml_file_url
, false);
332 // Compare the serialized contents with original contents.
333 ASSERT_TRUE(HasSerializedFrame(xml_file_url
));
334 const std::string
& serialized_contents
=
335 GetSerializedContentForFrame(xml_file_url
);
336 ASSERT_EQ(original_contents
, serialized_contents
);
// Checks that serialization adds the mark-of-the-web (MOTW) declaration:
// the declaration generated for |file_url| must be non-empty, must not occur
// in |original_contents|, and must occur in the serialized text.
339 void SerializeHTMLDOMWithAddingMOTWOnRenderer(
340 const GURL
& file_url
, const std::string
& original_contents
) {
341 // Make sure original contents do not have MOTW.
342 std::string motw_declaration
=
343 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
344 ASSERT_FALSE(motw_declaration
.empty());
345 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
346 // declaration to ASCII and search whether original contents has it or not.
// NOTE(review): the visible conversion above is utf8(), not an ASCII
// conversion; the comment above may be stale.
347 ASSERT_TRUE(std::string::npos
== original_contents
.find(motw_declaration
));
350 SerializeDomForURL(file_url
, false);
351 // Make sure the serialized contents have MOTW.
352 ASSERT_TRUE(HasSerializedFrame(file_url
));
353 const std::string
& serialized_contents
=
354 GetSerializedContentForFrame(file_url
);
355 ASSERT_FALSE(std::string::npos
==
356 serialized_contents
.find(motw_declaration
));
// Checks META charset handling for a document that has no charset
// declaration of its own:
//  1. every META child of the original HEAD must yield empty charset info;
//  2. after serializing and reloading, the first child of HEAD must be a META
//     element whose charset equals the reloaded document's encoding;
//  3. no later sibling META element may carry charset info.
359 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer(
360 const GURL
& file_url
) {
361 // Make sure there is no META charset declaration in original document.
362 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
363 ASSERT_TRUE(web_frame
!= NULL
);
364 WebDocument doc
= web_frame
->document();
365 ASSERT_TRUE(doc
.isHTMLDocument());
366 WebElement head_element
= doc
.head();
367 ASSERT_TRUE(!head_element
.isNull());
368 // Go through all children of HEAD element.
369 for (WebNode child
= head_element
.firstChild(); !child
.isNull();
370 child
= child
.nextSibling()) {
371 std::string charset_info
;
372 if (IsMetaElement(child
, charset_info
))
373 ASSERT_TRUE(charset_info
.empty());
376 SerializeDomForURL(file_url
, false);
378 // Load the serialized contents.
379 ASSERT_TRUE(HasSerializedFrame(file_url
));
380 const std::string
& serialized_contents
=
381 GetSerializedContentForFrame(file_url
);
382 LoadContents(serialized_contents
, file_url
,
383 web_frame
->document().encoding());
384 // Make sure the first child of HEAD element is META which has charset
385 // declaration in serialized contents.
386 web_frame
= GetMainFrame();
387 ASSERT_TRUE(web_frame
!= NULL
);
388 doc
= web_frame
->document();
389 ASSERT_TRUE(doc
.isHTMLDocument());
390 head_element
= doc
.head();
391 ASSERT_TRUE(!head_element
.isNull());
392 WebNode meta_node
= head_element
.firstChild();
393 ASSERT_TRUE(!meta_node
.isNull());
394 // Get meta charset info.
395 std::string charset_info2
;
396 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info2
));
397 ASSERT_TRUE(!charset_info2
.empty());
398 ASSERT_EQ(charset_info2
,
399 std::string(web_frame
->document().encoding().utf8()));
401 // Make sure no more additional META tags which have charset declaration.
402 for (WebNode child
= meta_node
.nextSibling(); !child
.isNull();
403 child
= child
.nextSibling()) {
404 std::string charset_info
;
405 if (IsMetaElement(child
, charset_info
))
406 ASSERT_TRUE(charset_info
.empty());
// Checks META charset handling for a document that declares its charset more
// than once:
//  1. the original HEAD must contain more than one META element with charset
//     info;
//  2. after serializing and reloading, the first child of HEAD must be a META
//     element whose charset equals the reloaded document's encoding;
//  3. no later sibling META element may carry charset info.
410 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer(
411 const GURL
& file_url
) {
412 // Make sure there are multiple META charset declarations in the original document.
414 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
415 ASSERT_TRUE(web_frame
!= NULL
);
416 WebDocument doc
= web_frame
->document();
417 ASSERT_TRUE(doc
.isHTMLDocument());
418 WebElement head_ele
= doc
.head();
419 ASSERT_TRUE(!head_ele
.isNull());
420 // Go through all children of HEAD element.
421 int charset_declaration_count
= 0;
422 for (WebNode child
= head_ele
.firstChild(); !child
.isNull();
423 child
= child
.nextSibling()) {
424 std::string charset_info
;
425 if (IsMetaElement(child
, charset_info
) && !charset_info
.empty())
426 charset_declaration_count
++;
428 // The original doc has more than one META tag which has a charset declaration.
429 ASSERT_TRUE(charset_declaration_count
> 1);
432 SerializeDomForURL(file_url
, false);
434 // Load the serialized contents.
435 ASSERT_TRUE(HasSerializedFrame(file_url
));
436 const std::string
& serialized_contents
=
437 GetSerializedContentForFrame(file_url
);
438 LoadContents(serialized_contents
, file_url
,
439 web_frame
->document().encoding());
440 // Make sure only first child of HEAD element is META which has charset
441 // declaration in serialized contents.
442 web_frame
= GetMainFrame();
443 ASSERT_TRUE(web_frame
!= NULL
);
444 doc
= web_frame
->document();
445 ASSERT_TRUE(doc
.isHTMLDocument());
446 head_ele
= doc
.head();
447 ASSERT_TRUE(!head_ele
.isNull());
448 WebNode meta_node
= head_ele
.firstChild();
449 ASSERT_TRUE(!meta_node
.isNull());
450 // Get meta charset info.
451 std::string charset_info2
;
452 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info2
));
453 ASSERT_TRUE(!charset_info2
.empty());
454 ASSERT_EQ(charset_info2
,
455 std::string(web_frame
->document().encoding().utf8()));
457 // Make sure no more additional META tags which have charset declaration.
458 for (WebNode child
= meta_node
.nextSibling(); !child
.isNull();
459 child
= child
.nextSibling()) {
460 std::string charset_info
;
461 if (IsMetaElement(child
, charset_info
))
462 ASSERT_TRUE(charset_info
.empty());
// Loads an in-memory HTML snippet whose BODY text contains characters that
// need escaping, checks the parsed text node value, serializes the document
// and compares the result with the expected string. The expected string is
// built from the MOTW declaration plus the original snippet, with a
// "<head>...</head>" section inserted right after "<html>" when the parsed
// document has a HEAD element.
// NOTE(review): |original_contents| below holds the raw characters &<>"'
// although the test is about HTML entities; the literal looks like entity
// references that were decoded somewhere - confirm against upstream.
466 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() {
467 base::FilePath page_file_path
= GetTestFilePath(
468 "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
469 // Get file URL. The URL is dummy URL to identify the following loading
470 // actions. The test content is in constant:original_contents.
471 GURL file_url
= net::FilePathToFileURL(page_file_path
);
472 ASSERT_TRUE(file_url
.SchemeIsFile());
474 static const char* const original_contents
=
475 "<html><body>&<>\"\'</body></html>";
476 // Load the test contents.
477 LoadContents(original_contents
, file_url
, WebString());
479 // Get BODY's text content in DOM.
480 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
481 ASSERT_TRUE(web_frame
!= NULL
);
482 WebDocument doc
= web_frame
->document();
483 ASSERT_TRUE(doc
.isHTMLDocument());
484 WebElement body_ele
= doc
.body();
485 ASSERT_TRUE(!body_ele
.isNull());
486 WebNode text_node
= body_ele
.firstChild();
487 ASSERT_TRUE(text_node
.isTextNode());
488 ASSERT_TRUE(std::string(text_node
.nodeValue().utf8()) == "&<>\"\'");
490 SerializeDomForURL(file_url
, false);
491 // Compare the serialized contents with original contents.
492 ASSERT_TRUE(HasSerializedFrame(file_url
));
493 const std::string
& serialized_contents
=
494 GetSerializedContentForFrame(file_url
);
495 // Compare the serialized contents with original contents to make sure
497 // Because we add MOTW when serializing DOM, so before comparison, we also
498 // need to add MOTW to original_contents.
499 std::string original_str
=
500 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
501 original_str
+= original_contents
;
502 // Since WebCore now inserts a new HEAD element if there is no HEAD element
503 // when creating BODY element. (Please see
504 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
505 // corresponding META content if we find WebCore-generated HEAD element.
506 if (!doc
.head().isNull()) {
507 WebString encoding
= web_frame
->document().encoding();
508 std::string
htmlTag("<html>");
509 std::string::size_type pos
= original_str
.find(htmlTag
);
510 ASSERT_NE(std::string::npos
, pos
);
// Move |pos| just past "<html>" so the HEAD section is inserted there.
511 pos
+= htmlTag
.length();
512 std::string
head_part("<head>");
514 WebPageSerializer::generateMetaCharsetDeclaration(encoding
).utf8();
515 head_part
+= "</head>";
516 original_str
.insert(pos
, head_part
);
518 ASSERT_EQ(original_str
, serialized_contents
);
// Same idea as the entities-in-text check, but for an attribute value: loads
// an in-memory snippet whose BODY title attribute contains characters that
// need escaping, checks the parsed attribute value, serializes the document
// and compares the result with the MOTW declaration plus the original
// snippet, with a "<head>...</head>" section inserted right after "<html>".
// NOTE(review): as written, the |original_contents| literal below is not a
// well-formed C++ string - the unescaped double quote after `&<>` ends the
// literal early. It looks like entity references that were decoded
// somewhere; confirm against upstream.
521 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() {
522 base::FilePath page_file_path
= GetTestFilePath(
523 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
524 // Get file URL. The URL is dummy URL to identify the following loading
525 // actions. The test content is in constant:original_contents.
526 GURL file_url
= net::FilePathToFileURL(page_file_path
);
527 ASSERT_TRUE(file_url
.SchemeIsFile());
529 static const char* const original_contents
=
530 "<html><body title=\"&<>"'\"></body></html>";
531 // Load the test contents.
532 LoadContents(original_contents
, file_url
, WebString());
533 // Get value of BODY's title attribute in DOM.
534 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
535 ASSERT_TRUE(web_frame
!= NULL
);
536 WebDocument doc
= web_frame
->document();
537 ASSERT_TRUE(doc
.isHTMLDocument());
538 WebElement body_ele
= doc
.body();
539 ASSERT_TRUE(!body_ele
.isNull());
540 WebString value
= body_ele
.getAttribute("title");
541 ASSERT_TRUE(std::string(value
.utf8()) == "&<>\"\'");
543 SerializeDomForURL(file_url
, false);
544 // Compare the serialized contents with original contents.
545 ASSERT_TRUE(HasSerializedFrame(file_url
));
546 const std::string
& serialized_contents
=
547 GetSerializedContentForFrame(file_url
);
548 // Compare the serialized contents with original contents to make sure
550 std::string original_str
=
551 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
552 original_str
+= original_contents
;
554 WebString encoding
= web_frame
->document().encoding();
555 std::string
htmlTag("<html>");
556 std::string::size_type pos
= original_str
.find(htmlTag
);
557 ASSERT_NE(std::string::npos
, pos
);
// Move |pos| just past "<html>" so the HEAD section is inserted there.
558 pos
+= htmlTag
.length();
559 std::string
head_part("<head>");
561 WebPageSerializer::generateMetaCharsetDeclaration(encoding
).utf8();
562 head_part
+= "</head>";
563 original_str
.insert(pos
, head_part
);
565 ASSERT_EQ(original_str
, serialized_contents
);
// Checks handling of non-standard entities: the BODY title attribute and the
// document text must both equal |parsed_value|, and after serialization the
// output must not contain any of the four strings searched for at the end.
// NOTE(review): unlike the sibling helpers, |web_frame| is dereferenced here
// without a preceding ASSERT_TRUE(web_frame != NULL).
// NOTE(review): the find() literals at the end are raw characters although
// the comment talks about HTML entities; they look like entity references
// that were decoded somewhere - confirm against upstream.
568 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL
& file_url
) {
569 // Get value of BODY's title attribute in DOM.
570 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
571 WebDocument doc
= web_frame
->document();
572 ASSERT_TRUE(doc
.isHTMLDocument());
573 WebElement body_element
= doc
.body();
574 // Unescaped string for "%⊅¹'".
575 static const wchar_t parsed_value
[] = {
576 '%', 0x2285, 0x00b9, '\'', 0
578 WebString value
= body_element
.getAttribute("title");
579 WebString content
= doc
.contentAsTextForTesting();
580 ASSERT_TRUE(base::UTF16ToWide(value
) == parsed_value
);
581 ASSERT_TRUE(base::UTF16ToWide(content
) == parsed_value
);
584 SerializeDomForURL(file_url
, false);
585 // Check the serialized string.
586 ASSERT_TRUE(HasSerializedFrame(file_url
));
587 const std::string
& serialized_contents
=
588 GetSerializedContentForFrame(file_url
);
589 // Confirm that the serialized string has no non-standard HTML entities.
590 ASSERT_EQ(std::string::npos
, serialized_contents
.find("%"));
591 ASSERT_EQ(std::string::npos
, serialized_contents
.find("⊅"));
592 ASSERT_EQ(std::string::npos
, serialized_contents
.find("¹"));
593 ASSERT_EQ(std::string::npos
, serialized_contents
.find("'"));
// Checks BASE tag handling across a serialize/reload round trip.
// Before serialization: counts BASE elements (expected to equal
// kTotalBaseTagCountInTestFile), asserts that every link value found has an
// empty scheme, and asserts the document base URL differs from
// |path_dir_url|.
// After reloading the serialized text: asserts that every link value found
// has a non-empty scheme, that there is exactly one more BASE element than
// before, and that the document base URL equals |path_dir_url|.
// NOTE(review): the statements that follow the BASE-tag counters inside both
// loops, and the re-assignment of |all| for the reloaded document, are not
// visible in this extract.
596 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL
& file_url
,
597 const GURL
& path_dir_url
) {
598 // There are total 2 available base tags in this test file.
599 const int kTotalBaseTagCountInTestFile
= 2;
601 // Since for this test, we assume there is no savable sub-resource links for
602 // this test file, also all links are relative URLs in this test file, so we
603 // need to check those relative URLs and make sure document has BASE tag.
604 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
605 ASSERT_TRUE(web_frame
!= NULL
);
606 WebDocument doc
= web_frame
->document();
607 ASSERT_TRUE(doc
.isHTMLDocument());
608 // Go through all descendant nodes.
609 WebElementCollection all
= doc
.all();
610 int original_base_tag_count
= 0;
611 for (WebElement element
= all
.firstItem(); !element
.isNull();
612 element
= all
.nextItem()) {
613 if (element
.hasHTMLTagName("base")) {
614 original_base_tag_count
++;
// Take the sub-resource link of the element; for anchors without one, fall
// back to the href attribute.
617 WebString value
= GetSubResourceLinkFromElement(element
);
618 if (value
.isNull() && element
.hasHTMLTagName("a")) {
619 value
= element
.getAttribute("href");
623 // Each link is relative link.
624 if (!value
.isNull()) {
625 GURL
link(value
.utf8());
626 ASSERT_TRUE(link
.scheme().empty());
630 ASSERT_EQ(original_base_tag_count
, kTotalBaseTagCountInTestFile
);
631 // Make sure in original document, the base URL is not equal with the
633 GURL
original_base_url(doc
.baseURL());
634 ASSERT_NE(original_base_url
, path_dir_url
);
637 SerializeDomForURL(file_url
, false);
639 // Load the serialized contents.
640 ASSERT_TRUE(HasSerializedFrame(file_url
));
641 const std::string
& serialized_contents
=
642 GetSerializedContentForFrame(file_url
);
643 LoadContents(serialized_contents
, file_url
,
644 web_frame
->document().encoding());
646 // Make sure all links are absolute URLs and that there are some number of
647 // BASE tags in serialized HTML data. Each of those BASE tags have same base
648 // URL which is as same as URL of current test file.
649 web_frame
= GetMainFrame();
650 ASSERT_TRUE(web_frame
!= NULL
);
651 doc
= web_frame
->document();
652 ASSERT_TRUE(doc
.isHTMLDocument());
653 // Go through all descendant nodes.
655 int new_base_tag_count
= 0;
656 for (WebNode node
= all
.firstItem(); !node
.isNull();
657 node
= all
.nextItem()) {
658 if (!node
.isElementNode())
660 WebElement element
= node
.to
<WebElement
>();
661 if (element
.hasHTMLTagName("base")) {
662 new_base_tag_count
++;
665 WebString value
= GetSubResourceLinkFromElement(element
);
666 if (value
.isNull() && element
.hasHTMLTagName("a")) {
667 value
= element
.getAttribute("href");
671 // Each link is absolute link.
672 if (!value
.isNull()) {
673 GURL
link(std::string(value
.utf8()));
674 ASSERT_FALSE(link
.scheme().empty());
678 // We have one more added BASE tag which is generated by JavaScript.
679 ASSERT_EQ(new_base_tag_count
, original_base_tag_count
+ 1);
680 // Make sure in new document, the base URL is equal with the |path_dir_url|.
681 GURL
new_base_url(doc
.baseURL());
682 ASSERT_EQ(new_base_url
, path_dir_url
);
// Checks serialization of a document whose HEAD is empty: loads an in-memory
// snippet with an empty HEAD, asserts the HEAD has no children, serializes,
// reloads the serialized text, and asserts that HEAD now has exactly one
// child - a META element whose charset equals the document encoding - and
// that the first child of BODY is the text node "hello world".
685 void SerializeHTMLDOMWithEmptyHeadOnRenderer() {
686 base::FilePath page_file_path
= GetTestFilePath(
687 "dom_serializer", "empty_head.htm");
688 GURL file_url
= net::FilePathToFileURL(page_file_path
);
689 ASSERT_TRUE(file_url
.SchemeIsFile());
691 // Load the test html content.
692 static const char* const empty_head_contents
=
693 "<html><head></head><body>hello world</body></html>";
694 LoadContents(empty_head_contents
, file_url
, WebString());
696 // Make sure the head tag is empty.
697 WebFrame
* web_frame
= GetMainFrame();
698 ASSERT_TRUE(web_frame
!= NULL
);
699 WebDocument doc
= web_frame
->document();
700 ASSERT_TRUE(doc
.isHTMLDocument());
701 WebElement head_element
= doc
.head();
702 ASSERT_TRUE(!head_element
.isNull());
703 ASSERT_TRUE(!head_element
.hasChildNodes());
704 ASSERT_TRUE(head_element
.childNodes().length() == 0);
707 SerializeDomForURL(file_url
, false);
708 // Make sure the serialized contents have META.
709 ASSERT_TRUE(HasSerializedFrame(file_url
));
710 const std::string
& serialized_contents
=
711 GetSerializedContentForFrame(file_url
);
713 // Reload serialized contents and make sure there is only one META tag.
714 LoadContents(serialized_contents
, file_url
,
715 web_frame
->document().encoding());
716 web_frame
= GetMainFrame();
717 ASSERT_TRUE(web_frame
!= NULL
);
718 doc
= web_frame
->document();
719 ASSERT_TRUE(doc
.isHTMLDocument());
720 head_element
= doc
.head();
721 ASSERT_TRUE(!head_element
.isNull());
722 ASSERT_TRUE(head_element
.hasChildNodes());
723 ASSERT_TRUE(head_element
.childNodes().length() == 1);
724 WebNode meta_node
= head_element
.firstChild();
725 ASSERT_TRUE(!meta_node
.isNull());
726 // Get meta charset info.
727 std::string charset_info
;
728 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info
));
729 ASSERT_TRUE(!charset_info
.empty());
730 ASSERT_EQ(charset_info
,
731 std::string(web_frame
->document().encoding().utf8()));
733 // Check the body's first node is text node and its contents are
735 WebElement body_element
= doc
.body();
736 ASSERT_TRUE(!body_element
.isNull());
737 WebNode text_node
= body_element
.firstChild();
738 ASSERT_TRUE(text_node
.isTextNode());
739 WebString text_node_contents
= text_node
.nodeValue();
740 ASSERT_TRUE(std::string(text_node_contents
.utf8()) == "hello world");
// Runs a recursive serialization (second argument true) of the frame showing
// |file_url|. Apart from the assertions inside SerializeDomForURL() there is
// no explicit expectation here; the purpose is to exercise the code path.
743 void SerializeDocumentWithDownloadedIFrameOnRenderer(const GURL
& file_url
) {
744 // Do a recursive serialization. We pass if we don't crash.
745 SerializeDomForURL(file_url
, true);
// Takes the last child of BODY in the frame showing |file_url|, asserts it is
// an element node, and expects GetSubResourceLinkFromElement() to return a
// null string for it.
748 void SubResourceForElementsInNonHTMLNamespaceOnRenderer(
749 const GURL
& file_url
) {
750 WebFrame
* web_frame
= FindSubFrameByURL(GetWebView(), file_url
);
751 ASSERT_TRUE(web_frame
!= NULL
);
752 WebDocument doc
= web_frame
->document();
753 WebNode lastNodeInBody
= doc
.body().lastChild();
754 ASSERT_TRUE(lastNodeInBody
.isElementNode());
755 WebString uri
= GetSubResourceLinkFromElement(
756 lastNodeInBody
.to
<WebElement
>());
757 EXPECT_TRUE(uri
.isNull());
// Routing id of the shell's render view, assigned in SetUpOnMainThread() and
// consumed by GetRenderView().
761 int32 render_view_routing_id_
;
762 // Map frame_url to corresponding serialized_content.
763 typedef base::hash_map
<std::string
, std::string
> SerializedFrameContentMap
;
764 SerializedFrameContentMap serialized_frame_map_
;
765 // Map frame_url to corresponding status of serialization finish.
766 typedef base::hash_map
<std::string
, bool> SerializationFinishStatusMap
;
767 SerializationFinishStatusMap serialization_finish_status_
;
768 // Flag indicates whether the process of serializing DOM is finished or not.
// NOTE(review): the declaration this comment describes is not visible in this
// extract (presumably the serialized_ flag initialized in the constructor).
770 // The local_directory_name_ is a dummy relative path of the directory which
771 // contains all saved auxiliary files, including all sub frames and resources.
772 const base::FilePath local_directory_name_
;
// Test: navigates the shell to dom_serializer/youtube_1.htm and runs
// SerializeHTMLDOMWithDocTypeOnRenderer on the in-process renderer.
// On OS_MACOSX the test name is mapped to its DISABLED_ form.
// NOTE(review): the #else/#endif lines of the conditional below are not
// visible in this extract.
775 // If original contents have document type, the serialized contents also have a document type.
777 // Disabled by ellyjones@ on 2015-05-18, see https://crbug.com/488495.
778 #if defined(OS_MACOSX)
779 #define MAYBE_SerializeHTMLDOMWithDocType DISABLED_SerializeHTMLDOMWithDocType
781 #define MAYBE_SerializeHTMLDOMWithDocType SerializeHTMLDOMWithDocType
784 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
785 MAYBE_SerializeHTMLDOMWithDocType
) {
786 base::FilePath page_file_path
=
787 GetTestFilePath("dom_serializer", "youtube_1.htm");
788 GURL file_url
= net::FilePathToFileURL(page_file_path
);
789 ASSERT_TRUE(file_url
.SchemeIsFile());
790 // Load the test file.
791 NavigateToURL(shell(), file_url
);
793 PostTaskToInProcessRendererAndWait(
794 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer
,
795 base::Unretained(this), file_url
));
// Test: navigates the shell to dom_serializer/youtube_2.htm and runs
// SerializeHTMLDOMWithoutDocTypeOnRenderer on the in-process renderer.
798 // If original contents do not have document type, the serialized contents
799 // also do not have document type.
800 IN_PROC_BROWSER_TEST_F(DomSerializerTests
, SerializeHTMLDOMWithoutDocType
) {
801 base::FilePath page_file_path
=
802 GetTestFilePath("dom_serializer", "youtube_2.htm");
803 GURL file_url
= net::FilePathToFileURL(page_file_path
);
804 ASSERT_TRUE(file_url
.SchemeIsFile());
805 // Load the test file.
806 NavigateToURL(shell(), file_url
);
808 PostTaskToInProcessRendererAndWait(
810 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer
,
811 base::Unretained(this), file_url
));
// Test (currently disabled): reads dom_serializer/note.xml into
// |original_contents|, navigates the shell to dom_serializer/note.html and
// runs SerializeXMLDocWithBuiltInEntitiesOnRenderer with the XML file URL and
// the original text.
814 // Serialize XML document which has all 5 built-in entities. After
815 // finishing serialization, the serialized contents should be same
816 // with original XML document.
818 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
819 // XML headers are handled differently in the merged serializer.
820 // Bug: http://crbug.com/328354
821 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
822 DISABLED_SerializeXMLDocWithBuiltInEntities
) {
823 base::FilePath page_file_path
=
824 GetTestFilePath("dom_serializer", "note.html");
825 base::FilePath xml_file_path
= GetTestFilePath("dom_serializer", "note.xml");
826 // Read original contents for later comparison.
827 std::string original_contents
;
828 ASSERT_TRUE(base::ReadFileToString(xml_file_path
, &original_contents
));
830 GURL file_url
= net::FilePathToFileURL(page_file_path
);
831 GURL xml_file_url
= net::FilePathToFileURL(xml_file_path
);
832 ASSERT_TRUE(file_url
.SchemeIsFile());
833 // Load the test file.
834 NavigateToURL(shell(), file_url
);
836 PostTaskToInProcessRendererAndWait(
838 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer
,
839 base::Unretained(this), xml_file_url
, original_contents
));
// Test: reads dom_serializer/youtube_2.htm into |original_contents|,
// navigates the shell to that file and runs
// SerializeHTMLDOMWithAddingMOTWOnRenderer on the in-process renderer.
842 // When serializing DOM, we add MOTW declaration before html tag.
843 IN_PROC_BROWSER_TEST_F(DomSerializerTests
, SerializeHTMLDOMWithAddingMOTW
) {
844 base::FilePath page_file_path
=
845 GetTestFilePath("dom_serializer", "youtube_2.htm");
846 // Read original contents for later comparison.
847 std::string original_contents
;
848 ASSERT_TRUE(base::ReadFileToString(page_file_path
, &original_contents
));
850 GURL file_url
= net::FilePathToFileURL(page_file_path
);
851 ASSERT_TRUE(file_url
.SchemeIsFile());
853 // Load the test file.
854 NavigateToURL(shell(), file_url
);
856 PostTaskToInProcessRendererAndWait(
858 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer
,
859 base::Unretained(this), file_url
, original_contents
));
862 // When serializing DOM, we will add the META which have correct charset
863 // declaration as first child of HEAD element for resolving WebKit bug:
864 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document
865 // does not have META charset declaration.
866 // Disabled by battre@ on 2015-05-21, see https://crbug.com/488495.
867 #if defined(OS_MACOSX)
868 #define MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc \
869 DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
871 #define MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc \
872 SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
874 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
875 MAYBE_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
) {
876 base::FilePath page_file_path
=
877 GetTestFilePath("dom_serializer", "youtube_1.htm");
879 GURL file_url
= net::FilePathToFileURL(page_file_path
);
880 ASSERT_TRUE(file_url
.SchemeIsFile());
881 // Load the test file.
882 NavigateToURL(shell(), file_url
);
884 PostTaskToInProcessRendererAndWait(
886 &DomSerializerTests::
887 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer
,
888 base::Unretained(this), file_url
));
891 // When serializing DOM, if the original document has multiple META charset
892 // declaration, we will add the META which have correct charset declaration
893 // as first child of HEAD element and remove all original META charset
895 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
896 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc
) {
897 base::FilePath page_file_path
=
898 GetTestFilePath("dom_serializer", "youtube_2.htm");
900 GURL file_url
= net::FilePathToFileURL(page_file_path
);
901 ASSERT_TRUE(file_url
.SchemeIsFile());
902 // Load the test file.
903 NavigateToURL(shell(), file_url
);
905 PostTaskToInProcessRendererAndWait(
907 &DomSerializerTests::
908 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer
,
909 base::Unretained(this), file_url
));
912 // Test situation of html entities in text when serializing HTML DOM.
913 IN_PROC_BROWSER_TEST_F(DomSerializerTests
, SerializeHTMLDOMWithEntitiesInText
) {
914 // Need to spin up the renderer and also navigate to a file url so that the
915 // renderer code doesn't attempt a fork when it sees a load to file scheme
916 // from non-file scheme.
917 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
919 PostTaskToInProcessRendererAndWait(
921 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer
,
922 base::Unretained(this)));
925 // Test situation of html entities in attribute value when serializing
927 // This test started to fail at WebKit r65388. See http://crbug.com/52279.
929 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
930 // Some attributes are handled differently in the merged serializer.
931 // Bug: http://crbug.com/328354
932 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
933 DISABLED_SerializeHTMLDOMWithEntitiesInAttributeValue
) {
934 // Need to spin up the renderer and also navigate to a file url so that the
935 // renderer code doesn't attempt a fork when it sees a load to file scheme
936 // from non-file scheme.
937 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
939 PostTaskToInProcessRendererAndWait(
941 &DomSerializerTests::
942 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer
,
943 base::Unretained(this)));
946 // Test situation of non-standard HTML entities when serializing HTML DOM.
947 // This test started to fail at WebKit r65351. See http://crbug.com/52279.
948 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
949 SerializeHTMLDOMWithNonStandardEntities
) {
950 // Make a test file URL and load it.
951 base::FilePath page_file_path
= GetTestFilePath(
952 "dom_serializer", "nonstandard_htmlentities.htm");
953 GURL file_url
= net::FilePathToFileURL(page_file_path
);
954 NavigateToURL(shell(), file_url
);
956 PostTaskToInProcessRendererAndWait(
958 &DomSerializerTests::
959 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer
,
960 base::Unretained(this), file_url
));
963 // Test situation of BASE tag in original document when serializing HTML DOM.
964 // When serializing, we should comment the BASE tag, append a new BASE tag.
965 // rewrite all the savable URLs to relative local path, and change other URLs
968 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
969 // Base tags are handled a bit different in merged version.
970 // Bug: http://crbug.com/328354
971 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
972 DISABLED_SerializeHTMLDOMWithBaseTag
) {
973 base::FilePath page_file_path
= GetTestFilePath(
974 "dom_serializer", "html_doc_has_base_tag.htm");
976 // Get page dir URL which is base URL of this file.
977 base::FilePath dir_name
= page_file_path
.DirName();
978 dir_name
= dir_name
.Append(
979 base::FilePath::StringType(base::FilePath::kSeparators
[0], 1));
980 GURL path_dir_url
= net::FilePathToFileURL(dir_name
);
983 GURL file_url
= net::FilePathToFileURL(page_file_path
);
984 ASSERT_TRUE(file_url
.SchemeIsFile());
985 // Load the test file.
986 NavigateToURL(shell(), file_url
);
988 PostTaskToInProcessRendererAndWait(
990 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer
,
991 base::Unretained(this), file_url
, path_dir_url
));
994 // Serializing page which has an empty HEAD tag.
995 IN_PROC_BROWSER_TEST_F(DomSerializerTests
, SerializeHTMLDOMWithEmptyHead
) {
996 // Need to spin up the renderer and also navigate to a file url so that the
997 // renderer code doesn't attempt a fork when it sees a load to file scheme
998 // from non-file scheme.
999 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
1001 PostTaskToInProcessRendererAndWait(
1002 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer
,
1003 base::Unretained(this)));
1006 // Test that we don't crash when the page contains an iframe that
1007 // was handled as a download (http://crbug.com/42212).
1008 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
1009 SerializeDocumentWithDownloadedIFrame
) {
1010 base::FilePath page_file_path
= GetTestFilePath(
1011 "dom_serializer", "iframe-src-is-exe.htm");
1012 GURL file_url
= net::FilePathToFileURL(page_file_path
);
1013 ASSERT_TRUE(file_url
.SchemeIsFile());
1014 // Load the test file.
1015 NavigateToURL(shell(), file_url
);
1017 PostTaskToInProcessRendererAndWait(
1019 &DomSerializerTests::
1020 SerializeDocumentWithDownloadedIFrameOnRenderer
,
1021 base::Unretained(this), file_url
));
1024 IN_PROC_BROWSER_TEST_F(DomSerializerTests
,
1025 SubResourceForElementsInNonHTMLNamespace
) {
1026 base::FilePath page_file_path
= GetTestFilePath(
1027 "dom_serializer", "non_html_namespace.htm");
1028 GURL file_url
= net::FilePathToFileURL(page_file_path
);
1029 NavigateToURL(shell(), file_url
);
1031 PostTaskToInProcessRendererAndWait(
1033 &DomSerializerTests::
1034 SubResourceForElementsInNonHTMLNamespaceOnRenderer
,
1035 base::Unretained(this), file_url
));
1038 } // namespace content