1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/savable_resources.h"
9 #include "base/compiler_specific.h"
10 #include "base/logging.h"
11 #include "base/strings/string_util.h"
12 #include "third_party/WebKit/public/platform/WebString.h"
13 #include "third_party/WebKit/public/platform/WebVector.h"
14 #include "third_party/WebKit/public/web/WebDocument.h"
15 #include "third_party/WebKit/public/web/WebElement.h"
16 #include "third_party/WebKit/public/web/WebElementCollection.h"
17 #include "third_party/WebKit/public/web/WebInputElement.h"
18 #include "third_party/WebKit/public/web/WebLocalFrame.h"
19 #include "third_party/WebKit/public/web/WebNode.h"
20 #include "third_party/WebKit/public/web/WebNodeList.h"
21 #include "third_party/WebKit/public/web/WebView.h"
23 using blink::WebDocument
;
24 using blink::WebElement
;
25 using blink::WebElementCollection
;
26 using blink::WebFrame
;
27 using blink::WebInputElement
;
28 using blink::WebLocalFrame
;
30 using blink::WebNodeList
;
31 using blink::WebString
;
32 using blink::WebVector
;
38 // Get all savable resource links from current element. One element might
39 // have more than one resource link. It is possible to have some links
40 // in one CSS stylesheet.
41 void GetSavableResourceLinkForElement(
42 const WebElement
& element
,
43 const WebDocument
& current_doc
,
44 SavableResourcesResult
* result
) {
45 // Skipping frame and iframe tag.
46 if (element
.hasHTMLTagName("iframe") || element
.hasHTMLTagName("frame"))
49 // Check whether the node has sub resource URL or not.
50 WebString value
= GetSubResourceLinkFromElement(element
);
54 GURL u
= current_doc
.completeURL(value
);
58 // Ignore those URLs which are not standard protocols. Because FTP
59 // protocol does no have cache mechanism, we will skip all
60 // sub-resources if they use FTP protocol.
61 if (!u
.SchemeIsHTTPOrHTTPS() && !u
.SchemeIs(url::kFileScheme
))
63 // Ignore duplicated resource link.
64 result
->resources_list
->push_back(u
);
65 // Insert referrer for above new resource link.
66 result
->referrer_urls_list
->push_back(GURL());
67 result
->referrer_policies_list
->push_back(blink::WebReferrerPolicyDefault
);
72 bool GetSavableResourceLinksForFrame(WebFrame
* current_frame
,
73 SavableResourcesResult
* result
,
74 const char** savable_schemes
) {
75 // Get current frame's URL.
76 GURL current_frame_url
= current_frame
->document().url();
78 // If url of current frame is invalid, ignore it.
79 if (!current_frame_url
.is_valid())
82 // If url of current frame is not a savable protocol, ignore it.
83 bool is_valid_protocol
= false;
84 for (int i
= 0; savable_schemes
[i
] != NULL
; ++i
) {
85 if (current_frame_url
.SchemeIs(savable_schemes
[i
])) {
86 is_valid_protocol
= true;
90 if (!is_valid_protocol
)
93 // Get current using document.
94 WebDocument current_doc
= current_frame
->document();
95 // Go through all descent nodes.
96 WebElementCollection all
= current_doc
.all();
97 // Go through all elements in this frame.
98 for (WebElement element
= all
.firstItem(); !element
.isNull();
99 element
= all
.nextItem()) {
100 GetSavableResourceLinkForElement(element
,
108 WebString
GetSubResourceLinkFromElement(const WebElement
& element
) {
109 const char* attribute_name
= NULL
;
110 if (element
.hasHTMLTagName("img") ||
111 element
.hasHTMLTagName("script")) {
112 attribute_name
= "src";
113 } else if (element
.hasHTMLTagName("input")) {
114 const WebInputElement input
= element
.toConst
<WebInputElement
>();
115 if (input
.isImageButton()) {
116 attribute_name
= "src";
118 } else if (element
.hasHTMLTagName("body") ||
119 element
.hasHTMLTagName("table") ||
120 element
.hasHTMLTagName("tr") ||
121 element
.hasHTMLTagName("td")) {
122 attribute_name
= "background";
123 } else if (element
.hasHTMLTagName("blockquote") ||
124 element
.hasHTMLTagName("q") ||
125 element
.hasHTMLTagName("del") ||
126 element
.hasHTMLTagName("ins")) {
127 attribute_name
= "cite";
128 } else if (element
.hasHTMLTagName("link")) {
129 // If the link element is not linked to css, ignore it.
130 if (base::LowerCaseEqualsASCII(
131 base::StringPiece16(element
.getAttribute("type")), "text/css") ||
132 base::LowerCaseEqualsASCII(
133 base::StringPiece16(element
.getAttribute("rel")), "stylesheet")) {
134 // TODO(jnd): Add support for extracting links of sub-resources which
135 // are inside style-sheet such as @import, url(), etc.
136 // See bug: http://b/issue?id=1111667.
137 attribute_name
= "href";
142 WebString value
= element
.getAttribute(WebString::fromUTF8(attribute_name
));
143 // If value has content and not start with "javascript:" then return it,
144 // otherwise return NULL.
145 if (!value
.isNull() && !value
.isEmpty() &&
146 !base::StartsWith(value
.utf8(), "javascript:",
147 base::CompareCase::INSENSITIVE_ASCII
))
153 } // namespace content