1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/chrome_render_view_observer.h"
8 #include "base/bind_helpers.h"
9 #include "base/command_line.h"
10 #include "base/debug/trace_event.h"
11 #include "base/message_loop/message_loop.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/chrome_switches.h"
17 #include "chrome/common/prerender_messages.h"
18 #include "chrome/common/render_messages.h"
19 #include "chrome/common/url_constants.h"
20 #include "chrome/renderer/chrome_render_process_observer.h"
21 #include "chrome/renderer/prerender/prerender_helper.h"
22 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
23 #include "chrome/renderer/translate/translate_helper.h"
24 #include "chrome/renderer/webview_color_overlay.h"
25 #include "content/public/common/bindings_policy.h"
26 #include "content/public/renderer/content_renderer_client.h"
27 #include "content/public/renderer/render_frame.h"
28 #include "content/public/renderer/render_view.h"
29 #include "extensions/common/constants.h"
30 #include "net/base/data_url.h"
31 #include "skia/ext/platform_canvas.h"
32 #include "third_party/WebKit/public/platform/WebCString.h"
33 #include "third_party/WebKit/public/platform/WebRect.h"
34 #include "third_party/WebKit/public/platform/WebSize.h"
35 #include "third_party/WebKit/public/platform/WebString.h"
36 #include "third_party/WebKit/public/platform/WebURLRequest.h"
37 #include "third_party/WebKit/public/platform/WebVector.h"
38 #include "third_party/WebKit/public/web/WebAXObject.h"
39 #include "third_party/WebKit/public/web/WebDataSource.h"
40 #include "third_party/WebKit/public/web/WebDocument.h"
41 #include "third_party/WebKit/public/web/WebElement.h"
42 #include "third_party/WebKit/public/web/WebInputEvent.h"
43 #include "third_party/WebKit/public/web/WebLocalFrame.h"
44 #include "third_party/WebKit/public/web/WebNode.h"
45 #include "third_party/WebKit/public/web/WebNodeList.h"
46 #include "third_party/WebKit/public/web/WebView.h"
47 #include "ui/base/ui_base_switches_util.h"
48 #include "ui/gfx/favicon_size.h"
49 #include "ui/gfx/size.h"
50 #include "ui/gfx/size_f.h"
51 #include "ui/gfx/skbitmap_operations.h"
52 #include "v8/include/v8-testing.h"
54 #if defined(ENABLE_EXTENSIONS)
55 #include "chrome/common/extensions/chrome_extension_messages.h"
58 using blink::WebAXObject
;
59 using blink::WebCString
;
60 using blink::WebDataSource
;
61 using blink::WebDocument
;
62 using blink::WebElement
;
63 using blink::WebFrame
;
64 using blink::WebGestureEvent
;
65 using blink::WebIconURL
;
66 using blink::WebLocalFrame
;
68 using blink::WebNodeList
;
70 using blink::WebSecurityOrigin
;
72 using blink::WebString
;
73 using blink::WebTouchEvent
;
75 using blink::WebURLRequest
;
77 using blink::WebVector
;
78 using blink::WebWindowFeatures
;
// Delay, in milliseconds, to wait before capturing the page contents once the
// page has stopped loading.
static const int kDelayForCaptureMs = 500;

// Typically the page data is captured once the page is loaded. Sometimes a
// page never finishes loading, which would prevent the capture altogether.
// To work around this, a capture is always scheduled after the following
// delay (in milliseconds).
static const int kDelayForForcedCaptureMs = 6000;

// Define to write the time necessary for thumbnail/DOM text retrieval,
// respectively, into the system debug log.
// #define TIME_TEXT_RETRIEVAL

// Maximum number of characters in the document to index; any text beyond this
// point is clipped.
static const size_t kMaxIndexChars = 65535;

// Constants for UMA statistic collection.
static const char kTranslateCaptureText[] = "Translate.CaptureText";
103 #if defined(OS_ANDROID)
104 // Parses the DOM for a <meta> tag with a particular name.
105 // |meta_tag_content| is set to the contents of the 'content' attribute.
106 // |found_tag| is set to true if the tag was successfully found.
107 // Returns true if the document was parsed without errors.
108 bool RetrieveMetaTagContent(const WebFrame
* main_frame
,
109 const GURL
& expected_url
,
110 const std::string
& meta_tag_name
,
112 std::string
* meta_tag_content
) {
113 WebDocument document
=
114 main_frame
? main_frame
->document() : WebDocument();
115 WebElement head
= document
.isNull() ? WebElement() : document
.head();
116 GURL document_url
= document
.isNull() ? GURL() : GURL(document
.url());
118 // Search the DOM for the <meta> tag with the given name.
120 *meta_tag_content
= "";
121 if (!head
.isNull()) {
122 WebNodeList children
= head
.childNodes();
123 for (unsigned i
= 0; i
< children
.length(); ++i
) {
124 WebNode child
= children
.item(i
);
125 if (!child
.isElementNode())
127 WebElement elem
= child
.to
<WebElement
>();
128 if (elem
.hasHTMLTagName("meta")) {
129 if (elem
.hasAttribute("name") && elem
.hasAttribute("content")) {
130 std::string name
= elem
.getAttribute("name").utf8();
131 if (name
== meta_tag_name
) {
132 *meta_tag_content
= elem
.getAttribute("content").utf8();
141 // Make sure we're checking the right page and that the length of the content
142 // string is reasonable.
143 bool success
= document_url
== expected_url
;
144 if (meta_tag_content
->size() > chrome::kMaxMetaTagAttributeLength
) {
145 *meta_tag_content
= "";
155 ChromeRenderViewObserver::ChromeRenderViewObserver(
156 content::RenderView
* render_view
,
157 ChromeRenderProcessObserver
* chrome_render_process_observer
)
158 : content::RenderViewObserver(render_view
),
159 chrome_render_process_observer_(chrome_render_process_observer
),
160 translate_helper_(new TranslateHelper(render_view
)),
161 phishing_classifier_(NULL
),
162 capture_timer_(false, false) {
163 const CommandLine
& command_line
= *CommandLine::ForCurrentProcess();
164 if (!command_line
.HasSwitch(switches::kDisableClientSidePhishingDetection
))
165 OnSetClientSidePhishingDetection(true);
168 ChromeRenderViewObserver::~ChromeRenderViewObserver() {
171 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message
& message
) {
173 IPC_BEGIN_MESSAGE_MAP(ChromeRenderViewObserver
, message
)
174 #if !defined(OS_ANDROID) && !defined(OS_IOS)
175 IPC_MESSAGE_HANDLER(ChromeViewMsg_WebUIJavaScript
, OnWebUIJavaScript
)
177 #if defined(ENABLE_EXTENSIONS)
178 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetVisuallyDeemphasized
,
179 OnSetVisuallyDeemphasized
)
181 #if defined(OS_ANDROID)
182 IPC_MESSAGE_HANDLER(ChromeViewMsg_UpdateTopControlsState
,
183 OnUpdateTopControlsState
)
184 IPC_MESSAGE_HANDLER(ChromeViewMsg_RetrieveWebappInformation
,
185 OnRetrieveWebappInformation
)
186 IPC_MESSAGE_HANDLER(ChromeViewMsg_RetrieveMetaTagContent
,
187 OnRetrieveMetaTagContent
)
189 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetClientSidePhishingDetection
,
190 OnSetClientSidePhishingDetection
)
191 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetWindowFeatures
, OnSetWindowFeatures
)
192 IPC_MESSAGE_UNHANDLED(handled
= false)
193 IPC_END_MESSAGE_MAP()
198 #if !defined(OS_ANDROID) && !defined(OS_IOS)
199 void ChromeRenderViewObserver::OnWebUIJavaScript(
200 const base::string16
& javascript
) {
201 webui_javascript_
.push_back(javascript
);
205 #if defined(OS_ANDROID)
206 void ChromeRenderViewObserver::OnUpdateTopControlsState(
207 content::TopControlsState constraints
,
208 content::TopControlsState current
,
210 render_view()->UpdateTopControlsState(constraints
, current
, animate
);
213 void ChromeRenderViewObserver::OnRetrieveWebappInformation(
214 const GURL
& expected_url
) {
215 WebFrame
* main_frame
= render_view()->GetWebView()->mainFrame();
217 std::string content_str
;
219 // Search for the "mobile-web-app-capable" tag.
220 bool mobile_parse_success
= RetrieveMetaTagContent(
223 "mobile-web-app-capable",
226 bool is_mobile_webapp_capable
= mobile_parse_success
&& found_tag
&&
227 LowerCaseEqualsASCII(content_str
, "yes");
229 // Search for the "apple-mobile-web-app-capable" tag.
230 bool apple_parse_success
= RetrieveMetaTagContent(
233 "apple-mobile-web-app-capable",
236 bool is_apple_mobile_webapp_capable
= apple_parse_success
&& found_tag
&&
237 LowerCaseEqualsASCII(content_str
, "yes");
239 bool is_only_apple_mobile_webapp_capable
=
240 is_apple_mobile_webapp_capable
&& !is_mobile_webapp_capable
;
241 if (main_frame
&& is_only_apple_mobile_webapp_capable
) {
242 blink::WebConsoleMessage
message(
243 blink::WebConsoleMessage::LevelWarning
,
244 "<meta name=\"apple-mobile-web-app-capable\" content=\"yes\"> is "
245 "deprecated. Please include <meta name=\"mobile-web-app-capable\" "
246 "content=\"yes\"> - "
247 "http://developers.google.com/chrome/mobile/docs/installtohomescreen");
248 main_frame
->addMessageToConsole(message
);
251 Send(new ChromeViewHostMsg_DidRetrieveWebappInformation(
253 mobile_parse_success
&& apple_parse_success
,
254 is_mobile_webapp_capable
,
255 is_apple_mobile_webapp_capable
,
259 void ChromeRenderViewObserver::OnRetrieveMetaTagContent(
260 const GURL
& expected_url
,
261 const std::string tag_name
) {
263 std::string content_str
;
264 bool parsed_successfully
= RetrieveMetaTagContent(
265 render_view()->GetWebView()->mainFrame(),
271 Send(new ChromeViewHostMsg_DidRetrieveMetaTagContent(
273 parsed_successfully
&& found_tag
,
280 void ChromeRenderViewObserver::OnSetWindowFeatures(
281 const WebWindowFeatures
& window_features
) {
282 render_view()->GetWebView()->setWindowFeatures(window_features
);
285 void ChromeRenderViewObserver::Navigate(const GURL
& url
) {
286 // Execute cache clear operations that were postponed until a navigation
287 // event (including tab reload).
288 if (chrome_render_process_observer_
)
289 chrome_render_process_observer_
->ExecutePendingClearCache();
290 // Let translate_helper do any preparatory work for loading a URL.
291 if (translate_helper_
)
292 translate_helper_
->PrepareForUrl(url
);
295 void ChromeRenderViewObserver::OnSetClientSidePhishingDetection(
296 bool enable_phishing_detection
) {
297 #if defined(FULL_SAFE_BROWSING) && !defined(OS_CHROMEOS)
298 phishing_classifier_
= enable_phishing_detection
?
299 safe_browsing::PhishingClassifierDelegate::Create(render_view(), NULL
) :
304 #if defined(ENABLE_EXTENSIONS)
305 void ChromeRenderViewObserver::OnSetVisuallyDeemphasized(bool deemphasized
) {
306 bool already_deemphasized
= !!dimmed_color_overlay_
.get();
307 if (already_deemphasized
== deemphasized
)
312 SkColor greyish
= SkColorSetARGB(178, 0, 0, 0);
313 dimmed_color_overlay_
.reset(
314 new WebViewColorOverlay(render_view(), greyish
));
316 dimmed_color_overlay_
.reset();
321 void ChromeRenderViewObserver::DidStartLoading() {
322 if ((render_view()->GetEnabledBindings() & content::BINDINGS_POLICY_WEB_UI
) &&
323 !webui_javascript_
.empty()) {
324 for (size_t i
= 0; i
< webui_javascript_
.size(); ++i
) {
325 render_view()->GetMainRenderFrame()->ExecuteJavaScript(
326 webui_javascript_
[i
]);
328 webui_javascript_
.clear();
332 void ChromeRenderViewObserver::DidStopLoading() {
333 WebFrame
* main_frame
= render_view()->GetWebView()->mainFrame();
334 GURL osdd_url
= main_frame
->document().openSearchDescriptionURL();
335 if (!osdd_url
.is_empty()) {
336 Send(new ChromeViewHostMsg_PageHasOSDD(
337 routing_id(), main_frame
->document().url(), osdd_url
,
338 search_provider::AUTODETECTED_PROVIDER
));
341 // Don't capture pages including refresh meta tag.
342 if (HasRefreshMetaTag(main_frame
))
345 CapturePageInfoLater(
346 false, // preliminary_capture
347 base::TimeDelta::FromMilliseconds(
348 render_view()->GetContentStateImmediately() ?
349 0 : kDelayForCaptureMs
));
352 void ChromeRenderViewObserver::DidCommitProvisionalLoad(
353 WebLocalFrame
* frame
, bool is_new_navigation
) {
354 // Don't capture pages being not new, or including refresh meta tag.
355 if (!is_new_navigation
|| HasRefreshMetaTag(frame
))
358 CapturePageInfoLater(
359 true, // preliminary_capture
360 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs
));
363 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture
,
364 base::TimeDelta delay
) {
365 capture_timer_
.Start(
368 base::Bind(&ChromeRenderViewObserver::CapturePageInfo
,
369 base::Unretained(this),
370 preliminary_capture
));
373 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture
) {
374 if (!render_view()->GetWebView())
377 WebFrame
* main_frame
= render_view()->GetWebView()->mainFrame();
381 // Don't index/capture pages that are in view source mode.
382 if (main_frame
->isViewSourceModeEnabled())
385 // Don't index/capture pages that failed to load. This only checks the top
386 // level frame so the thumbnail may contain a frame that failed to load.
387 WebDataSource
* ds
= main_frame
->dataSource();
388 if (ds
&& ds
->hasUnreachableURL())
391 // Don't index/capture pages that are being prerendered.
392 if (prerender::PrerenderHelper::IsPrerendering(
393 render_view()->GetMainRenderFrame())) {
397 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the
398 // translate helper for language detection and possible translation.
399 base::string16 contents
;
400 base::TimeTicks capture_begin_time
= base::TimeTicks::Now();
401 CaptureText(main_frame
, &contents
);
402 UMA_HISTOGRAM_TIMES(kTranslateCaptureText
,
403 base::TimeTicks::Now() - capture_begin_time
);
404 if (translate_helper_
)
405 translate_helper_
->PageCaptured(contents
);
407 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
409 #if defined(FULL_SAFE_BROWSING)
410 // Will swap out the string.
411 if (phishing_classifier_
)
412 phishing_classifier_
->PageCaptured(&contents
, preliminary_capture
);
416 void ChromeRenderViewObserver::CaptureText(WebFrame
* frame
,
417 base::string16
* contents
) {
422 #ifdef TIME_TEXT_RETRIEVAL
423 double begin
= time_util::GetHighResolutionTimeNow();
426 // get the contents of the frame
427 *contents
= frame
->contentAsText(kMaxIndexChars
);
429 #ifdef TIME_TEXT_RETRIEVAL
430 double end
= time_util::GetHighResolutionTimeNow();
432 sprintf_s(buf
, "%d chars retrieved for indexing in %gms\n",
433 contents
.size(), (end
- begin
)*1000);
434 OutputDebugStringA(buf
);
437 // When the contents are clipped to the maximum, we don't want to have a
438 // partial word indexed at the end that might have been clipped. Therefore,
439 // terminate the string at the last space to ensure no words are clipped.
440 if (contents
->size() == kMaxIndexChars
) {
441 size_t last_space_index
= contents
->find_last_of(base::kWhitespaceUTF16
);
442 if (last_space_index
!= base::string16::npos
)
443 contents
->resize(last_space_index
);
447 bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame
* frame
) {
450 WebElement head
= frame
->document().head();
451 if (head
.isNull() || !head
.hasChildNodes())
454 const WebString
tag_name(base::ASCIIToUTF16("meta"));
455 const WebString
attribute_name(base::ASCIIToUTF16("http-equiv"));
457 WebNodeList children
= head
.childNodes();
458 for (size_t i
= 0; i
< children
.length(); ++i
) {
459 WebNode node
= children
.item(i
);
460 if (!node
.isElementNode())
462 WebElement element
= node
.to
<WebElement
>();
463 if (!element
.hasHTMLTagName(tag_name
))
465 WebString value
= element
.getAttribute(attribute_name
);
466 if (value
.isNull() || !LowerCaseEqualsASCII(value
, "refresh"))