1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/translate/content/renderer/translate_helper.h"
8 #include "base/compiler_specific.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "components/translate/content/common/translate_messages.h"
16 #include "components/translate/content/renderer/renderer_cld_data_provider.h"
17 #include "components/translate/content/renderer/renderer_cld_data_provider_factory.h"
18 #include "components/translate/content/renderer/renderer_cld_utils.h"
19 #include "components/translate/core/common/translate_constants.h"
20 #include "components/translate/core/common/translate_metrics.h"
21 #include "components/translate/core/common/translate_util.h"
22 #include "components/translate/core/language_detection/language_detection_util.h"
23 #include "content/public/common/content_constants.h"
24 #include "content/public/common/url_constants.h"
25 #include "content/public/renderer/render_thread.h"
26 #include "content/public/renderer/render_view.h"
27 #include "ipc/ipc_platform_file.h"
28 #include "third_party/WebKit/public/web/WebDocument.h"
29 #include "third_party/WebKit/public/web/WebElement.h"
30 #include "third_party/WebKit/public/web/WebFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeList.h"
33 #include "third_party/WebKit/public/web/WebScriptSource.h"
34 #include "third_party/WebKit/public/web/WebView.h"
35 #include "third_party/WebKit/public/web/WebWidget.h"
37 #include "v8/include/v8.h"
39 using base::ASCIIToUTF16
;
40 using blink::WebDocument
;
41 using blink::WebElement
;
42 using blink::WebFrame
;
44 using blink::WebNodeList
;
45 using blink::WebScriptSource
;
46 using blink::WebSecurityOrigin
;
47 using blink::WebString
;
48 using blink::WebVector
;
// Delay, in milliseconds, before checking whether the translate library
// injected in the page is ready.
const int kTranslateInitCheckDelayMs = 150;

// Upper bound on how many times readiness of the injected translate library
// is checked.
const int kMaxTranslateInitCheckAttempts = 5;

// Delay, in milliseconds, before checking whether the translation has
// finished.
const int kTranslateStatusCheckDelayMs = 400;

// Language name handed to the Translate element when it should detect the
// source language itself.
const char kAutoDetectionLanguage[] = "auto";

// Content-security-policy applied to the isolated world.
const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";

// Set once the CLD-available callback has been registered. This is a single
// file-level flag, not per-TranslateHelper state.
bool g_cld_callback_set = false;
74 // Obtain a new CLD data provider. Defined as a standalone method for ease of
75 // use in constructor initialization list.
76 scoped_ptr
<translate::RendererCldDataProvider
> CreateDataProvider(
77 content::RenderViewObserver
* render_view_observer
) {
78 translate::RendererCldUtils::ConfigureDefaultDataProvider();
79 return scoped_ptr
<translate::RendererCldDataProvider
>(
80 translate::RendererCldDataProviderFactory::Get()->
81 CreateRendererCldDataProvider(render_view_observer
));
88 ////////////////////////////////////////////////////////////////////////////////
89 // TranslateHelper, public:
// Constructs a helper observing |render_view|. All deferred-capture and
// polling flags start cleared and |deferred_page_seq_no_| starts at -1, the
// value the rest of this file treats as "nothing deferred".
// NOTE(review): this listing skips lines 92-93, 96 and 103. The initializer
// below uses |extension_group|, which is not among the parameters visible
// here, and |page_seq_no_| / |world_id_| (used elsewhere in this file) have no
// visible initializer. Presumably the missing lines declare and initialize
// them - confirm against the complete file.
91 TranslateHelper::TranslateHelper(content::RenderView
* render_view
,
94 const std::string
& extension_scheme
)
95 : content::RenderViewObserver(render_view
),
97 translation_pending_(false),
// The provider is created with this helper as its RenderViewObserver.
98 cld_data_provider_(CreateDataProvider(this)),
99 cld_data_polling_started_(false),
100 cld_data_polling_canceled_(false),
101 deferred_page_capture_(false),
102 deferred_page_seq_no_(-1),
104 extension_group_(extension_group
),
105 extension_scheme_(extension_scheme
),
106 weak_method_factory_(this) {
109 TranslateHelper::~TranslateHelper() {
110 CancelPendingTranslation();
111 CancelCldDataPolling();
// Announces the current page sequence number, resets any deferred page
// capture, and - unless polling has already been started or |url| uses one of
// the schemes checked below - starts polling for CLD data.
// NOTE(review): line 115 is missing from this listing; presumably it updates
// |page_seq_no_| before it is sent - confirm against the complete file.
114 void TranslateHelper::PrepareForUrl(const GURL
& url
) {
116 Send(new ChromeViewHostMsg_TranslateAssignedSequenceNumber(
117 routing_id(), page_seq_no_
));
// Forget any page capture that was waiting for CLD data.
118 deferred_page_capture_
= false;
119 deferred_page_seq_no_
= -1;
120 deferred_contents_
.clear();
121 if (cld_data_polling_started_
)
// NOTE(review): the statement guarded by the condition above (lines 122-123)
// is not visible; presumably an early return - confirm.
124 // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
125 // components/translate/core/common/translate_util.cc, and ignore any URL
126 // that fails that check. This will require moving unit tests and rewiring
127 // other function calls as well, so for now replicate the logic here.
// NOTE(review): lines 128-129 and the body of each scheme check below are not
// visible; presumably each check returns early so that polling is not started
// for these schemes - confirm.
130 if (url
.SchemeIs(content::kChromeUIScheme
))
132 if (url
.SchemeIs(content::kChromeDevToolsScheme
))
134 if (url
.SchemeIs(url::kFtpScheme
))
136 if (url
.SchemeIs(extension_scheme_
.c_str()))
139 // Start polling for CLD data.
140 cld_data_polling_started_
= true;
// First request: delay_millis = 0, next_delay_millis = 1000.
141 TranslateHelper::SendCldDataRequest(0, 1000);
144 void TranslateHelper::PageCaptured(const base::string16
& contents
) {
145 PageCapturedImpl(page_seq_no_
, contents
);
// Determines the page language from the document's content-language value,
// the root element's "lang" attribute and |contents|, then sends a
// ChromeViewHostMsg_TranslateLanguageDetermined message. When CLD data is not
// yet available, the request is stored in the deferred_* members instead.
// |page_seq_no| is compared with |page_seq_no_| before any work is done.
148 void TranslateHelper::PageCapturedImpl(int page_seq_no
,
149 const base::string16
& contents
) {
150 // Get the document language as set by WebKit from the http-equiv
151 // meta tag for "content-language". This may or may not also
152 // have a value derived from the actual Content-Language HTTP
153 // header. The two actually have different meanings (despite the
154 // original intent of http-equiv to be an equivalent) with the former
155 // being the language of the document and the latter being the
156 // language of the intended audience (a distinction really only
157 // relevant for things like language textbooks). This distinction
158 // shouldn't affect translation.
159 WebFrame
* main_frame
= GetMainFrame();
160 if (!main_frame
|| page_seq_no_
!= page_seq_no
)
// NOTE(review): lines 161-162 are not visible; presumably an early return
// when there is no main frame or the sequence number is stale - confirm.
163 if (!cld_data_provider_
->IsCldDataAvailable()) {
164 // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
165 // is loaded, if ever.
166 deferred_page_capture_
= true;
167 deferred_page_seq_no_
= page_seq_no
;
168 deferred_contents_
= contents
;
169 RecordLanguageDetectionTiming(DEFERRED
);
// NOTE(review): lines 170-172 are not visible; presumably the deferred branch
// returns and closes here - confirm.
// A deferred sequence number of -1 means no capture was waiting for CLD data.
173 if (deferred_page_seq_no_
== -1) {
174 // CLD data was available before language detection was requested.
175 RecordLanguageDetectionTiming(ON_TIME
);
// NOTE(review): line 176 is not visible; the comment below indicates the
// other branch of this condition starts here - confirm.
177 // This is a request that was triggered because CLD data is now available
178 // and was previously deferred.
179 RecordLanguageDetectionTiming(RESUMED
);
182 WebDocument document
= main_frame
->document();
183 std::string content_language
= document
.contentLanguage().utf8();
184 WebElement html_element
= document
.documentElement();
185 std::string html_lang
;
186 // |html_element| can be null element, e.g. in
187 // BrowserTest.WindowOpenClose.
188 if (!html_element
.isNull())
189 html_lang
= html_element
.getAttribute("lang").utf8();
190 std::string cld_language
;
// NOTE(review): |is_cld_reliable| is not initialized here; it is only safe to
// read below if DeterminePageLanguage always writes it - confirm.
191 bool is_cld_reliable
;
192 std::string language
= DeterminePageLanguage(
193 content_language
, html_lang
, contents
, &cld_language
, &is_cld_reliable
);
195 if (language
.empty())
// NOTE(review): line 196 is not visible. If it is an early return, the
// !language.empty() term in the Send() call below is always true - confirm.
198 language_determined_time_
= base::TimeTicks::Now();
200 GURL
url(document
.url());
201 LanguageDetectionDetails details
;
202 details
.time
= base::Time::Now();
// NOTE(review): line 203 is not visible; |url| above has no visible use in
// this listing - confirm against the complete file.
204 details
.content_language
= content_language
;
205 details
.cld_language
= cld_language
;
206 details
.is_cld_reliable
= is_cld_reliable
;
207 details
.html_root_language
= html_lang
;
208 details
.adopted_language
= language
;
210 // TODO(hajimehoshi): If this affects performance, it should be set only if
211 // translate-internals tab exists.
212 details
.contents
= contents
;
// NOTE(review): lines 215-216 (the leading message arguments) are not visible.
214 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
217 IsTranslationAllowed(&document
) && !language
.empty()));
220 void TranslateHelper::CancelPendingTranslation() {
221 weak_method_factory_
.InvalidateWeakPtrs();
222 translation_pending_
= false;
223 source_lang_
.clear();
224 target_lang_
.clear();
225 CancelCldDataPolling();
228 ////////////////////////////////////////////////////////////////////////////////
229 // TranslateHelper, protected:
231 bool TranslateHelper::IsTranslateLibAvailable() {
232 return ExecuteScriptAndGetBoolResult(
233 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
234 "typeof cr.googleTranslate.translate == 'function'", false);
237 bool TranslateHelper::IsTranslateLibReady() {
238 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
241 bool TranslateHelper::HasTranslationFinished() {
242 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
245 bool TranslateHelper::HasTranslationFailed() {
246 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
// Builds a script that calls cr.googleTranslate.translate(...) and returns the
// boolean result of evaluating it (false is passed as the second argument).
// NOTE(review): lines 251-254, which complete the script string, are missing
// from this listing; presumably they append the source and target languages -
// confirm against the complete file.
249 bool TranslateHelper::StartTranslation() {
250 std::string script
= "cr.googleTranslate.translate('" +
255 return ExecuteScriptAndGetBoolResult(script
, false);
258 std::string
TranslateHelper::GetOriginalPageLanguage() {
259 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
262 base::TimeDelta
TranslateHelper::AdjustDelay(int delayInMs
) {
263 // Just converts |delayInMs| without any modification in practical cases.
264 // Tests will override this function to return modified value.
265 return base::TimeDelta::FromMilliseconds(delayInMs
);
// Runs |script| on the main frame inside the isolated world identified by
// |world_id_|, using |extension_group_|; no result is collected.
// |script| is converted with ASCIIToUTF16.
268 void TranslateHelper::ExecuteScript(const std::string
& script
) {
269 WebFrame
* main_frame
= GetMainFrame();
// NOTE(review): lines 270-272 are not visible; presumably a null check on
// |main_frame| that returns early - confirm.
273 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
// A single source (count 1) is passed.
274 main_frame
->executeScriptInIsolatedWorld(
275 world_id_
, &source
, 1, extension_group_
);
// Runs |script| in the isolated world on the main frame and returns the
// boolean it evaluates to.
// NOTE(review): line 279 (the rest of the parameter list) is not visible.
// Callers in this file pass a bool as a second argument; presumably it is the
// value returned when the script cannot be run or does not yield exactly one
// boolean - confirm against the complete file.
278 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string
& script
,
280 WebFrame
* main_frame
= GetMainFrame();
// NOTE(review): lines 281-283 are not visible; presumably a null check on
// |main_frame| - confirm.
284 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
285 WebVector
<v8::Local
<v8::Value
> > results
;
286 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
287 main_frame
->executeScriptInIsolatedWorld(
288 world_id_
, &source
, 1, extension_group_
, &results
);
// Exactly one non-empty boolean result is expected.
289 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsBoolean()) {
// NOTE(review): lines 290-293 (the body of this failure branch) are not
// visible.
294 return results
[0]->BooleanValue();
// Runs |script| in the isolated world on the main frame and returns the
// string it evaluates to, encoded as UTF-8. Returns an empty string on the
// failure paths visible below.
297 std::string
TranslateHelper::ExecuteScriptAndGetStringResult(
298 const std::string
& script
) {
299 WebFrame
* main_frame
= GetMainFrame();
// NOTE(review): line 300 is not visible; presumably the return below is
// guarded by a null check on |main_frame| - confirm.
301 return std::string();
303 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
304 WebVector
<v8::Local
<v8::Value
> > results
;
305 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
306 main_frame
->executeScriptInIsolatedWorld(
307 world_id_
, &source
, 1, extension_group_
, &results
);
// Exactly one non-empty string result is expected.
308 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsString()) {
// NOTE(review): line 309 is not visible.
310 return std::string();
313 v8::Local
<v8::String
> v8_str
= results
[0].As
<v8::String
>();
// One byte more than the UTF-8 length is allocated for the buffer.
314 int length
= v8_str
->Utf8Length() + 1;
315 scoped_ptr
<char[]> str(new char[length
]);
316 v8_str
->WriteUtf8(str
.get(), length
);
// Constructing from a C string stops at the first NUL byte, so a result
// containing an embedded NUL would be truncated here.
317 return std::string(str
.get());
// Runs |script| in the isolated world on the main frame and returns the
// number it evaluates to.
// NOTE(review): the values returned on the failure paths are not visible in
// this listing (lines 323-325 and 332-335 are missing).
320 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
321 const std::string
& script
) {
322 WebFrame
* main_frame
= GetMainFrame();
// NOTE(review): presumably a null check on |main_frame| belongs here -
// confirm.
326 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
327 WebVector
<v8::Local
<v8::Value
> > results
;
328 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
329 main_frame
->executeScriptInIsolatedWorld(
330 world_id_
, &source
, 1, extension_group_
, &results
);
// Exactly one non-empty numeric result is expected.
331 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsNumber()) {
336 return results
[0]->NumberValue();
339 ////////////////////////////////////////////////////////////////////////////////
340 // TranslateHelper, private:
// Scans the direct children of |document|'s <head> for a <meta> element with
// name="google" whose "value" attribute (or, failing that, "content"
// attribute) equals "notranslate", compared case-insensitively.
// NOTE(review): every return statement and branch body of this function is
// missing from this listing (lines 347-348, 359, 363, 367, 373, 375-378).
// Presumably a match yields false and everything else yields true - confirm
// against the complete file.
344 bool TranslateHelper::IsTranslationAllowed(WebDocument
* document
) {
345 WebElement head
= document
->head();
346 if (head
.isNull() || !head
.hasChildNodes())
// Tag and attribute names used in the scan below.
349 const WebString
meta(ASCIIToUTF16("meta"));
350 const WebString
name(ASCIIToUTF16("name"));
351 const WebString
google(ASCIIToUTF16("google"));
352 const WebString
value(ASCIIToUTF16("value"));
353 const WebString
content(ASCIIToUTF16("content"));
355 WebNodeList children
= head
.childNodes();
356 for (size_t i
= 0; i
< children
.length(); ++i
) {
357 WebNode node
= children
.item(i
);
// Only element nodes are examined.
358 if (!node
.isElementNode())
360 WebElement element
= node
.to
<WebElement
>();
361 // Check if a tag is <meta>.
362 if (!element
.hasHTMLTagName(meta
))
364 // Check if the tag contains name="google".
365 WebString attribute
= element
.getAttribute(name
);
366 if (attribute
.isNull() || attribute
!= google
)
368 // Check if the tag contains value="notranslate", or content="notranslate".
369 attribute
= element
.getAttribute(value
);
// "content" is consulted only when "value" is absent.
370 if (attribute
.isNull())
371 attribute
= element
.getAttribute(content
);
372 if (attribute
.isNull())
374 if (LowerCaseEqualsASCII(attribute
, "notranslate"))
// Dispatches |message|: ChromeViewMsg_TranslatePage goes to OnTranslatePage
// and ChromeViewMsg_RevertTranslation to OnRevertTranslation. The message is
// also offered to |cld_data_provider_|.
// NOTE(review): line 381 (presumably the declaration of |handled|), line 387
// and lines 389-391 (presumably the return) are missing from this listing, so
// the condition under which the provider is consulted is not visible -
// confirm against the complete file.
380 bool TranslateHelper::OnMessageReceived(const IPC::Message
& message
) {
382 IPC_BEGIN_MESSAGE_MAP(TranslateHelper
, message
)
383 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage
, OnTranslatePage
)
384 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation
, OnRevertTranslation
)
385 IPC_MESSAGE_UNHANDLED(handled
= false)
386 IPC_END_MESSAGE_MAP()
388 handled
= cld_data_provider_
->OnMessageReceived(message
);
// Handles a request to translate the page identified by |page_seq_no| from
// |source_lang| to |target_lang|. Records the language pair, configures the
// isolated world, evaluates |translate_script| when the translate library is
// not yet available, and then calls TranslatePageImpl with count 0.
// NOTE(review): a number of lines are missing from this listing (400,
// 403-404, 407-408, 410, 412, 416, 418, 421, 423, 425, 428, 432-433, 439-440,
// 442); the notes below flag the gaps that affect control flow.
393 void TranslateHelper::OnTranslatePage(int page_seq_no
,
394 const std::string
& translate_script
,
395 const std::string
& source_lang
,
396 const std::string
& target_lang
) {
397 WebFrame
* main_frame
= GetMainFrame();
398 if (!main_frame
|| page_seq_no_
!= page_seq_no
)
399 return; // We navigated away, nothing to do.
401 // A similar translation is already under way, nothing to do.
402 if (translation_pending_
&& target_lang_
== target_lang
)
// NOTE(review): the statement guarded above is not visible; presumably an
// early return, per the comment on line 401 - confirm.
405 // Any pending translation is now irrelevant.
406 CancelPendingTranslation();
409 translation_pending_
= true;
411 // If the source language is undetermined, we'll let the translate element
// detect it: |kAutoDetectionLanguage| is stored as the source language.
413 source_lang_
= (source_lang
!= kUnknownLanguageCode
) ? source_lang
414 : kAutoDetectionLanguage
;
415 target_lang_
= target_lang
;
417 ReportUserActionDuration(language_determined_time_
, base::TimeTicks::Now());
419 GURL
url(main_frame
->document().url());
420 ReportPageScheme(url
.scheme());
422 // Set up v8 isolated world with proper content-security-policy and
// security origin.
// NOTE(review): GetMainFrame() is called a second time here although
// |main_frame| was fetched above. Line 425 is not visible, so whether |frame|
// is null-checked cannot be told from this listing.
424 WebFrame
* frame
= GetMainFrame();
426 frame
->setIsolatedWorldContentSecurityPolicy(
427 world_id_
, WebString::fromUTF8(kContentSecurityPolicy
));
429 GURL security_origin
= GetTranslateSecurityOrigin();
430 frame
->setIsolatedWorldSecurityOrigin(
431 world_id_
, WebSecurityOrigin::create(security_origin
));
434 if (!IsTranslateLibAvailable()) {
435 // Evaluate the script to add the translation related method to the global
436 // context of the page.
437 ExecuteScript(translate_script
);
438 DCHECK(IsTranslateLibAvailable());
441 TranslatePageImpl(page_seq_no
, 0);
// Handles a request to undo the translation of the page identified by
// |page_seq_no|: cancels any pending translation and runs
// cr.googleTranslate.revert() in the page.
444 void TranslateHelper::OnRevertTranslation(int page_seq_no
) {
445 if (page_seq_no_
!= page_seq_no
)
446 return; // We navigated away, nothing to do.
448 if (!IsTranslateLibAvailable()) {
// NOTE(review): lines 449-452 (the handling when the library is not
// available) are missing from this listing.
453 CancelPendingTranslation();
455 ExecuteScript("cr.googleTranslate.revert()");
// Polls the state of a translation started for |page_seq_no|. Reports a
// failure if the library signals an error, reports completion when it signals
// that it has finished, and otherwise posts itself again after
// |kTranslateStatusCheckDelayMs| (passed through AdjustDelay).
// NOTE(review): many lines are missing from this listing (462-463, 469-470,
// 479, 482-484, 486-487, 489-492, 494, 498, 500, 502-503, 505-507, 510, 514);
// the notes below flag the gaps that affect control flow.
458 void TranslateHelper::CheckTranslateStatus(int page_seq_no
) {
459 // If this is not the same page, the translation has been canceled. If the
460 // view is gone, the page is closing.
461 if (page_seq_no_
!= page_seq_no
|| !render_view()->GetWebView())
// NOTE(review): the guarded statement is not visible; presumably an early
// return - confirm.
464 // First check if there was an error.
465 if (HasTranslationFailed()) {
466 // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
467 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR
);
468 return; // There was an error.
471 if (HasTranslationFinished()) {
472 std::string actual_source_lang
;
473 // Translation was successful, if it was auto, retrieve the source
474 // language the Translate Element detected.
475 if (source_lang_
== kAutoDetectionLanguage
) {
476 actual_source_lang
= GetOriginalPageLanguage();
477 if (actual_source_lang
.empty()) {
478 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE
);
// NOTE(review): line 479 is not visible; presumably a return - confirm.
480 } else if (actual_source_lang
== target_lang_
) {
481 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES
);
// NOTE(review): lines 482-484 are not visible; the assignment below is
// presumably the branch taken when the source language was not "auto" -
// confirm.
485 actual_source_lang
= source_lang_
;
488 if (!translation_pending_
) {
// NOTE(review): lines 489-492 (the body of this branch) are not visible.
493 translation_pending_
= false;
495 // Check JavaScript performance counters for UMA reports.
496 ReportTimeToTranslate(
497 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
499 // Notify the browser we are done.
// NOTE(review): line 500 and the middle arguments of the message (lines
// 502-503) are not visible.
501 new ChromeViewHostMsg_PageTranslated(render_view()->GetRoutingID(),
504 TranslateErrors::NONE
));
508 // The translation is still pending, check again later.
// The task is bound through a weak pointer from |weak_method_factory_|.
509 base::MessageLoop::current()->PostDelayedTask(
511 base::Bind(&TranslateHelper::CheckTranslateStatus
,
512 weak_method_factory_
.GetWeakPtr(), page_seq_no
),
513 AdjustDelay(kTranslateStatusCheckDelayMs
));
// Starts the translation for |page_seq_no| once the translate library is
// ready. While it is not ready, the function re-posts itself with a delay of
// count * |kTranslateInitCheckDelayMs| (passed through AdjustDelay); once
// |count| reaches |kMaxTranslateInitCheckAttempts| it reports
// INITIALIZATION_ERROR instead. After a successful start it schedules
// CheckTranslateStatus.
// NOTE(review): several lines are missing from this listing (519-520,
// 526-527, 529, 532, 534-536, 539, 541, 543, 546-547, 550, 554); the notes
// below flag the gaps that affect control flow.
516 void TranslateHelper::TranslatePageImpl(int page_seq_no
, int count
) {
517 DCHECK_LT(count
, kMaxTranslateInitCheckAttempts
);
518 if (page_seq_no_
!= page_seq_no
|| !render_view()->GetWebView())
// NOTE(review): the guarded statement is not visible; presumably an early
// return - confirm.
521 if (!IsTranslateLibReady()) {
522 // The library is not ready, try again later, unless we have tried several
523 // times unsuccessfully already.
// |count| is incremented before the comparison.
524 if (++count
>= kMaxTranslateInitCheckAttempts
) {
525 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR
);
// NOTE(review): lines 526-527 are not visible; presumably a return - confirm.
528 base::MessageLoop::current()->PostDelayedTask(
530 base::Bind(&TranslateHelper::TranslatePageImpl
,
531 weak_method_factory_
.GetWeakPtr(),
// NOTE(review): line 532 (the remaining bound arguments) is not visible.
533 AdjustDelay(count
* kTranslateInitCheckDelayMs
));
537 // The library is loaded, and ready for translation now.
538 // Check JavaScript performance counters for UMA reports.
// NOTE(review): lines 539 and 541, which open the two calls whose arguments
// follow, are not visible.
540 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
542 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
544 if (!StartTranslation()) {
545 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR
);
// NOTE(review): lines 546-547 are not visible; presumably a return - confirm.
548 // Check the status of the translation.
549 base::MessageLoop::current()->PostDelayedTask(
551 base::Bind(&TranslateHelper::CheckTranslateStatus
,
552 weak_method_factory_
.GetWeakPtr(), page_seq_no
),
553 AdjustDelay(kTranslateStatusCheckDelayMs
));
556 void TranslateHelper::NotifyBrowserTranslationFailed(
557 TranslateErrors::Type error
) {
558 translation_pending_
= false;
559 // Notify the browser there was an error.
560 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
561 render_view()->GetRoutingID(), source_lang_
, target_lang_
, error
));
// Returns the main frame of the render view's WebView.
// NOTE(review): lines 568-570 are missing from this listing; given the
// comment on line 567 they presumably return early when |web_view| is null.
// Callers in this file do check the result for null - confirm against the
// complete file.
564 WebFrame
* TranslateHelper::GetMainFrame() {
565 WebView
* web_view
= render_view()->GetWebView();
567 // When the tab is going to be closed, the web_view can be NULL.
571 return web_view
->mainFrame();
574 void TranslateHelper::CancelCldDataPolling() {
575 cld_data_polling_canceled_
= true;
// One step of the CLD data polling chain: asks |cld_data_provider_| for the
// data and posts itself again after |delay_millis|, doubling
// |next_delay_millis| for the following step. The chain stops when polling is
// canceled, when the data is available, or when |next_delay_millis| exceeds
// half of the maximum int value.
// |delay_millis| must be >= 0 and |next_delay_millis| > 0 (DCHECKed).
// NOTE(review): lines 586-588, 592-594, 599-601, 607-608, 622 and 625 are
// missing from this listing; the notes below flag them.
578 void TranslateHelper::SendCldDataRequest(const int delay_millis
,
579 const int next_delay_millis
) {
580 DCHECK_GE(delay_millis
, 0);
581 DCHECK_GT(next_delay_millis
, 0);
583 // Terminate immediately if told to stop polling.
584 if (cld_data_polling_canceled_
) {
585 DVLOG(1) << "Aborting CLD data request (polling canceled)";
// NOTE(review): the return that the comment on line 583 implies is not
// visible - confirm.
589 // Terminate immediately if data is already loaded.
590 if (cld_data_provider_
->IsCldDataAvailable()) {
591 DVLOG(1) << "Aborting CLD data request (data available)";
// NOTE(review): as above, the return is not visible - confirm.
595 // Terminate immediately if the decayed delay is sufficiently large.
// Comparing against max/2 keeps the doubling further down from overflowing.
596 if (next_delay_millis
> std::numeric_limits
<int>::max() / 2) {
597 DVLOG(1) << "Aborting CLD data request (exceeded max number of requests)";
// Clearing the started flag lets PrepareForUrl start a new polling chain.
598 cld_data_polling_started_
= false;
// NOTE(review): as above, the return is not visible - confirm.
// NOTE(review): |g_cld_callback_set| is a file-level flag, so only the first
// TranslateHelper to reach this point registers OnCldDataAvailable with its
// provider. Whether providers are shared between helpers is not visible here
// - confirm this is intended.
602 if (!g_cld_callback_set
) {
603 g_cld_callback_set
= true;
604 cld_data_provider_
->SetCldAvailableCallback(
605 base::Bind(&TranslateHelper::OnCldDataAvailable
,
606 weak_method_factory_
.GetWeakPtr()));
609 // Else, make an asynchronous request to get the data we need.
610 DVLOG(1) << "Requesting CLD data from data provider";
611 cld_data_provider_
->SendCldDataRequest();
613 // ... and enqueue another delayed task to call again. This will start a
614 // chain of polling that will last until the pointer stops being NULL,
615 // which is the right thing to do.
616 // NB: In the great majority of cases, the data file will be available and
617 // the very first delayed task will be a no-op that terminates the chain.
618 // It's only while downloading the file that this will chain for a
619 // nontrivial amount of time.
620 // Use a weak pointer to avoid keeping this helper object around forever.
621 base::MessageLoop::current()->PostDelayedTask(
623 base::Bind(&TranslateHelper::SendCldDataRequest
,
624 weak_method_factory_
.GetWeakPtr(),
// NOTE(review): line 625 (the first bound argument) is not visible.
626 next_delay_millis
* 2),
627 base::TimeDelta::FromMilliseconds(delay_millis
));
630 void TranslateHelper::OnCldDataAvailable() {
631 if (deferred_page_capture_
) {
632 deferred_page_capture_
= false; // Don't do this a second time.
633 PageCapturedImpl(deferred_page_seq_no_
, deferred_contents_
);
634 deferred_page_seq_no_
= -1; // Clean up for sanity
635 deferred_contents_
.clear(); // Clean up for sanity
639 void TranslateHelper::RecordLanguageDetectionTiming(
640 LanguageDetectionTiming timing
) {
641 // The following comment is copied from page_load_histograms.cc, and applies
642 // just as equally here:
644 // Since there are currently no guarantees that renderer histograms will be
645 // sent to the browser, we initiate a PostTask here to be sure that we send
646 // the histograms we generated. Without this call, pages that don't have an
647 // on-close-handler might generate data that is lost when the renderer is
648 // shutdown abruptly (perchance because the user closed the tab).
649 DVLOG(1) << "Language detection timing: " << timing
;
650 UMA_HISTOGRAM_ENUMERATION("Translate.LanguageDetectionTiming", timing
,
651 LANGUAGE_DETECTION_TIMING_MAX_VALUE
);
653 // Note on performance: Under normal circumstances, this should get called
654 // once per page load. The code will either manage to do it ON_TIME or will
655 // be DEFERRED until CLD is ready. In the latter case, CLD is in dynamic mode
656 // and may eventually become available, triggering the RESUMED event; after
657 // this, everything should start being ON_TIME. This should never run more
658 // than twice in a page load, under any conditions.
659 // Also note that language detection is triggered off of a delay AFTER the
660 // page load completed event has fired, making this very much off the critical
662 content::RenderThread::Get()->UpdateHistograms(
663 content::kHistogramSynchronizerReservedSequenceNumber
);
666 } // namespace translate