1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/translate/content/renderer/translate_helper.h"
8 #include "base/compiler_specific.h"
9 #include "base/location.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram_macros.h"
12 #include "base/single_thread_task_runner.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/thread_task_runner_handle.h"
17 #include "components/translate/content/common/translate_messages.h"
18 #include "components/translate/content/renderer/renderer_cld_data_provider.h"
19 #include "components/translate/content/renderer/renderer_cld_data_provider_factory.h"
20 #include "components/translate/content/renderer/renderer_cld_utils.h"
21 #include "components/translate/core/common/translate_constants.h"
22 #include "components/translate/core/common/translate_metrics.h"
23 #include "components/translate/core/common/translate_util.h"
24 #include "components/translate/core/language_detection/language_detection_util.h"
25 #include "content/public/common/content_constants.h"
26 #include "content/public/common/url_constants.h"
27 #include "content/public/renderer/render_thread.h"
28 #include "content/public/renderer/render_view.h"
29 #include "ipc/ipc_platform_file.h"
30 #include "third_party/WebKit/public/web/WebDocument.h"
31 #include "third_party/WebKit/public/web/WebElement.h"
32 #include "third_party/WebKit/public/web/WebFrame.h"
33 #include "third_party/WebKit/public/web/WebNode.h"
34 #include "third_party/WebKit/public/web/WebNodeList.h"
35 #include "third_party/WebKit/public/web/WebScriptSource.h"
36 #include "third_party/WebKit/public/web/WebView.h"
37 #include "third_party/WebKit/public/web/WebWidget.h"
39 #include "v8/include/v8.h"
41 using base::ASCIIToUTF16
;
42 using blink::WebDocument
;
43 using blink::WebElement
;
44 using blink::WebFrame
;
46 using blink::WebNodeList
;
47 using blink::WebScriptSource
;
48 using blink::WebSecurityOrigin
;
49 using blink::WebString
;
50 using blink::WebVector
;
// The delay in milliseconds that we'll wait before checking to see if the
// translate library injected in the page is ready.
const int kTranslateInitCheckDelayMs = 150;

// The maximum number of times we'll check to see if the translate library
// injected in the page is ready.
const int kMaxTranslateInitCheckAttempts = 5;

// The delay we wait in milliseconds before checking whether the translation has
// finished.
const int kTranslateStatusCheckDelayMs = 400;

// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";

// Content-security-policy applied to the isolated world in which the translate
// script runs.
const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";

// Whether or not we have set the CLD callback yet. This flag is file-global,
// i.e. shared by every TranslateHelper instance in the process.
bool g_cld_callback_set = false;
76 // Obtain a new CLD data provider. Defined as a standalone method for ease of
77 // use in constructor initialization list.
78 scoped_ptr
<translate::RendererCldDataProvider
> CreateDataProvider(
79 content::RenderViewObserver
* render_view_observer
) {
80 translate::RendererCldUtils::ConfigureDefaultDataProvider();
81 return scoped_ptr
<translate::RendererCldDataProvider
>(
82 translate::RendererCldDataProviderFactory::Get()->
83 CreateRendererCldDataProvider(render_view_observer
));
86 // Returns whether the page associated with |document| is a candidate for
87 // translation. Some pages can explictly specify (via a meta-tag) that they
88 // should not be translated.
89 bool HasNoTranslateMeta(WebDocument
* document
) {
90 WebElement head
= document
->head();
91 if (head
.isNull() || !head
.hasChildNodes())
94 const WebString
meta(ASCIIToUTF16("meta"));
95 const WebString
name(ASCIIToUTF16("name"));
96 const WebString
google(ASCIIToUTF16("google"));
97 const WebString
value(ASCIIToUTF16("value"));
98 const WebString
content(ASCIIToUTF16("content"));
100 WebNodeList children
= head
.childNodes();
101 for (size_t i
= 0; i
< children
.length(); ++i
) {
102 WebNode node
= children
.item(i
);
103 if (!node
.isElementNode())
105 WebElement element
= node
.to
<WebElement
>();
106 // Check if a tag is <meta>.
107 if (!element
.hasHTMLTagName(meta
))
109 // Check if the tag contains name="google".
110 WebString attribute
= element
.getAttribute(name
);
111 if (attribute
.isNull() || attribute
!= google
)
113 // Check if the tag contains value="notranslate", or content="notranslate".
114 attribute
= element
.getAttribute(value
);
115 if (attribute
.isNull())
116 attribute
= element
.getAttribute(content
);
117 if (attribute
.isNull())
119 if (base::LowerCaseEqualsASCII(base::StringPiece16(attribute
),
128 namespace translate
{
130 ////////////////////////////////////////////////////////////////////////////////
131 // TranslateHelper, public:
133 TranslateHelper::TranslateHelper(content::RenderView
* render_view
,
136 const std::string
& extension_scheme
)
137 : content::RenderViewObserver(render_view
),
139 translation_pending_(false),
140 cld_data_provider_(CreateDataProvider(this)),
141 cld_data_polling_started_(false),
142 cld_data_polling_canceled_(false),
143 deferred_page_capture_(false),
144 deferred_page_seq_no_(-1),
146 extension_group_(extension_group
),
147 extension_scheme_(extension_scheme
),
148 weak_method_factory_(this) {
151 TranslateHelper::~TranslateHelper() {
152 CancelPendingTranslation();
153 CancelCldDataPolling();
156 void TranslateHelper::PrepareForUrl(const GURL
& url
) {
158 Send(new ChromeViewHostMsg_TranslateAssignedSequenceNumber(
159 routing_id(), page_seq_no_
));
160 deferred_page_capture_
= false;
161 deferred_page_seq_no_
= -1;
162 deferred_contents_
.clear();
163 if (cld_data_polling_started_
)
166 // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
167 // components/translate/core/common/translate_util.cc, and ignore any URL
168 // that fails that check. This will require moving unit tests and rewiring
169 // other function calls as well, so for now replicate the logic here.
172 if (url
.SchemeIs(content::kChromeUIScheme
))
174 if (url
.SchemeIs(content::kChromeDevToolsScheme
))
176 if (url
.SchemeIs(url::kFtpScheme
))
178 if (url
.SchemeIs(extension_scheme_
.c_str()))
181 // Start polling for CLD data.
182 cld_data_polling_started_
= true;
183 TranslateHelper::SendCldDataRequest(0, 1000);
186 void TranslateHelper::PageCaptured(const base::string16
& contents
) {
187 PageCapturedImpl(page_seq_no_
, contents
);
190 void TranslateHelper::PageCapturedImpl(int page_seq_no
,
191 const base::string16
& contents
) {
192 // Get the document language as set by WebKit from the http-equiv
193 // meta tag for "content-language". This may or may not also
194 // have a value derived from the actual Content-Language HTTP
195 // header. The two actually have different meanings (despite the
196 // original intent of http-equiv to be an equivalent) with the former
197 // being the language of the document and the latter being the
198 // language of the intended audience (a distinction really only
199 // relevant for things like langauge textbooks). This distinction
200 // shouldn't affect translation.
201 WebFrame
* main_frame
= GetMainFrame();
202 if (!main_frame
|| page_seq_no_
!= page_seq_no
)
205 if (!cld_data_provider_
->IsCldDataAvailable()) {
206 // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
207 // is loaded, if ever.
208 deferred_page_capture_
= true;
209 deferred_page_seq_no_
= page_seq_no
;
210 deferred_contents_
= contents
;
211 RecordLanguageDetectionTiming(DEFERRED
);
215 if (deferred_page_seq_no_
== -1) {
216 // CLD data was available before language detection was requested.
217 RecordLanguageDetectionTiming(ON_TIME
);
219 // This is a request that was triggered because CLD data is now available
220 // and was previously deferred.
221 RecordLanguageDetectionTiming(RESUMED
);
224 WebDocument document
= main_frame
->document();
225 std::string content_language
= document
.contentLanguage().utf8();
226 WebElement html_element
= document
.documentElement();
227 std::string html_lang
;
228 // |html_element| can be null element, e.g. in
229 // BrowserTest.WindowOpenClose.
230 if (!html_element
.isNull())
231 html_lang
= html_element
.getAttribute("lang").utf8();
232 std::string cld_language
;
233 bool is_cld_reliable
;
234 std::string language
= DeterminePageLanguage(
235 content_language
, html_lang
, contents
, &cld_language
, &is_cld_reliable
);
237 if (language
.empty())
240 language_determined_time_
= base::TimeTicks::Now();
242 GURL
url(document
.url());
243 LanguageDetectionDetails details
;
244 details
.time
= base::Time::Now();
246 details
.content_language
= content_language
;
247 details
.cld_language
= cld_language
;
248 details
.is_cld_reliable
= is_cld_reliable
;
249 details
.has_notranslate
= HasNoTranslateMeta(&document
);
250 details
.html_root_language
= html_lang
;
251 details
.adopted_language
= language
;
253 // TODO(hajimehoshi): If this affects performance, it should be set only if
254 // translate-internals tab exists.
255 details
.contents
= contents
;
257 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
260 !details
.has_notranslate
&& !language
.empty()));
263 void TranslateHelper::CancelPendingTranslation() {
264 weak_method_factory_
.InvalidateWeakPtrs();
265 translation_pending_
= false;
266 source_lang_
.clear();
267 target_lang_
.clear();
268 CancelCldDataPolling();
271 ////////////////////////////////////////////////////////////////////////////////
272 // TranslateHelper, protected:
274 bool TranslateHelper::IsTranslateLibAvailable() {
275 return ExecuteScriptAndGetBoolResult(
276 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
277 "typeof cr.googleTranslate.translate == 'function'", false);
280 bool TranslateHelper::IsTranslateLibReady() {
281 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
284 bool TranslateHelper::HasTranslationFinished() {
285 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
288 bool TranslateHelper::HasTranslationFailed() {
289 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
292 bool TranslateHelper::StartTranslation() {
293 std::string script
= "cr.googleTranslate.translate('" +
298 return ExecuteScriptAndGetBoolResult(script
, false);
301 std::string
TranslateHelper::GetOriginalPageLanguage() {
302 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
305 base::TimeDelta
TranslateHelper::AdjustDelay(int delayInMs
) {
306 // Just converts |delayInMs| without any modification in practical cases.
307 // Tests will override this function to return modified value.
308 return base::TimeDelta::FromMilliseconds(delayInMs
);
311 void TranslateHelper::ExecuteScript(const std::string
& script
) {
312 WebFrame
* main_frame
= GetMainFrame();
316 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
317 main_frame
->executeScriptInIsolatedWorld(
318 world_id_
, &source
, 1, extension_group_
);
321 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string
& script
,
323 WebFrame
* main_frame
= GetMainFrame();
327 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
328 WebVector
<v8::Local
<v8::Value
> > results
;
329 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
330 main_frame
->executeScriptInIsolatedWorld(
331 world_id_
, &source
, 1, extension_group_
, &results
);
332 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsBoolean()) {
337 return results
[0]->BooleanValue();
340 std::string
TranslateHelper::ExecuteScriptAndGetStringResult(
341 const std::string
& script
) {
342 WebFrame
* main_frame
= GetMainFrame();
344 return std::string();
346 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
347 WebVector
<v8::Local
<v8::Value
> > results
;
348 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
349 main_frame
->executeScriptInIsolatedWorld(
350 world_id_
, &source
, 1, extension_group_
, &results
);
351 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsString()) {
353 return std::string();
356 v8::Local
<v8::String
> v8_str
= results
[0].As
<v8::String
>();
357 int length
= v8_str
->Utf8Length() + 1;
358 scoped_ptr
<char[]> str(new char[length
]);
359 v8_str
->WriteUtf8(str
.get(), length
);
360 return std::string(str
.get());
363 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
364 const std::string
& script
) {
365 WebFrame
* main_frame
= GetMainFrame();
369 v8::HandleScope
handle_scope(v8::Isolate::GetCurrent());
370 WebVector
<v8::Local
<v8::Value
> > results
;
371 WebScriptSource source
= WebScriptSource(ASCIIToUTF16(script
));
372 main_frame
->executeScriptInIsolatedWorld(
373 world_id_
, &source
, 1, extension_group_
, &results
);
374 if (results
.size() != 1 || results
[0].IsEmpty() || !results
[0]->IsNumber()) {
379 return results
[0]->NumberValue();
382 ////////////////////////////////////////////////////////////////////////////////
383 // TranslateHelper, private:
385 bool TranslateHelper::OnMessageReceived(const IPC::Message
& message
) {
387 IPC_BEGIN_MESSAGE_MAP(TranslateHelper
, message
)
388 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage
, OnTranslatePage
)
389 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation
, OnRevertTranslation
)
390 IPC_MESSAGE_UNHANDLED(handled
= false)
391 IPC_END_MESSAGE_MAP()
393 handled
= cld_data_provider_
->OnMessageReceived(message
);
398 void TranslateHelper::OnTranslatePage(int page_seq_no
,
399 const std::string
& translate_script
,
400 const std::string
& source_lang
,
401 const std::string
& target_lang
) {
402 WebFrame
* main_frame
= GetMainFrame();
403 if (!main_frame
|| page_seq_no_
!= page_seq_no
)
404 return; // We navigated away, nothing to do.
406 // A similar translation is already under way, nothing to do.
407 if (translation_pending_
&& target_lang_
== target_lang
)
410 // Any pending translation is now irrelevant.
411 CancelPendingTranslation();
414 translation_pending_
= true;
416 // If the source language is undetermined, we'll let the translate element
418 source_lang_
= (source_lang
!= kUnknownLanguageCode
) ? source_lang
419 : kAutoDetectionLanguage
;
420 target_lang_
= target_lang
;
422 ReportUserActionDuration(language_determined_time_
, base::TimeTicks::Now());
424 GURL
url(main_frame
->document().url());
425 ReportPageScheme(url
.scheme());
427 // Set up v8 isolated world with proper content-security-policy and
429 WebFrame
* frame
= GetMainFrame();
431 frame
->setIsolatedWorldContentSecurityPolicy(
432 world_id_
, WebString::fromUTF8(kContentSecurityPolicy
));
434 GURL security_origin
= GetTranslateSecurityOrigin();
435 frame
->setIsolatedWorldSecurityOrigin(
436 world_id_
, WebSecurityOrigin::create(security_origin
));
439 if (!IsTranslateLibAvailable()) {
440 // Evaluate the script to add the translation related method to the global
441 // context of the page.
442 ExecuteScript(translate_script
);
443 DCHECK(IsTranslateLibAvailable());
446 TranslatePageImpl(page_seq_no
, 0);
449 void TranslateHelper::OnRevertTranslation(int page_seq_no
) {
450 if (page_seq_no_
!= page_seq_no
)
451 return; // We navigated away, nothing to do.
453 if (!IsTranslateLibAvailable()) {
458 CancelPendingTranslation();
460 ExecuteScript("cr.googleTranslate.revert()");
463 void TranslateHelper::CheckTranslateStatus(int page_seq_no
) {
464 // If this is not the same page, the translation has been canceled. If the
465 // view is gone, the page is closing.
466 if (page_seq_no_
!= page_seq_no
|| !render_view()->GetWebView())
469 // First check if there was an error.
470 if (HasTranslationFailed()) {
471 // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
472 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR
);
473 return; // There was an error.
476 if (HasTranslationFinished()) {
477 std::string actual_source_lang
;
478 // Translation was successfull, if it was auto, retrieve the source
479 // language the Translate Element detected.
480 if (source_lang_
== kAutoDetectionLanguage
) {
481 actual_source_lang
= GetOriginalPageLanguage();
482 if (actual_source_lang
.empty()) {
483 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE
);
485 } else if (actual_source_lang
== target_lang_
) {
486 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES
);
490 actual_source_lang
= source_lang_
;
493 if (!translation_pending_
) {
498 translation_pending_
= false;
500 // Check JavaScript performance counters for UMA reports.
501 ReportTimeToTranslate(
502 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
504 // Notify the browser we are done.
506 new ChromeViewHostMsg_PageTranslated(render_view()->GetRoutingID(),
509 TranslateErrors::NONE
));
513 // The translation is still pending, check again later.
514 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
515 FROM_HERE
, base::Bind(&TranslateHelper::CheckTranslateStatus
,
516 weak_method_factory_
.GetWeakPtr(), page_seq_no
),
517 AdjustDelay(kTranslateStatusCheckDelayMs
));
520 void TranslateHelper::TranslatePageImpl(int page_seq_no
, int count
) {
521 DCHECK_LT(count
, kMaxTranslateInitCheckAttempts
);
522 if (page_seq_no_
!= page_seq_no
|| !render_view()->GetWebView())
525 if (!IsTranslateLibReady()) {
526 // The library is not ready, try again later, unless we have tried several
527 // times unsucessfully already.
528 if (++count
>= kMaxTranslateInitCheckAttempts
) {
529 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR
);
532 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
534 base::Bind(&TranslateHelper::TranslatePageImpl
,
535 weak_method_factory_
.GetWeakPtr(), page_seq_no
, count
),
536 AdjustDelay(count
* kTranslateInitCheckDelayMs
));
540 // The library is loaded, and ready for translation now.
541 // Check JavaScript performance counters for UMA reports.
543 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
545 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
547 if (!StartTranslation()) {
548 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR
);
551 // Check the status of the translation.
552 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
553 FROM_HERE
, base::Bind(&TranslateHelper::CheckTranslateStatus
,
554 weak_method_factory_
.GetWeakPtr(), page_seq_no
),
555 AdjustDelay(kTranslateStatusCheckDelayMs
));
558 void TranslateHelper::NotifyBrowserTranslationFailed(
559 TranslateErrors::Type error
) {
560 translation_pending_
= false;
561 // Notify the browser there was an error.
562 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
563 render_view()->GetRoutingID(), source_lang_
, target_lang_
, error
));
566 WebFrame
* TranslateHelper::GetMainFrame() {
567 WebView
* web_view
= render_view()->GetWebView();
569 // When the tab is going to be closed, the web_view can be NULL.
573 return web_view
->mainFrame();
576 void TranslateHelper::CancelCldDataPolling() {
577 cld_data_polling_canceled_
= true;
580 void TranslateHelper::SendCldDataRequest(const int delay_millis
,
581 const int next_delay_millis
) {
582 DCHECK_GE(delay_millis
, 0);
583 DCHECK_GT(next_delay_millis
, 0);
585 // Terminate immediately if told to stop polling.
586 if (cld_data_polling_canceled_
) {
587 DVLOG(1) << "Aborting CLD data request (polling canceled)";
591 // Terminate immediately if data is already loaded.
592 if (cld_data_provider_
->IsCldDataAvailable()) {
593 DVLOG(1) << "Aborting CLD data request (data available)";
597 // Terminate immediately if the decayed delay is sufficiently large.
598 if (next_delay_millis
> std::numeric_limits
<int>::max() / 2) {
599 DVLOG(1) << "Aborting CLD data request (exceeded max number of requests)";
600 cld_data_polling_started_
= false;
604 if (!g_cld_callback_set
) {
605 g_cld_callback_set
= true;
606 cld_data_provider_
->SetCldAvailableCallback(
607 base::Bind(&TranslateHelper::OnCldDataAvailable
,
608 weak_method_factory_
.GetWeakPtr()));
611 // Else, make an asynchronous request to get the data we need.
612 DVLOG(1) << "Requesting CLD data from data provider";
613 cld_data_provider_
->SendCldDataRequest();
615 // ... and enqueue another delayed task to call again. This will start a
616 // chain of polling that will last until the pointer stops being NULL,
617 // which is the right thing to do.
618 // NB: In the great majority of cases, the data file will be available and
619 // the very first delayed task will be a no-op that terminates the chain.
620 // It's only while downloading the file that this will chain for a
621 // nontrivial amount of time.
622 // Use a weak pointer to avoid keeping this helper object around forever.
623 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
624 FROM_HERE
, base::Bind(&TranslateHelper::SendCldDataRequest
,
625 weak_method_factory_
.GetWeakPtr(),
626 next_delay_millis
, next_delay_millis
* 2),
627 base::TimeDelta::FromMilliseconds(delay_millis
));
630 void TranslateHelper::OnCldDataAvailable() {
631 if (deferred_page_capture_
) {
632 deferred_page_capture_
= false; // Don't do this a second time.
633 PageCapturedImpl(deferred_page_seq_no_
, deferred_contents_
);
634 deferred_page_seq_no_
= -1; // Clean up for sanity
635 deferred_contents_
.clear(); // Clean up for sanity
639 void TranslateHelper::RecordLanguageDetectionTiming(
640 LanguageDetectionTiming timing
) {
641 // The following comment is copied from page_load_histograms.cc, and applies
642 // just as equally here:
644 // Since there are currently no guarantees that renderer histograms will be
645 // sent to the browser, we initiate a PostTask here to be sure that we send
646 // the histograms we generated. Without this call, pages that don't have an
647 // on-close-handler might generate data that is lost when the renderer is
648 // shutdown abruptly (perchance because the user closed the tab).
649 DVLOG(1) << "Language detection timing: " << timing
;
650 UMA_HISTOGRAM_ENUMERATION("Translate.LanguageDetectionTiming", timing
,
651 LANGUAGE_DETECTION_TIMING_MAX_VALUE
);
653 // Note on performance: Under normal circumstances, this should get called
654 // once per page load. The code will either manage to do it ON_TIME or will
655 // be DEFERRED until CLD is ready. In the latter case, CLD is in dynamic mode
656 // and may eventually become available, triggering the RESUMED event; after
657 // this, everything should start being ON_TIME. This should never run more
658 // than twice in a page load, under any conditions.
659 // Also note that language detection is triggered off of a delay AFTER the
660 // page load completed event has fired, making this very much off the critical
662 content::RenderThread::Get()->UpdateHistograms(
663 content::kHistogramSynchronizerReservedSequenceNumber
);
666 } // namespace translate