1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
// Namespace for the language detection helpers. It is registered under a
// quoted key and created together with its mutable state.
__gCrWeb['languageDetection'] = {
  /**
   * The cache of the text content that was extracted from the page.
   * Null while nothing is buffered.
   */
  bufferedTextContent: null,

  /**
   * The number of active requests that have populated the cache. This is
   * incremented every time a call to
   * |__gCrWeb.languageDetection.detectLanguage| populates the buffer, and
   * decremented every time there is a call to retrieve the buffer. The
   * buffer is purged when this goes down to 0.
   */
  activeRequests: 0
};
/**
 * Returns true if translation of the page is allowed.
 * Translation is not allowed when a "notranslate" meta tag is defined, i.e.
 * a <meta name="google"> element whose |content| property or "value"
 * attribute is exactly "notranslate".
 * @return {boolean} true if translation of the page is allowed.
 */
__gCrWeb.languageDetection['translationAllowed'] = function() {
  var metaTags = document.getElementsByTagName('meta');
  for (var i = 0; i < metaTags.length; ++i) {
    var tag = metaTags[i];
    // Only <meta name="google"> tags can carry the opt-out.
    if (tag.name !== 'google') {
      continue;
    }
    if (tag.content === 'notranslate' ||
        tag.getAttribute('value') === 'notranslate') {
      return false;
    }
  }
  // No opt-out tag was found.
  return true;
};
/**
 * Gets the content of a meta tag by httpEquiv.
 * The function is case insensitive: both the tag's "http-equiv" value and
 * the requested name are lower-cased before they are compared.
 * @param {string} httpEquiv Value of the "httpEquiv" attribute to look for.
 * @return {string} Value of the "content" attribute of the first matching
 *     meta tag.
 */
__gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
  // Normalize the requested name so that callers do not have to pass it in
  // lower case. Non-string arguments are left untouched and never match.
  var wanted =
      typeof httpEquiv === 'string' ? httpEquiv.toLowerCase() : httpEquiv;
  var metaTags = document.getElementsByTagName('meta');
  for (var i = 0; i < metaTags.length; ++i) {
    if (metaTags[i].httpEquiv.toLowerCase() === wanted) {
      return metaTags[i].content;
    }
  }
  // NOTE(review): the no-match return value is assumed to be the empty
  // string, based on the documented {string} return type -- confirm.
  return '';
};
57 // Used by the |getTextContent| function below.
58 __gCrWeb
.languageDetection
['nonTextNodeNames'] = {
/**
 * Walks a DOM tree to extract the text content.
 * Does not walk into a node when its name is in |nonTextNodeNames|, nor into
 * elements whose computed style is display:none or visibility:hidden.
 * @param {HTMLElement} node The DOM tree
 * @param {number} maxLen Output will be truncated to |maxLen|
 * @return {string} The text content
 */
__gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
  if (!node || maxLen <= 0) {
    return '';
  }

  var txt = '';
  // Formatting and filtering.
  if (node.nodeType === Node.ELEMENT_NODE) {
    // Reject non-text nodes such as scripts.
    if (__gCrWeb.languageDetection.nonTextNodeNames[node.nodeName]) {
      return '';
    }
    if (node.nodeName === 'BR') {
      // NOTE(review): a <br> is assumed to contribute a single line break;
      // confirm against the upstream implementation.
      return '\n';
    }
    var style = window.getComputedStyle(node);
    // Only proceed if the element is visible.
    if (style.display === 'none' || style.visibility === 'hidden') {
      return '';
    }
    // No need to add a line break before |body| as it is the first element.
    if (node.nodeName !== 'BODY' && style.display !== 'inline') {
      // NOTE(review): the separator for non-inline elements is assumed to be
      // '\n', per the "line break" wording in the comment above -- confirm.
      txt = '\n';
    }
  }

  if (node.hasChildNodes()) {
    // Recurse into the children, handing each one only the remaining budget
    // so that the accumulated text never exceeds |maxLen|.
    for (var childIdx = 0;
         childIdx < node.childNodes.length && txt.length < maxLen;
         childIdx++) {
      txt += __gCrWeb.languageDetection.getTextContent(
          node.childNodes[childIdx], maxLen - txt.length);
    }
  } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
    txt += node.textContent.substring(0, maxLen - txt.length);
  }

  return txt;
};
/**
 * Detects if a page has content that needs translation and informs the native
 * side. The text content of a page is cached in
 * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
 * time directly from the Obj-C side. This is to avoid using |invokeOnHost|.
 */
__gCrWeb.languageDetection['detectLanguage'] = function() {
  if (!__gCrWeb.languageDetection.translationAllowed()) {
    // The page opted out of translation: report that and capture nothing.
    __gCrWeb.message.invokeOnHost({
        'command': 'languageDetection.textCaptured',
        'translationAllowed': false});
    return;
  }

  // Constant for the maximum length of the extracted text returned by
  // |-detectLanguage| to the native side.
  // Matches desktop implementation.
  // Note: This should stay in sync with the constant in
  // js_language_detection_manager.mm .
  var kMaxIndexChars = 65535;
  var captureBeginTime = new Date();
  __gCrWeb.languageDetection.activeRequests += 1;
  __gCrWeb.languageDetection.bufferedTextContent =
      __gCrWeb.languageDetection.getTextContent(document.body, kMaxIndexChars);
  // Elapsed wall-clock time in milliseconds. getTime() is used rather than
  // getMilliseconds(): the latter only yields the 0-999 millisecond component
  // of a timestamp, so subtracting two of them gives a wrong (possibly
  // negative) duration whenever the capture crosses a second boundary.
  var captureTextTime = (new Date()).getTime() - captureBeginTime.getTime();
  // NOTE(review): the header name 'content-language' is inferred from the
  // variable name -- confirm.
  var httpContentLanguage =
      __gCrWeb.languageDetection.getMetaContentByHttpEquiv('content-language');
  __gCrWeb.message.invokeOnHost({
      'command': 'languageDetection.textCaptured',
      'translationAllowed': true,
      'captureTextTime': captureTextTime,
      'htmlLang': document.documentElement.lang,
      'httpContentLanguage': httpContentLanguage});
};
/**
 * Retrieves the cached text content of a page. Returns it and then purges the
 * buffer once no active request remains.
 * @return {?string} The value of
 *     |__gCrWeb.languageDetection.bufferedTextContent| at the time of the
 *     call.
 */
__gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
  // Read the buffer before it is potentially purged below.
  var textContent = __gCrWeb.languageDetection.bufferedTextContent;
  __gCrWeb.languageDetection.activeRequests -= 1;
  if (__gCrWeb.languageDetection.activeRequests === 0) {
    __gCrWeb.languageDetection.bufferedTextContent = null;
  }
  return textContent;
};
164 }()) // End of anonymous function.