1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 __gCrWeb['languageDetection'] = {};
10 * The cache of the text content that was extracted from the page
12 __gCrWeb.languageDetection.bufferedTextContent = null;
15 * The number of active requests that have populated the cache. This is
16 * incremented every time a call to |__gCrWeb.languageDetection.detectLanguage|
17 * populates the buffer. This is decremented every time there is a call to
18 * retrieve the buffer. The buffer is purged when this goes down to 0.
20 __gCrWeb.languageDetection.activeRequests = 0;
/**
 * Reports whether the page may be translated.
 * A page opts out of translation with a meta tag named "google" whose
 * |content| property or "value" attribute equals "notranslate".
 * @return {boolean} false if such a meta tag exists, true otherwise.
 */
__gCrWeb.languageDetection['translationAllowed'] = function() {
  var metas = document.getElementsByTagName('meta');
  for (var idx = 0; idx < metas.length; ++idx) {
    var meta = metas[idx];
    // Only the "google" meta tag carries the opt-out marker.
    if (meta.name !== 'google') {
      continue;
    }
    var optsOut = meta.content === 'notranslate' ||
        meta.getAttribute('value') === 'notranslate';
    if (optsOut) {
      return false;
    }
  }
  return true;
};
/**
 * Gets the content of a meta tag by httpEquiv.
 * The comparison is case insensitive: both the |httpEquiv| argument and the
 * tag's own value are lower-cased before being compared.
 * @param {string} httpEquiv Value of the "httpEquiv" attribute to look for,
 *     in any letter case.
 * @return {string} Value of the "content" attribute of the first matching
 *     meta tag, or the empty string if no meta tag matches.
 */
__gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
  // Normalize the argument once, outside the loop, so that callers passing a
  // mixed-case name (e.g. 'Content-Language') still get a match.
  var wanted = String(httpEquiv).toLowerCase();
  var metaTags = document.getElementsByTagName('meta');
  for (var i = 0; i < metaTags.length; ++i) {
    if (metaTags[i].httpEquiv.toLowerCase() === wanted) {
      return metaTags[i].content;
    }
  }
  return '';
};
57 // Used by the |getTextContent| function below.
58 __gCrWeb.languageDetection['nonTextNodeNames'] = {
/**
 * Recursively collects the text of a DOM subtree.
 * Elements whose name is listed in |nonTextNodeNames| and elements that are
 * not displayed or not visible contribute nothing; a BR element contributes a
 * line break; any element other than BODY whose computed display is not
 * "inline" is preceded by a line break.
 * @param {HTMLElement} node Root of the subtree to walk.
 * @param {number} maxLen Upper bound on the length of the returned text.
 * @return {string} The extracted text, truncated to |maxLen|.
 */
__gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
  if (!node || maxLen <= 0) {
    return '';
  }

  var text = '';
  // Formatting and filtering, for elements only.
  if (node.nodeType === Node.ELEMENT_NODE) {
    var tagName = node.nodeName;
    // Scripts and similar elements hold no user-visible text.
    if (__gCrWeb.languageDetection.nonTextNodeNames[tagName]) {
      return '';
    }
    if (tagName === 'BR') {
      return '\n';
    }
    var computed = window.getComputedStyle(node);
    var isHidden =
        computed.display === 'none' || computed.visibility === 'hidden';
    if (isHidden) {
      return '';
    }
    // |body| is the first element, so it needs no leading line break.
    var startsNewLine = tagName !== 'BODY' && computed.display !== 'inline';
    if (startsNewLine) {
      text = '\n';
    }
  }

  // Leaf node: only text nodes with content add anything.
  if (!node.hasChildNodes()) {
    if (node.nodeType === Node.TEXT_NODE && node.textContent) {
      text += node.textContent.substring(0, maxLen - text.length);
    }
    return text;
  }

  // Inner node: append each child's text until the budget is used up.
  var children = node.childNodes;
  var pos = 0;
  while (pos < children.length && text.length < maxLen) {
    text += __gCrWeb.languageDetection.getTextContent(
        children[pos], maxLen - text.length);
    pos++;
  }
  return text;
};
/**
 * Detects if a page has content that needs translation and informs the native
 * side through a 'languageDetection.textCaptured' message.
 * When translation is allowed, the text content of the page is cached in
 * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
 * time directly from the Obj-C side, rather than being passed through
 * |invokeOnHost|. The message then carries the capture duration in
 * milliseconds, the document's |lang| attribute and the "content-language"
 * meta value.
 */
__gCrWeb.languageDetection['detectLanguage'] = function() {
  if (!__gCrWeb.languageDetection.translationAllowed()) {
    __gCrWeb.message.invokeOnHost({
        'command': 'languageDetection.textCaptured',
        'translationAllowed': false});
  } else {
    // Constant for the maximum length of the extracted text returned by
    // |-detectLanguage| to the native side.
    // Matches desktop implementation.
    // Note: This should stay in sync with the constant in
    // js_language_detection_manager.mm .
    var kMaxIndexChars = 65535;
    // Use epoch timestamps: |Date.prototype.getMilliseconds| only returns the
    // 0-999 millisecond component, so subtracting two such values gives a
    // wrong (possibly negative) duration when a second boundary is crossed.
    var captureBeginTime = Date.now();
    __gCrWeb.languageDetection.activeRequests += 1;
    __gCrWeb.languageDetection.bufferedTextContent =
        __gCrWeb.languageDetection.getTextContent(document.body,
            kMaxIndexChars);
    var captureTextTime = Date.now() - captureBeginTime;
    var httpContentLanguage =
        __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
            'content-language');
    __gCrWeb.message.invokeOnHost({
        'command': 'languageDetection.textCaptured',
        'translationAllowed': true,
        'captureTextTime': captureTextTime,
        'htmlLang': document.documentElement.lang,
        'httpContentLanguage': httpContentLanguage});
  }
};
/**
 * Retrieves the cached text content of a page. Returns it and then purges the
 * cache once no active request remains.
 * @return {?string} The buffered text content, or null if nothing is cached.
 */
__gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
  var textContent = __gCrWeb.languageDetection.bufferedTextContent;
  // Do not let the counter drop below zero: after an unmatched call it would
  // never return to exactly 0 and the buffer would never be purged again.
  if (__gCrWeb.languageDetection.activeRequests > 0) {
    __gCrWeb.languageDetection.activeRequests -= 1;
  }
  if (__gCrWeb.languageDetection.activeRequests === 0) {
    __gCrWeb.languageDetection.bufferedTextContent = null;
  }
  return textContent;
};
164 }()) // End of anonymous function.