components/translate/ios/browser/resources/language_detection.js

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
// Creates the namespace object that holds the language detection functions
// and state defined in this file. |__gCrWeb| itself is not declared here; it
// is presumably set up by an earlier injected script (verify against loader).
__gCrWeb['languageDetection'] = {};
   6
   7
   8 (function() {
/**
 * The cache of the text content that was extracted from the page.
 * It is filled by |__gCrWeb.languageDetection.detectLanguage| and reset to
 * null by |__gCrWeb.languageDetection.retrieveBufferedTextContent| once no
 * active request remains. null means nothing is currently cached.
 */
__gCrWeb.languageDetection.bufferedTextContent = null;
  13
/**
 * The number of active requests that have populated the cache.
 * Incremented every time a call to
 * |__gCrWeb.languageDetection.detectLanguage| populates the buffer, and
 * decremented every time
 * |__gCrWeb.languageDetection.retrieveBufferedTextContent| is called to
 * retrieve the buffer. The buffer is purged when this goes down to 0.
 */
__gCrWeb.languageDetection.activeRequests = 0;
  21
  22 /**
  23  * Returns true if translation of the page is allowed.
  24  * Translation is not allowed when a "notranslate" meta tag is defined.
  25  * @return {boolean} true if translation of the page is allowed.
  26  */
  27 __gCrWeb.languageDetection['translationAllowed'] = function() {
  28   var metaTags = document.getElementsByTagName('meta');
  29   for (var i = 0; i < metaTags.length; ++i) {
  30     if (metaTags[i].name === 'google') {
  31       if (metaTags[i].content === 'notranslate' ||
  32           metaTags[i].getAttribute('value') === 'notranslate') {
  33         return false;
  34       }
  35     }
  36   }
  37   return true;
  38 };
  39
  40 /**
  41  * Gets the content of a meta tag by httpEquiv.
  42  * The function is case insensitive.
  43  * @param {String} httpEquiv Value of the "httpEquiv" attribute, has to be
  44  *     lower case.
  45  * @return {string} Value of the "content" attribute of the meta tag.
  46  */
  47 __gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
  48   var metaTags = document.getElementsByTagName('meta');
  49   for (var i = 0; i < metaTags.length; ++i) {
  50     if (metaTags[i].httpEquiv.toLowerCase() === httpEquiv) {
  51       return metaTags[i].content;
  52     }
  53   }
  54   return '';
  55 };
  56
// Used by the |getTextContent| function below: names of the elements whose
// subtree is skipped entirely when extracting text. The table is indexed with
// |node.nodeName|; the value 1 only acts as a truthy marker.
__gCrWeb.languageDetection['nonTextNodeNames'] = {
  'SCRIPT': 1,
  'NOSCRIPT': 1,
  'STYLE': 1,
  'EMBED': 1,
  'OBJECT': 1
};
  65
  66 /**
  67  * Walks a DOM tree to extract the text content.
  68  * Does not walk into a node when its name is in |nonTextNodeNames|.
  69  * @param {HTMLElement} node The DOM tree
  70  * @param {number} maxLen Output will be truncated to |maxLen|
  71  * @return {string} The text content
  72  */
  73 __gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
  74   if (!node || maxLen <= 0) {
  75     return '';
  76   }
  77
  78   var txt = '';
  79   // Formatting and filtering.
  80   if (node.nodeType === Node.ELEMENT_NODE) {
  81     // Reject non-text nodes such as scripts.
  82     if (__gCrWeb.languageDetection.nonTextNodeNames[node.nodeName]) {
  83       return '';
  84     }
  85     if (node.nodeName === 'BR') {
  86       return '\n';
  87     }
  88     var style = window.getComputedStyle(node);
  89     // Only proceed if the element is visible.
  90     if (style.display === 'none' || style.visibility === 'hidden') {
  91       return '';
  92     }
  93     // No need to add a line break before |body| as it is the first element.
  94     if (node.nodeName !== 'BODY' && style.display !== 'inline') {
  95       txt = '\n';
  96     }
  97   }
  98
  99   if (node.hasChildNodes()) {
 100     for (var childIdx = 0;
 101          childIdx < node.childNodes.length && txt.length < maxLen;
 102          childIdx++) {
 103       txt += __gCrWeb.languageDetection.getTextContent(
 104           node.childNodes[childIdx], maxLen - txt.length);
 105     }
 106   } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
 107     txt += node.textContent.substring(0, maxLen - txt.length);
 108   }
 109
 110   return txt;
 111 };
 112
 113 /**
 114  * Detects if a page has content that needs translation and informs the native
 115  * side. The text content of a page is cached in
 116  * |__gCrWeb.languageDetection.bufferedTextContent| and retrived at a later time
 117  * retrived at a later time directly from the Obj-C side. This is to avoid
 118  * using |invokeOnHost|.
 119  */
 120 __gCrWeb.languageDetection['detectLanguage'] = function() {
 121   if (!__gCrWeb.languageDetection.translationAllowed()) {
 122     __gCrWeb.message.invokeOnHost({
 123         'command': 'languageDetection.textCaptured',
 124         'translationAllowed': false});
 125   } else {
 126     // Constant for the maximum length of the extracted text returned by
 127     // |-detectLanguage| to the native side.
 128     // Matches desktop implementation.
 129     // Note: This should stay in sync with the constant in
 130     // js_language_detection_manager.mm .
 131     var kMaxIndexChars = 65535;
 132     var captureBeginTime = new Date();
 133     __gCrWeb.languageDetection.activeRequests += 1;
 134     __gCrWeb.languageDetection.bufferedTextContent =
 135         __gCrWeb.languageDetection.getTextContent(document.body,
 136             kMaxIndexChars);
 137     var captureTextTime =
 138         (new Date()).getMilliseconds() - captureBeginTime.getMilliseconds();
 139     var httpContentLanguage =
 140         __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
 141             'content-language');
 142     __gCrWeb.message.invokeOnHost({
 143         'command': 'languageDetection.textCaptured',
 144         'translationAllowed': true,
 145         'captureTextTime': captureTextTime,
 146         'htmlLang': document.documentElement.lang,
 147         'httpContentLanguage': httpContentLanguage});
 148   }
 149 }
 150
 151 /**
 152  * Retrives the cached text content of a page. Returns it and then purges the
 153  * cache.
 154  */
 155 __gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
 156   var textContent = __gCrWeb.languageDetection.bufferedTextContent;
 157   __gCrWeb.languageDetection.activeRequests -= 1;
 158   if (__gCrWeb.languageDetection.activeRequests == 0) {
 159     __gCrWeb.languageDetection.bufferedTextContent = null;
 160   }
 161   return textContent;
 162 }
 163
 164 }())  // End of anonymous function.