lingucomponent/source/languageguessing/guess.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <sal/config.h>
  21
  22 #include <cassert>
  23 #include <string.h>
  24
  25 #ifdef SYSTEM_LIBEXTTEXTCAT
  26 #include <libexttextcat/textcat.h>
  27 #else
  28 #include <textcat.h>
  29 #endif
  30
  31 #include "guess.hxx"
  32
  33 /* Older textcat.h versions defined these constants under misspelled names. */
  34 #ifndef TEXTCAT_RESULT_UNKNOWN_STR
  35 #define TEXTCAT_RESULT_UNKNOWN_STR _TEXTCAT_RESULT_UNKOWN
  36 #endif
  37
  38 #ifndef TEXTCAT_RESULT_SHORT_STR
  39 #define TEXTCAT_RESULT_SHORT_STR _TEXTCAT_RESULT_SHORT
  40 #endif
  41
  42 Guess::Guess()
  43     : language_str(DEFAULT_LANGUAGE)
  44     , country_str(DEFAULT_COUNTRY)
  45 {
  46 }
  47
  48 /*
  49 * this use a char * string to build the guess object
  50 * a string like those is made as : [language-country-encoding]...
  51 *
  52 */
  53 Guess::Guess(const char * guess_str)
  54     : language_str(DEFAULT_LANGUAGE)
  55     , country_str(DEFAULT_COUNTRY)
  56 {
  57     //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets
  58     if(strcmp(guess_str + 1, TEXTCAT_RESULT_UNKNOWN_STR) == 0
  59        || strcmp(guess_str + 1, TEXTCAT_RESULT_SHORT_STR) == 0)
  60         return;
  61
  62     // From how this ctor is called from SimpleGuesser::GuessLanguage and
  63     // SimpleGuesser::GetManagedLanguages in
  64     // lingucomponent/source/languageguessing/simpleguesser.cxx, guess_str must start with "[":
  65     assert(guess_str[0] == GUESS_SEPARATOR_OPEN);
  66     auto const start = guess_str + 1;
  67     // Only look at the prefix of guess_str, delimited by the next "]" or "[" or end-of-string;
  68     // split it into at most three segments separated by "-" (where excess occurrences of "-"
  69     // would become part of the third segment), like "en-US-utf8"; the first segment denotes the
  70     // language; if there are three segments, the second denotes the country and the third the
  71     // encoding; otherwise, the second segment, if any (e.g., in "haw-utf8"), denotes the
  72     // encoding:
  73     char const * dash1 = nullptr;
  74     char const * dash2 = nullptr;
  75     auto p = start;
  76     for (;; ++p) {
  77         auto const c = *p;
  78         if (c == '\0' || c == GUESS_SEPARATOR_OPEN || c == GUESS_SEPARATOR_CLOSE) {
  79             break;
  80         }
  81         if (c == GUESS_SEPARATOR_SEP) {
  82             if (dash1 == nullptr) {
  83                 dash1 = p;
  84             } else {
  85                 dash2 = p;
  86                 // The encoding is ignored, so we can stop as soon as we found the second "-":
  87                 break;
  88             }
  89         }
  90     }
  91     auto const langLen = (dash1 == nullptr ? p : dash1) - start;
  92     if (langLen != 0) { // if not we use the default value
  93         language_str.assign(start, langLen);
  94     }
  95     if (dash2 != nullptr) {
  96         country_str.assign(dash1 + 1, dash2 - (dash1 + 1));
  97     }
  98 }
  99
 100 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */