Bump version to 24.04.3.4
[LibreOffice.git] / lingucomponent / source / languageguessing / simpleguesser.cxx
blob7210b1f451e3a72629bc080ee11e7ca904671841
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
/**
 * TODO
 * - Throw an exception when h == NULL
 * - Do not initialize h when the implicit constructor is run
 */
30 #include <string.h>
32 #ifdef SYSTEM_LIBEXTTEXTCAT
33 #include <libexttextcat/textcat.h>
34 #include <libexttextcat/common.h>
35 #include <libexttextcat/constants.h>
36 #include <libexttextcat/fingerprint.h>
37 #else
38 #include <textcat.h>
39 #include <common.h>
40 #include <constants.h>
41 #include <fingerprint.h>
42 #endif
44 #include <sal/types.h>
46 #include<rtl/character.hxx>
47 #include "simpleguesser.hxx"
49 static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){
50 size_t i;
51 int ret = 0;
53 size_t min = s1.length();
54 if (min > s2.length())
55 min = s2.length();
57 for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){
58 ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i]))
59 - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i]));
60 if(s1[i] == '.' || s2[i] == '.') {ret = 0;} //. is a neutral character
62 return ret;
65 namespace {
67 /**
68 * This following structure is from textcat.c
70 typedef struct textcat_t{
72 void **fprint;
73 char *fprint_disable;
74 uint4 size;
75 uint4 maxsize;
77 char output[MAXOUTPUTSIZE];
79 } textcat_t;
80 // end of the 3 structs
84 SimpleGuesser::SimpleGuesser()
86 h = nullptr;
89 SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
90 // Check for self-assignment!
91 if (this == &sg) // Same object?
92 return *this; // Yes, so skip assignment, and just return *this.
94 if(h){textcat_Done(h);}
95 h = sg.h;
96 return *this;
99 SimpleGuesser::~SimpleGuesser()
101 if(h){textcat_Done(h);}
105 \fn SimpleGuesser::GuessLanguage(char* text)
107 std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
109 std::vector<Guess> guesses;
111 if (!h)
112 return guesses;
114 int len = strlen(text);
116 if (len > MAX_STRING_LENGTH_TO_ANALYSE)
117 len = MAX_STRING_LENGTH_TO_ANALYSE;
119 const char *guess_list = textcat_Classify(h, text, len);
121 if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
122 return guesses;
124 int current_pointer = 0;
126 while(guess_list[current_pointer] != '\0')
128 while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
129 current_pointer++;
130 if(guess_list[current_pointer] != '\0')
132 Guess g(guess_list + current_pointer);
134 guesses.push_back(g);
136 current_pointer++;
140 return guesses;
143 Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
145 std::vector<Guess> ret = GuessLanguage(text);
146 return ret.empty() ? Guess() : ret[0];
149 * Is used to know which language is available, unavailable or both
150 * when mask = 0xF0, return only Available
151 * when mask = 0x0F, return only Unavailable
152 * when mask = 0xFF, return both Available and Unavailable
154 std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
156 textcat_t *tables = static_cast<textcat_t*>(h);
158 std::vector<Guess> lang;
159 if(!h){return lang;}
161 for (size_t i=0; i<tables->size; ++i)
163 if (tables->fprint_disable[i] & mask)
165 std::string langStr = "[";
166 langStr += fp_Name(tables->fprint[i]);
167 Guess g(langStr.c_str());
168 lang.push_back(g);
172 return lang;
175 std::vector<Guess> SimpleGuesser::GetAvailableLanguages()
177 return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
180 std::vector<Guess> SimpleGuesser::GetUnavailableLanguages()
182 return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
185 std::vector<Guess> SimpleGuesser::GetAllManagedLanguages()
187 return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
190 void SimpleGuesser::XableLanguage(const std::string& lang, char mask)
192 textcat_t *tables = static_cast<textcat_t*>(h);
194 if(!h){return;}
196 for (size_t i=0; i<tables->size; i++)
198 std::string language(fp_Name(tables->fprint[i]));
199 if (startsAsciiCaseInsensitive(language,lang) == 0)
200 tables->fprint_disable[i] = mask;
204 void SimpleGuesser::EnableLanguage(const std::string& lang)
206 XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
209 void SimpleGuesser::DisableLanguage(const std::string& lang)
211 XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
214 void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
216 if (h)
217 textcat_Done(h);
218 h = special_textcat_Init(path, prefix);
221 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */