fix baseline build (old cairo) - 'cairo_rectangle_int_t' does not name a type
[LibreOffice.git] / lingucomponent / source / languageguessing / simpleguesser.cxx
blobcf5324416a86c13881d968cce2937d2e51aa48c8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 /**
25 * TODO
26 * - Add exception throwing when h == NULL
27 * - Not init h when implicit constructor is launched
30 #include <string.h>
31 #include <sstream>
32 #include <iostream>
34 #ifdef SYSTEM_LIBEXTTEXTCAT
35 #include <libexttextcat/textcat.h>
36 #include <libexttextcat/common.h>
37 #include <libexttextcat/constants.h>
38 #include <libexttextcat/fingerprint.h>
39 #include <libexttextcat/utf8misc.h>
40 #else
41 #include <textcat.h>
42 #include <common.h>
43 #include <constants.h>
44 #include <fingerprint.h>
45 #include <utf8misc.h>
46 #endif
48 #include <sal/types.h>
50 #include "altstrfunc.hxx"
51 #include "simpleguesser.hxx"
53 using namespace std;
55 /**
56 * This 3 following structures are from fingerprint.c and textcat.c
58 typedef struct ngram_t {
60 sint2 rank;
61 char str[MAXNGRAMSIZE+1];
63 } ngram_t;
65 typedef struct fp_t {
67 const char *name;
68 ngram_t *fprint;
69 uint4 size;
71 } fp_t;
73 typedef struct textcat_t{
75 void **fprint;
76 char *fprint_disable;
77 uint4 size;
78 uint4 maxsize;
80 char output[MAXOUTPUTSIZE];
82 } textcat_t;
83 // end of the 3 structs
85 SimpleGuesser::SimpleGuesser()
87 h = NULL;
90 SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
91 // Check for self-assignment!
92 if (this == &sg) // Same object?
93 return *this; // Yes, so skip assignment, and just return *this.
95 if(h){textcat_Done(h);}
96 h = sg.h;
97 return *this;
100 SimpleGuesser::~SimpleGuesser()
102 if(h){textcat_Done(h);}
106 \fn SimpleGuesser::GuessLanguage(char* text)
108 vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
110 vector<Guess> guesses;
112 if (!h)
113 return guesses;
115 int len = strlen(text);
117 if (len > MAX_STRING_LENGTH_TO_ANALYSE)
118 len = MAX_STRING_LENGTH_TO_ANALYSE;
120 const char *guess_list = textcat_Classify(h, text, len);
122 // FIXME just a temporary check until new version with renamed macros deployed
123 #if EXTTEXTCAT_VERSION_MAJOR > 3 || (EXTTEXTCAT_VERSION_MAJOR == 3 && (EXTTEXTCAT_VERSION_MINOR > 4 || (EXTTEXTCAT_VERSION_MINOR == 4 && (EXTTEXTCAT_VERSION_MICRO >= 1))))
124 if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
125 #else
126 if (strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0)
127 #endif
128 return guesses;
130 int current_pointer = 0;
132 for(int i = 0; guess_list[current_pointer] != '\0'; i++)
134 while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
135 current_pointer++;
136 if(guess_list[current_pointer] != '\0')
138 Guess g(guess_list + current_pointer);
140 guesses.push_back(g);
142 current_pointer++;
146 return guesses;
149 Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
151 vector<Guess> ret = GuessLanguage(text);
152 return ret.empty() ? Guess() : ret[0];
155 * Is used to know which language is available, unavailable or both
156 * when mask = 0xF0, return only Available
157 * when mask = 0x0F, return only Unavailable
158 * when mask = 0xFF, return both Available and Unavailable
160 vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
162 textcat_t *tables = static_cast<textcat_t*>(h);
164 vector<Guess> lang;
165 if(!h){return lang;}
167 for (size_t i=0; i<tables->size; ++i)
169 if (tables->fprint_disable[i] & mask)
171 string langStr = "[";
172 langStr += fp_Name(tables->fprint[i]);
173 Guess g(langStr.c_str());
174 lang.push_back(g);
178 return lang;
181 vector<Guess> SimpleGuesser::GetAvailableLanguages()
183 return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
186 vector<Guess> SimpleGuesser::GetUnavailableLanguages()
188 return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
191 vector<Guess> SimpleGuesser::GetAllManagedLanguages()
193 return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
196 void SimpleGuesser::XableLanguage(const string& lang, char mask)
198 textcat_t *tables = static_cast<textcat_t*>(h);
200 if(!h){return;}
202 for (size_t i=0; i<tables->size; i++)
204 string language(fp_Name(tables->fprint[i]));
205 if (start(language,lang) == 0)
206 tables->fprint_disable[i] = mask;
210 void SimpleGuesser::EnableLanguage(const string& lang)
212 XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
215 void SimpleGuesser::DisableLanguage(const string& lang)
217 XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
220 void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
222 if (h)
223 textcat_Done(h);
224 h = special_textcat_Init(path, prefix);
227 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */