merged tag ooo/DEV300_m102
[LibreOffice.git] / lingucomponent / source / languageguessing / simpleguesser.cxx
blobaa4f670746fdb50d602781b1e8c451bc7f014fcb
1 /***************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 /**
33 * TODO
34 * - Add exception throwing when h == NULL
35 * - Not init h when implicit constructor is launched
38 // MARKER(update_precomp.py): autogen include statement, do not remove
39 #include "precompiled_lingucomponent.hxx"
41 #include <string.h>
42 #include <sstream>
43 #include <iostream>
45 #include <libtextcat/textcat.h>
46 #include <libtextcat/common.h>
47 #include <libtextcat/constants.h>
48 #include <libtextcat/fingerprint.h>
49 #include <libtextcat/utf8misc.h>
51 #include <sal/types.h>
53 #include "altstrfunc.hxx"
54 #include "simpleguesser.hxx"
56 #ifndef _UTF8_
57 #define _UTF8_
58 #endif
61 using namespace std;
64 /**
65 * The following 3 structures are copied from fingerprint.c and textcat.c
68 typedef struct ngram_t {
70 sint2 rank;
71 char str[MAXNGRAMSIZE+1];
73 } ngram_t;
75 typedef struct fp_t {
77 const char *name;
78 ngram_t *fprint;
79 uint4 size;
81 } fp_t;
83 typedef struct textcat_t{
85 void **fprint;
86 char *fprint_disable;
87 uint4 size;
88 uint4 maxsize;
90 char output[MAXOUTPUTSIZE];
92 } textcat_t;
93 /** end of the 3 structs */
95 SimpleGuesser::SimpleGuesser()
97 h = NULL;
100 void SimpleGuesser::operator=(SimpleGuesser& sg){
101 if(h){textcat_Done(h);}
102 h = sg.h;
105 SimpleGuesser::~SimpleGuesser()
107 if(h){textcat_Done(h);}
112 \fn SimpleGuesser::GuessLanguage(char* text)
114 vector<Guess> SimpleGuesser::GuessLanguage(char* text)
116 vector<Guess> guesses;
118 if(!h){return guesses;}
120 //calculate le number of unicode charcters (symbols)
121 int len = utfstrlen(text);
123 if( len > MAX_STRING_LENGTH_TO_ANALYSE ){len = MAX_STRING_LENGTH_TO_ANALYSE ;}
125 char *guess_list = textcat_Classify(h, text, len);
127 if(strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0){
128 return guesses;
131 int current_pointer = 0;
133 for(int i = 0; guess_list[current_pointer] != '\0'; i++)
135 while(guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0'){
136 current_pointer++;
138 if(guess_list[current_pointer] != '\0')
140 Guess g((char*)(guess_list + current_pointer));
142 guesses.push_back(g);
144 current_pointer++;
148 return guesses;
152 \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
154 Guess SimpleGuesser::GuessPrimaryLanguage(char* text)
156 vector<Guess> ret = GuessLanguage(text);
157 if(ret.size() > 0){
158 return GuessLanguage(text)[0];
160 else{
161 return Guess();
165 * Used to find out which languages are available, unavailable, or both
166 * when mask = 0xF0, return only Available
167 * when mask = 0x0F, return only Unavailable
168 * when mask = 0xFF, return both Available and Unavailable
170 vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
172 size_t i;
173 textcat_t *tables = (textcat_t*)h;
175 vector<Guess> lang;
176 if(!h){return lang;}
178 for (i=0; i<tables->size; i++) {
179 if(tables->fprint_disable[i] & mask){
180 string langStr = "[";
181 langStr += (char*)fp_Name(tables->fprint[i]);
182 Guess g( (char *)langStr.c_str());
183 lang.push_back(g);
187 return lang;
190 vector<Guess> SimpleGuesser::GetAvailableLanguages(){
191 return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
194 vector<Guess> SimpleGuesser::GetUnavailableLanguages(){
195 return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
198 vector<Guess> SimpleGuesser::GetAllManagedLanguages(){
199 return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
202 void SimpleGuesser::XableLanguage(string lang, char mask){
203 size_t i;
204 textcat_t *tables = (textcat_t*)h;
206 if(!h){return;}
208 for (i=0; i<tables->size; i++) {
209 string language(fp_Name(tables->fprint[i]));
210 if(start(language,lang) == 0){
211 //cout << language << endl;
212 tables->fprint_disable[i] = mask;
213 //continue;
218 void SimpleGuesser::EnableLanguage(string lang){
219 XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
222 void SimpleGuesser::DisableLanguage(string lang){
223 XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
229 void SimpleGuesser::SetDBPath(const char* path, const char* prefix){
230 if(h){
231 textcat_Done(h);
233 h = special_textcat_Init(path, prefix);