Update ooo320-m1
[ooovba.git] / lingucomponent / source / languageguessing / simpleguesser.cxx
blob484393d1884c5c1913af2b466e9a1371ba6bcea0
1 /***************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: simpleguesser.cxx,v $
10 * $Revision: 1.6 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 /**
36 * TODO
37 * - Add exception throwing when h == NULL
38 * - Not init h when implicit constructor is launched
41 // MARKER(update_precomp.py): autogen include statement, do not remove
42 #include "precompiled_lingucomponent.hxx"
44 #include <string.h>
45 #include <sstream>
46 #include <iostream>
48 #include <libtextcat/textcat.h>
49 #include <libtextcat/common.h>
50 #include <libtextcat/constants.h>
51 #include <libtextcat/fingerprint.h>
52 #include <libtextcat/utf8misc.h>
54 #include <sal/types.h>
56 #include "altstrfunc.hxx"
57 #include "simpleguesser.hxx"
59 #ifndef _UTF8_
60 #define _UTF8_
61 #endif
64 using namespace std;
/**
 * The following three structures are copied from fingerprint.c and textcat.c
 */
71 typedef struct ngram_t {
73 sint2 rank;
74 char str[MAXNGRAMSIZE+1];
76 } ngram_t;
78 typedef struct fp_t {
80 const char *name;
81 ngram_t *fprint;
82 uint4 size;
84 } fp_t;
86 typedef struct textcat_t{
88 void **fprint;
89 char *fprint_disable;
90 uint4 size;
91 uint4 maxsize;
93 char output[MAXOUTPUTSIZE];
95 } textcat_t;
96 /** end of the 3 structs */
98 SimpleGuesser::SimpleGuesser()
100 h = NULL;
103 void SimpleGuesser::operator=(SimpleGuesser& sg){
104 if(h){textcat_Done(h);}
105 h = sg.h;
108 SimpleGuesser::~SimpleGuesser()
110 if(h){textcat_Done(h);}
115 \fn SimpleGuesser::GuessLanguage(char* text)
117 vector<Guess> SimpleGuesser::GuessLanguage(char* text)
119 vector<Guess> guesses;
121 if(!h){return guesses;}
123 //calculate le number of unicode charcters (symbols)
124 int len = utfstrlen(text);
126 if( len > MAX_STRING_LENGTH_TO_ANALYSE ){len = MAX_STRING_LENGTH_TO_ANALYSE ;}
128 char *guess_list = textcat_Classify(h, text, len);
130 if(strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0){
131 return guesses;
134 int current_pointer = 0;
136 for(int i = 0; guess_list[current_pointer] != '\0'; i++)
138 while(guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0'){
139 current_pointer++;
141 if(guess_list[current_pointer] != '\0')
143 Guess g((char*)(guess_list + current_pointer));
145 guesses.push_back(g);
147 current_pointer++;
151 return guesses;
155 \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
157 Guess SimpleGuesser::GuessPrimaryLanguage(char* text)
159 vector<Guess> ret = GuessLanguage(text);
160 if(ret.size() > 0){
161 return GuessLanguage(text)[0];
163 else{
164 return Guess();
168 * Is used to know wich language is available, unavailable or both
169 * when mask = 0xF0, return only Available
170 * when mask = 0x0F, return only Unavailable
171 * when mask = 0xFF, return both Available and Unavailable
173 vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
175 size_t i;
176 textcat_t *tables = (textcat_t*)h;
178 vector<Guess> lang;
179 if(!h){return lang;}
181 for (i=0; i<tables->size; i++) {
182 if(tables->fprint_disable[i] & mask){
183 string langStr = "[";
184 langStr += (char*)fp_Name(tables->fprint[i]);
185 Guess g( (char *)langStr.c_str());
186 lang.push_back(g);
190 return lang;
193 vector<Guess> SimpleGuesser::GetAvailableLanguages(){
194 return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
197 vector<Guess> SimpleGuesser::GetUnavailableLanguages(){
198 return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
201 vector<Guess> SimpleGuesser::GetAllManagedLanguages(){
202 return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
205 void SimpleGuesser::XableLanguage(string lang, char mask){
206 size_t i;
207 textcat_t *tables = (textcat_t*)h;
209 if(!h){return;}
211 for (i=0; i<tables->size; i++) {
212 string language(fp_Name(tables->fprint[i]));
213 if(start(language,lang) == 0){
214 //cout << language << endl;
215 tables->fprint_disable[i] = mask;
216 //continue;
221 void SimpleGuesser::EnableLanguage(string lang){
222 XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
225 void SimpleGuesser::DisableLanguage(string lang){
226 XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
232 void SimpleGuesser::SetDBPath(const char* path, const char* prefix){
233 if(h){
234 textcat_Done(h);
236 h = special_textcat_Init(path, prefix);