1 /***************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: simpleguesser.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
37 * - Add exception throwing when h == NULL
38 * - Not init h when implicit constructor is launched
41 // MARKER(update_precomp.py): autogen include statement, do not remove
42 #include "precompiled_lingucomponent.hxx"
48 #include <libtextcat/textcat.h>
49 #include <libtextcat/common.h>
50 #include <libtextcat/constants.h>
51 #include <libtextcat/fingerprint.h>
52 #include <libtextcat/utf8misc.h>
54 #include <sal/types.h>
56 #include "altstrfunc.hxx"
57 #include "simpleguesser.hxx"
68 * The following 3 structures are taken from fingerprint.c and textcat.c
// Local copy of one of libtextcat's internal records (see the comment above:
// the structures are taken from fingerprint.c and textcat.c).
// The visible member is a character buffer of MAXNGRAMSIZE+1 bytes
// (presumably the n-gram text plus a terminating NUL - TODO confirm).
// NOTE(review): this layout has to match the linked libtextcat exactly - verify on library upgrades.
71 typedef struct ngram_t
{
74 char str
[MAXNGRAMSIZE
+1];
// Local copy of the structure that sits behind the textcat handle.
// The member functions further down cast h to textcat_t* and access its
// size, fprint and fprint_disable members to list and toggle languages.
// The visible member here is a MAXOUTPUTSIZE-byte character buffer named output.
// NOTE(review): this layout has to match the linked libtextcat exactly - verify on library upgrades.
86 typedef struct textcat_t
{
93 char output
[MAXOUTPUTSIZE
];
96 /** end of the 3 structs */
// Default constructor.
// NOTE(review): the destructor and operator= both test h before calling
// textcat_Done(h), so h must hold a well-defined (null) value after
// construction - confirm the constructor body sets it.
98 SimpleGuesser::SimpleGuesser()
// Assignment from another guesser.
// First releases the handle this object currently holds (if any) with textcat_Done.
// NOTE(review): returns void and takes a non-const reference, so assignments
// cannot be chained and a const SimpleGuesser cannot be the source.
// NOTE(review): presumably h is taken over from sg after the release; if it is
// copied as-is, both objects end up calling textcat_Done on the same handle - verify.
103 void SimpleGuesser::operator=(SimpleGuesser
& sg
){
104 if(h
){textcat_Done(h
);}
// Destructor: hands the textcat handle back to the library via textcat_Done,
// but only when a handle is actually set (h is non-null).
108 SimpleGuesser::~SimpleGuesser()
110 if(h
){textcat_Done(h
);}
115 \fn SimpleGuesser::GuessLanguage(char* text)
// Classifies text with textcat_Classify and converts the textual result into a list of Guess objects.
// param text: the text to analyse; it is passed to utfstrlen and textcat_Classify
//             (presumably NUL-terminated UTF-8 - TODO confirm).
// return: one Guess per GUESS_SEPARATOR_OPEN-introduced entry found in the
//         classifier output; an empty vector when no handle (h) is set.
117 vector
<Guess
> SimpleGuesser::GuessLanguage(char* text
)
119 vector
<Guess
> guesses
;
// Without a textcat handle nothing can be classified: return the empty list.
121 if(!h
){return guesses
;}
123 //calculate the number of Unicode characters (symbols)
124 int len
= utfstrlen(text
);
// Cap the amount of text handed to the classifier.
126 if( len
> MAX_STRING_LENGTH_TO_ANALYSE
){len
= MAX_STRING_LENGTH_TO_ANALYSE
;}
// NOTE(review): len is a symbol count from utfstrlen but is passed as the size
// argument of textcat_Classify - confirm which unit (bytes vs. characters) the
// library expects, since multi-byte text would otherwise be cut short.
128 char *guess_list
= textcat_Classify(h
, text
, len
);
// Special case: the classifier answered with the _TEXTCAT_RESULT_SHORT marker string.
130 if(strcmp(guess_list
, _TEXTCAT_RESULT_SHORT
) == 0){
// current_pointer is the read position inside guess_list.
134 int current_pointer
= 0;
// Walk the result string until its terminating NUL; i counts the iterations.
136 for(int i
= 0; guess_list
[current_pointer
] != '\0'; i
++)
// Skip forward to the next GUESS_SEPARATOR_OPEN (or to the end of the string).
138 while(guess_list
[current_pointer
] != GUESS_SEPARATOR_OPEN
&& guess_list
[current_pointer
] != '\0'){
// Not at the end, so an entry starts here: build a Guess from the text at
// this position and append it to the result.
141 if(guess_list
[current_pointer
] != '\0')
143 Guess
g((char*)(guess_list
+ current_pointer
));
145 guesses
.push_back(g
);
155 \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
// Returns the first entry of the list produced by GuessLanguage(text).
// param text: forwarded unchanged to GuessLanguage.
// NOTE(review): GuessLanguage is evaluated twice - once into ret and again in
// the return expression; returning ret[0] would avoid running the classifier a second time.
// NOTE(review): GuessLanguage returns an empty vector when h is unset, so the
// [0] access must be guarded by an emptiness check - confirm.
157 Guess
SimpleGuesser::GuessPrimaryLanguage(char* text
)
159 vector
<Guess
> ret
= GuessLanguage(text
);
161 return GuessLanguage(text
)[0];
168 * Used to find out which languages are available, unavailable, or both
169 * when mask = 0xF0, return only Available
170 * when mask = 0x0F, return only Unavailable
171 * when mask = 0xFF, return both Available and Unavailable
// Lists the languages whose fprint_disable flag shares at least one bit with mask.
// param mask: bit mask tested against each fprint_disable entry (callers in
//             this file pass 0xF0, 0x0F or 0xFF).
// return: a vector of Guess objects, each built from "[" followed by the
//         fingerprint name reported by fp_Name.
173 vector
<Guess
> SimpleGuesser::GetManagedLanguages(const char mask
)
// View the handle as the locally declared textcat_t to reach its tables.
// NOTE(review): h is cast and later dereferenced - confirm a null check on h precedes the loop.
176 textcat_t
*tables
= (textcat_t
*)h
;
// Visit every fingerprint slot known to the handle.
181 for (i
=0; i
<tables
->size
; i
++) {
// Keep only the entries selected by mask.
182 if(tables
->fprint_disable
[i
] & mask
){
// Guess is constructed from a string that starts with "[" followed by the fingerprint name.
183 string langStr
= "[";
184 langStr
+= (char*)fp_Name(tables
->fprint
[i
]);
185 Guess
g( (char *)langStr
.c_str());
// Convenience wrapper: GetManagedLanguages with mask 0xF0
// (documented above as "return only Available").
193 vector
<Guess
> SimpleGuesser::GetAvailableLanguages(){
194 return GetManagedLanguages( sal::static_int_cast
< char >( 0xF0 ) );
// Convenience wrapper: GetManagedLanguages with mask 0x0F
// (documented above as "return only Unavailable").
197 vector
<Guess
> SimpleGuesser::GetUnavailableLanguages(){
198 return GetManagedLanguages( sal::static_int_cast
< char >( 0x0F ));
// Convenience wrapper: GetManagedLanguages with mask 0xFF
// (documented above as "return both Available and Unavailable").
201 vector
<Guess
> SimpleGuesser::GetAllManagedLanguages(){
202 return GetManagedLanguages( sal::static_int_cast
< char >( 0xFF ));
// Sets the fprint_disable flag of every fingerprint matching lang to mask.
// Shared implementation behind EnableLanguage (mask 0xF0) and DisableLanguage (mask 0x0F).
// param lang: language identifier compared against each fingerprint name via start().
// param mask: value stored into fprint_disable for every matching entry.
205 void SimpleGuesser::XableLanguage(string lang
, char mask
){
// View the handle as the locally declared textcat_t to reach its tables.
// NOTE(review): h is cast and later dereferenced - confirm a null check on h precedes the loop.
207 textcat_t
*tables
= (textcat_t
*)h
;
// Visit every fingerprint slot known to the handle.
211 for (i
=0; i
<tables
->size
; i
++) {
212 string
language(fp_Name(tables
->fprint
[i
]));
// start() is a helper declared elsewhere; presumably it returns 0 when
// language begins with lang - TODO confirm against altstrfunc.hxx.
213 if(start(language
,lang
) == 0){
214 //cout << language << endl;
// Plain assignment: the previous flag value is replaced by mask, not OR-ed with it.
215 tables
->fprint_disable
[i
] = mask
;
// Marks the fingerprints matching lang as available by storing 0xF0 in their
// fprint_disable flag (0xF0 is the mask GetAvailableLanguages filters on).
221 void SimpleGuesser::EnableLanguage(string lang
){
222 XableLanguage(lang
, sal::static_int_cast
< char >( 0xF0 ));
// Marks the fingerprints matching lang as unavailable by storing 0x0F in their
// fprint_disable flag (0x0F is the mask GetUnavailableLanguages filters on).
225 void SimpleGuesser::DisableLanguage(string lang
){
226 XableLanguage(lang
, sal::static_int_cast
< char >( 0x0F ));
// (Re)creates the textcat handle from a fingerprint database.
// param path:   forwarded to special_textcat_Init (presumably the database/config location - TODO confirm).
// param prefix: forwarded to special_textcat_Init (presumably prepended to fingerprint file names - TODO confirm).
// NOTE(review): h is overwritten here; confirm any previously held handle is
// released with textcat_Done before this assignment, otherwise it leaks.
// NOTE(review): the result of special_textcat_Init is stored unchecked; the other
// members treat a null h as "no handle", so a failed init surfaces only as empty results.
232 void SimpleGuesser::SetDBPath(const char* path
, const char* prefix
){
236 h
= special_textcat_Init(path
, prefix
);