1 /***************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
34 * - Add exception throwing when h == NULL
35 * - Not init h when implicit constructor is launched
38 // MARKER(update_precomp.py): autogen include statement, do not remove
39 #include "precompiled_lingucomponent.hxx"
45 #include <libtextcat/textcat.h>
46 #include <libtextcat/common.h>
47 #include <libtextcat/constants.h>
48 #include <libtextcat/fingerprint.h>
49 #include <libtextcat/utf8misc.h>
51 #include <sal/types.h>
53 #include "altstrfunc.hxx"
54 #include "simpleguesser.hxx"
65 * The 3 following structures are from fingerprint.c and textcat.c
// Local declaration of an n-gram record, mirrored from the library sources
// named in the comment above. `str` is sized MAXNGRAMSIZE + 1
// (presumably one extra byte for the terminating NUL -- confirm).
// NOTE(review): the layout must stay identical to the library's own
// definition, since this file reads library data through these declarations.
68 typedef struct ngram_t
{
71 char str
[MAXNGRAMSIZE
+1];
// Local declaration of the structure behind the opaque textcat handle `h`.
// The member functions of SimpleGuesser cast `h` to textcat_t* in order to
// read `size`, `fprint` and `fprint_disable`.
// `output` is a fixed buffer of MAXOUTPUTSIZE chars.
// NOTE(review): this must match the library's own definition exactly --
// verify whenever libtextcat is upgraded.
83 typedef struct textcat_t
{
90 char output
[MAXOUTPUTSIZE
];
93 /** end of the 3 structs */
95 SimpleGuesser::SimpleGuesser()
// Assignment from another SimpleGuesser.
// If this object already holds a textcat handle in `h`, it is released with
// textcat_Done.
// NOTE(review): `sg` is taken by non-const reference and the operator returns
// void, so assignments cannot be chained and const sources are rejected.
// NOTE(review): confirm how the handle is shared afterwards -- if both objects
// end up holding the same `h`, both destructors would call textcat_Done on it.
100 void SimpleGuesser::operator=(SimpleGuesser
& sg
){
101 if(h
){textcat_Done(h
);}
105 SimpleGuesser::~SimpleGuesser()
107 if(h
){textcat_Done(h
);}
112 \fn SimpleGuesser::GuessLanguage(char* text)
// Classifies `text` with textcat_Classify and turns the returned result
// string into a list of Guess objects.
//
// text: NUL-terminated input (its length is measured with utfstrlen).
// Returns the collected guesses; the list is empty when no textcat handle
// `h` has been set.
114 vector
<Guess
> SimpleGuesser::GuessLanguage(char* text
)
116 vector
<Guess
> guesses
;
// Without a handle there is nothing to classify with.
118 if(!h
){return guesses
;}
120 //calculate the number of unicode characters (symbols)
121 int len
= utfstrlen(text
);
// Cap the length handed to the classifier at MAX_STRING_LENGTH_TO_ANALYSE.
// NOTE(review): `len` is a character count; confirm that textcat_Classify
// expects characters rather than bytes, otherwise multi-byte text is
// analysed only partially.
123 if( len
> MAX_STRING_LENGTH_TO_ANALYSE
){len
= MAX_STRING_LENGTH_TO_ANALYSE
;}
125 char *guess_list
= textcat_Classify(h
, text
, len
);
// Special-case the _TEXTCAT_RESULT_SHORT result string.
127 if(strcmp(guess_list
, _TEXTCAT_RESULT_SHORT
) == 0){
// Index of the character of guess_list currently being examined.
131 int current_pointer
= 0;
// Scan guess_list until its terminating NUL; `i` only counts iterations.
133 for(int i
= 0; guess_list
[current_pointer
] != '\0'; i
++)
// Loop while the current character is neither GUESS_SEPARATOR_OPEN nor
// the terminator.
135 while(guess_list
[current_pointer
] != GUESS_SEPARATOR_OPEN
&& guess_list
[current_pointer
] != '\0'){
// Not at the terminator, so an opening separator was found: build a Guess
// from the text starting at that separator and record it.
138 if(guess_list
[current_pointer
] != '\0')
140 Guess
g((char*)(guess_list
+ current_pointer
));
142 guesses
.push_back(g
);
152 \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
154 Guess
SimpleGuesser::GuessPrimaryLanguage(char* text
)
156 vector
<Guess
> ret
= GuessLanguage(text
);
158 return GuessLanguage(text
)[0];
165 * Is used to know which languages are available, unavailable or both
166 * when mask = 0xF0, return only Available
167 * when mask = 0x0F, return only Unavailable
168 * when mask = 0xFF, return both Available and Unavailable
// Walks the fingerprint entries of the textcat handle and builds a Guess for
// every entry whose fprint_disable flag shares at least one bit with `mask`.
//
// mask: bit pattern tested against each entry's fprint_disable flag.
170 vector
<Guess
> SimpleGuesser::GetManagedLanguages(const char mask
)
// View the opaque handle through the local textcat_t declaration.
173 textcat_t
*tables
= (textcat_t
*)h
;
// One iteration per fingerprint entry (tables->size entries).
178 for (i
=0; i
<tables
->size
; i
++) {
// Keep only entries whose flag matches the requested mask.
179 if(tables
->fprint_disable
[i
] & mask
){
// The Guess is constructed from "[" followed by the fingerprint's name.
180 string langStr
= "[";
181 langStr
+= (char*)fp_Name(tables
->fprint
[i
]);
182 Guess
g( (char *)langStr
.c_str());
190 vector
<Guess
> SimpleGuesser::GetAvailableLanguages(){
191 return GetManagedLanguages( sal::static_int_cast
< char >( 0xF0 ) );
194 vector
<Guess
> SimpleGuesser::GetUnavailableLanguages(){
195 return GetManagedLanguages( sal::static_int_cast
< char >( 0x0F ));
198 vector
<Guess
> SimpleGuesser::GetAllManagedLanguages(){
199 return GetManagedLanguages( sal::static_int_cast
< char >( 0xFF ));
// Writes `mask` into the fprint_disable flag of every fingerprint whose name
// satisfies start(language, lang) == 0.
//
// lang: language identifier compared against each fingerprint name
//       (presumably a prefix match -- confirm against start()).
// mask: value stored into the matching entries' fprint_disable flag.
202 void SimpleGuesser::XableLanguage(string lang
, char mask
){
// View the opaque handle through the local textcat_t declaration.
204 textcat_t
*tables
= (textcat_t
*)h
;
// One iteration per fingerprint entry (tables->size entries).
208 for (i
=0; i
<tables
->size
; i
++) {
209 string
language(fp_Name(tables
->fprint
[i
]));
210 if(start(language
,lang
) == 0){
211 //cout << language << endl;
212 tables
->fprint_disable
[i
] = mask
;
218 void SimpleGuesser::EnableLanguage(string lang
){
219 XableLanguage(lang
, sal::static_int_cast
< char >( 0xF0 ));
222 void SimpleGuesser::DisableLanguage(string lang
){
223 XableLanguage(lang
, sal::static_int_cast
< char >( 0x0F ));
229 void SimpleGuesser::SetDBPath(const char* path
, const char* prefix
){
233 h
= special_textcat_Init(path
, prefix
);