1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 * - Throw an exception when h == NULL
27 * - Do not initialize h when the implicit constructor is invoked
34 #ifdef SYSTEM_LIBEXTTEXTCAT
35 #include <libexttextcat/textcat.h>
36 #include <libexttextcat/common.h>
37 #include <libexttextcat/constants.h>
38 #include <libexttextcat/fingerprint.h>
39 #include <libexttextcat/utf8misc.h>
43 #include <constants.h>
44 #include <fingerprint.h>
48 #include <sal/types.h>
50 #include<rtl/character.hxx>
51 #include "simpleguesser.hxx"
// Compares the leading characters of s1 and s2, ignoring ASCII case.
//
// The per-character result is the difference of the upper-cased characters;
// callers in this file treat a result of 0 as "matches". A '.' in either
// string at a given position is neutral: it forces that position to compare
// as equal.
//
// NOTE(review): the declarations of `i` and `ret`, the statement executed
// when s1 is longer than s2 (presumably clamping `min` to s2's length), and
// the return statement are not visible in this excerpt - confirm.
55 static int startsAsciiCaseInsensitive(const std::string
&s1
, const std::string
&s2
){
59 size_t min
= s1
.length();
60 if (min
> s2
.length())
// Walk both strings in lock step; stop at `min`, at an embedded NUL in
// either string, or as soon as a non-zero difference has been recorded.
63 for(i
= 0; i
< min
&& s2
[i
] && s1
[i
] && !ret
; i
++){
64 ret
= rtl::toAsciiUpperCase(static_cast<unsigned char>(s1
[i
]))
65 - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2
[i
]));
// Overrides the difference just computed, so the scan continues past a '.'.
66 if(s1
[i
] == '.' || s2
[i
] == '.') {ret
= 0;} //. is a neutral character
72 * The following structure is taken from textcat.c
// Local declaration of the textcat handle structure, so that the handle `h`
// can be cast to textcat_t* and its fields accessed directly.
// Only the `output` buffer member is visible in this excerpt; other code in
// this file also reads `size`, `fprint` and `fprint_disable` through this type.
// NOTE(review): presumably this must stay layout-compatible with the
// definition inside the textcat library - confirm against textcat.c.
74 typedef struct textcat_t
{
// Fixed-size character buffer of MAXOUTPUTSIZE bytes.
81 char output
[MAXOUTPUTSIZE
];
84 // end of the 3 structs
// Default constructor.
// NOTE(review): the body is not visible in this excerpt. The destructor and
// operator= both test `h` before releasing it, so `h` presumably starts out
// null here - confirm.
86 SimpleGuesser::SimpleGuesser()
// Copy assignment: returns *this unchanged on self-assignment; otherwise the
// handle currently held by this object (if any) is released with
// textcat_Done().
// NOTE(review): the statement that takes over sg's handle and the final
// return are not visible in this excerpt. If the handle is copied as a plain
// pointer, both objects would later call textcat_Done() on the same handle -
// confirm how ownership is meant to work.
91 SimpleGuesser
& SimpleGuesser::operator=(const SimpleGuesser
& sg
){
92 // Check for self-assignment!
93 if (this == &sg
) // Same object?
94 return *this; // Yes, so skip assignment, and just return *this.
// Release the handle this object holds before it is replaced.
96 if(h
){textcat_Done(h
);}
// Destructor: releases the textcat handle with textcat_Done() when one is set.
101 SimpleGuesser::~SimpleGuesser()
103 if(h
){textcat_Done(h
);}
107 \fn SimpleGuesser::GuessLanguage(const char* text)
// Classifies `text` with textcat and collects one Guess per entry found in
// the classifier's result string.
//
// text: NUL-terminated input; its length is taken with strlen() and at most
//       MAX_STRING_LENGTH_TO_ANALYSE bytes are handed to the classifier.
//
// NOTE(review): several statements are not visible in this excerpt - any
// check that `h` is set before use, the action taken when the result equals
// TEXTCAT_RESULT_SHORT_STR, the increments of `current_pointer`, and the
// final return (presumably `guesses`) - confirm against the full file.
109 vector
<Guess
> SimpleGuesser::GuessLanguage(const char* text
)
111 vector
<Guess
> guesses
;
// strlen() returns size_t; the value is stored in an int here.
116 int len
= strlen(text
);
// Cap the amount of text that is analysed.
118 if (len
> MAX_STRING_LENGTH_TO_ANALYSE
)
119 len
= MAX_STRING_LENGTH_TO_ANALYSE
;
121 const char *guess_list
= textcat_Classify(h
, text
, len
);
// Special result string, compared for exact equality.
123 if (strcmp(guess_list
, TEXTCAT_RESULT_SHORT_STR
) == 0)
// Offset of the scan position inside guess_list.
126 int current_pointer
= 0;
// Loop until the terminating NUL of guess_list is reached; `i` is only
// incremented, never read, in the visible code.
128 for(int i
= 0; guess_list
[current_pointer
] != '\0'; i
++)
// Scan forward to the next GUESS_SEPARATOR_OPEN (or to the end of the string).
130 while (guess_list
[current_pointer
] != GUESS_SEPARATOR_OPEN
&& guess_list
[current_pointer
] != '\0')
// A separator was found: build a Guess from the text starting at it.
132 if(guess_list
[current_pointer
] != '\0')
134 Guess
g(guess_list
+ current_pointer
);
136 guesses
.push_back(g
);
// Returns the first entry produced by GuessLanguage(text), or a
// default-constructed Guess when GuessLanguage() returns no entries.
145 Guess
SimpleGuesser::GuessPrimaryLanguage(const char* text
)
147 vector
<Guess
> ret
= GuessLanguage(text
);
148 return ret
.empty() ? Guess() : ret
[0];
151 * Used to find out which languages are available, unavailable, or both:
152 * when mask = 0xF0, return only the available languages
153 * when mask = 0x0F, return only the unavailable languages
154 * when mask = 0xFF, return both available and unavailable languages
// Builds Guess objects for the fingerprints whose fprint_disable entry has
// at least one bit in common with `mask`.
//
// mask: bit mask tested against each fingerprint's fprint_disable entry.
//
// NOTE(review): the result vector's declaration, any check that `h` is set,
// the statement that stores each Guess and the final return are not visible
// in this excerpt - confirm.
156 vector
<Guess
> SimpleGuesser::GetManagedLanguages(const char mask
)
// View the handle through the local textcat_t declaration to reach its tables.
158 textcat_t
*tables
= static_cast<textcat_t
*>(h
);
163 for (size_t i
=0; i
<tables
->size
; ++i
)
165 if (tables
->fprint_disable
[i
] & mask
)
// The fingerprint name is prefixed with "[" before being handed to Guess.
167 string langStr
= "[";
168 langStr
+= fp_Name(tables
->fprint
[i
]);
169 Guess
g(langStr
.c_str());
// Returns GetManagedLanguages() called with mask 0xF0, the value that
// EnableLanguage() stores for a language.
177 vector
<Guess
> SimpleGuesser::GetAvailableLanguages()
179 return GetManagedLanguages( sal::static_int_cast
< char >( 0xF0 ) );
// Returns GetManagedLanguages() called with mask 0x0F, the value that
// DisableLanguage() stores for a language.
182 vector
<Guess
> SimpleGuesser::GetUnavailableLanguages()
184 return GetManagedLanguages( sal::static_int_cast
< char >( 0x0F ));
// Returns GetManagedLanguages() called with mask 0xFF, which shares bits
// with both the 0xF0 and the 0x0F state values.
187 vector
<Guess
> SimpleGuesser::GetAllManagedLanguages()
189 return GetManagedLanguages( sal::static_int_cast
< char >( 0xFF ));
// Stores `mask` in the fprint_disable entry of every fingerprint whose name
// matches `lang` according to startsAsciiCaseInsensitive() (result == 0).
//
// lang: language name to match against each fingerprint name.
// mask: value written to the matching entries; the callers in this file
//       pass 0xF0 (enable) or 0x0F (disable).
//
// NOTE(review): any check that `h` is set before the cast is not visible in
// this excerpt - confirm.
192 void SimpleGuesser::XableLanguage(const string
& lang
, char mask
)
194 textcat_t
*tables
= static_cast<textcat_t
*>(h
);
198 for (size_t i
=0; i
<tables
->size
; i
++)
200 string
language(fp_Name(tables
->fprint
[i
]));
// No break in the visible code: every matching fingerprint is updated.
201 if (startsAsciiCaseInsensitive(language
,lang
) == 0)
202 tables
->fprint_disable
[i
] = mask
;
// Marks the fingerprints matching `lang` with 0xF0, the value selected by
// GetAvailableLanguages().
206 void SimpleGuesser::EnableLanguage(const string
& lang
)
208 XableLanguage(lang
, sal::static_int_cast
< char >( 0xF0 ));
// Marks the fingerprints matching `lang` with 0x0F, the value selected by
// GetUnavailableLanguages().
211 void SimpleGuesser::DisableLanguage(const string
& lang
)
213 XableLanguage(lang
, sal::static_int_cast
< char >( 0x0F ));
// Creates the textcat handle from `path` and `prefix` via
// special_textcat_Init() and stores it in `h`.
// NOTE(review): only the assignment is visible in this excerpt; whether a
// previously held handle is released first cannot be seen here - confirm.
216 void SimpleGuesser::SetDBPath(const char* path
, const char* prefix
)
220 h
= special_textcat_Init(path
, prefix
);
223 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */