1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Mozilla Spellchecker Component.
17 * The Initial Developer of the Original Code is
19 * Portions created by the Initial Developer are Copyright (C) 2001
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s): David Einstein Deinst@world.std.com
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "mozEnglishWordUtils.h"
39 #include "nsICharsetAlias.h"
40 #include "nsReadableUtils.h"
41 #include "nsIServiceManager.h"
42 #include "nsUnicharUtilCIID.h"
// XPCOM nsISupports boilerplate for mozEnglishWordUtils, which exposes the
// single interface mozISpellI18NUtil.
45 NS_IMPL_ISUPPORTS1(mozEnglishWordUtils
, mozISpellI18NUtil
)
// Constructor: sets the language tag to "en" and acquires the three helpers
// the other methods rely on -- a URL detector (new instance), the
// case-conversion service and the Unicode character-category service.
47 mozEnglishWordUtils::mozEnglishWordUtils()
49 mLanguage
.AssignLiteral("en");
// NOTE(review): rv receives the creation status, but no visible line
// inspects it afterwards.
52 mURLDetector
= do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID
, &rv
);
// The two service lookups below are not checked here. captype() guards
// against a null mCaseConv; ucIsAlpha() dereferences mCategories directly.
53 mCaseConv
= do_GetService(NS_UNICHARUTIL_CONTRACTID
);
54 mCategories
= do_GetService(NS_UNICHARCATEGORY_CONTRACTID
);
// Destructor. No visible line releases mURLDetector, mCaseConv or mCategories
// explicitly -- presumably they are smart pointers; confirm in the header.
57 mozEnglishWordUtils::~mozEnglishWordUtils()
61 /* attribute wstring language; */
62 NS_IMETHODIMP
mozEnglishWordUtils::GetLanguage(PRUnichar
* *aLanguage
)
65 NS_ENSURE_ARG_POINTER(aLanguage
);
67 *aLanguage
= ToNewUnicode(mLanguage
);
68 if(!aLanguage
) rv
= NS_ERROR_OUT_OF_MEMORY
;
72 /* void GetRootForm (in wstring aWord, in PRUint32 type, [array, size_is (count)] out wstring words, out PRUint32 count); */
73 // return the possible root forms of aWord.
//
// Builds an array of spellings of aWord that differ only in letter case.
// The capitalization class returned by captype() decides which of three
// shapes is produced (the dispatch itself is not visible in this excerpt):
//   * 1 entry   -- the word unchanged;
//   * 3 entries -- all lower case, lower case with the first code unit
//                  upper-cased, and the word unchanged;
//   * 2 entries -- all lower case, and the word unchanged.
// Every allocation failure path releases what was allocated so far and
// returns NS_ERROR_OUT_OF_MEMORY.
//
// aWord  - null-terminated word to analyse.
// type   - not referenced by any visible line.
// words  - presumably receives the allocated array (assignment not visible).
// count  - presumably receives the number of entries (assignment not visible).
//
// NOTE(review): mCaseConv is dereferenced below without a null check, while
// captype() treats a null mCaseConv as a possible state -- confirm the branches
// using it are unreachable in that case.
74 NS_IMETHODIMP
mozEnglishWordUtils::GetRootForm(const PRUnichar
*aWord
, PRUint32 type
, PRUnichar
***words
, PRUint32
*count
)
76 nsAutoString
word(aWord
);
78 PRInt32 length
= word
.Length();
// Classify the capitalization pattern of the input word.
82 mozEnglishWordUtils::myspCapitalization ct
= captype(word
);
// --- One-entry result: just a copy of the word. ---
87 tmpPtr
= (PRUnichar
**)nsMemory::Alloc(sizeof(PRUnichar
*));
89 return NS_ERROR_OUT_OF_MEMORY
;
90 tmpPtr
[0] = ToNewUnicode(word
);
// Zero entries were filled in, so only the array itself needs releasing.
92 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr
);
93 return NS_ERROR_OUT_OF_MEMORY
;
// --- Three-entry result (presumably the all-capitals case -- confirm). ---
101 tmpPtr
= (PRUnichar
**)nsMemory::Alloc(sizeof(PRUnichar
*) * 3);
103 return NS_ERROR_OUT_OF_MEMORY
;
104 tmpPtr
[0] = ToNewUnicode(word
);
106 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr
);
107 return NS_ERROR_OUT_OF_MEMORY
;
// Entry 0: the whole word lower-cased in place.
109 mCaseConv
->ToLower(tmpPtr
[0], tmpPtr
[0], length
);
111 tmpPtr
[1] = ToNewUnicode(word
);
// One entry is already filled, so the cleanup count is 1.
113 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr
);
114 return NS_ERROR_OUT_OF_MEMORY
;
// Entry 1: lower-case everything, then upper-case only the first code unit.
116 mCaseConv
->ToLower(tmpPtr
[1], tmpPtr
[1], length
);
117 mCaseConv
->ToUpper(tmpPtr
[1], tmpPtr
[1], 1);
// Entry 2: the word exactly as given.
119 tmpPtr
[2] = ToNewUnicode(word
);
121 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr
);
122 return NS_ERROR_OUT_OF_MEMORY
;
// --- Two-entry result (presumably the initial-capital case -- confirm). ---
130 tmpPtr
= (PRUnichar
**)nsMemory::Alloc(sizeof(PRUnichar
*) * 2);
132 return NS_ERROR_OUT_OF_MEMORY
;
134 tmpPtr
[0] = ToNewUnicode(word
);
136 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr
);
137 return NS_ERROR_OUT_OF_MEMORY
;
// Entry 0: the whole word lower-cased in place.
139 mCaseConv
->ToLower(tmpPtr
[0], tmpPtr
[0], length
);
// Entry 1: the word exactly as given.
141 tmpPtr
[1] = ToNewUnicode(word
);
143 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr
);
144 return NS_ERROR_OUT_OF_MEMORY
;
// Fallback for a capitalization class none of the branches above handled.
151 return NS_ERROR_FAILURE
; // should never get here;
156 // This needs vast improvement
157 PRBool
mozEnglishWordUtils::ucIsAlpha(PRUnichar aChar
)
159 // XXX we have to fix callers to handle the full Unicode range
160 return nsIUGenCategory::kLetter
== mCategories
->Get(PRUint32(aChar
));
163 /* void FindNextWord (in wstring word, in PRUint32 length, in PRUint32 offset, out PRUint32 begin, out PRUint32 end); */
// Locates the next word in the buffer at or after `offset`. A word is a run
// of characters accepted by ucIsAlpha(), possibly containing apostrophes,
// with trailing apostrophes trimmed. Text that mURLDetector recognises as a
// URL is skipped and the search restarts after it.
//
// word    - start of the text buffer.
// length  - number of PRUnichar units in the buffer.
// offset  - index at which scanning starts.
// begin   - receives the index of the word's first character, relative to
//           `word` (the not-found value is set on lines not visible here).
// end     - out-parameter; its assignment is not visible in this excerpt.
164 NS_IMETHODIMP
mozEnglishWordUtils::FindNextWord(const PRUnichar
*word
, PRUint32 length
, PRUint32 offset
, PRInt32
*begin
, PRInt32
*end
)
// p is the scan cursor, endbuf is one past the last unit, and startWord
// tracks where the current candidate word begins.
166 const PRUnichar
*p
= word
+ offset
;
167 const PRUnichar
*endbuf
= word
+ length
;
168 const PRUnichar
*startWord
=p
;
170 // XXX These loops should be modified to handle non-BMP characters.
171 // if previous character is a word character, need to advance out of the word
172 if (offset
> 0 && ucIsAlpha(*(p
-1))) {
173 while (p
< endbuf
&& ucIsAlpha(*p
))
// Skip everything that is not a letter to reach the start of the next word.
176 while((p
< endbuf
) && (!ucIsAlpha(*p
)))
// Consume the word itself: letters plus any apostrophes.
181 while((p
< endbuf
) && ((ucIsAlpha(*p
))||(*p
=='\'')))
186 // we could be trying to break down a url, we don't want to break a url into parts,
187 // instead we want to find out if it really is a url and if so, skip it, advancing startWord
188 // to a point after the url.
190 // before we spend more time looking to see if the word is a url, look for a url identifier
191 // and make sure that identifier isn't the last character in the word fragment.
// NOTE(review): *p is read before the `p < endbuf - 1` bound is evaluated;
// when the preceding loop stopped because p == endbuf, this reads one unit
// past `length`. Consider testing the bound first.
192 if ( (*p
== ':' || *p
== '@' || *p
== '.') && p
< endbuf
- 1) {
194 // ok, we have a possible url...do more research to find out if we really have one
195 // and determine the length of the url so we can skip over it.
// -1 means "no URL found" in the checks below.
199 PRInt32 startPos
= -1;
// The detector is handed the text from startWord to endbuf, plus the position
// of the URL identifier relative to startWord.
202 mURLDetector
->FindURLInPlaintext(startWord
, endbuf
- startWord
, p
- startWord
, &startPos
, &endPos
);
204 // ok, if we got a url, adjust the array bounds, skip the current url text and find the next word again
205 if (startPos
!= -1 && endPos
!= -1) {
// NOTE(review): the detector's buffer starts at startWord, yet endPos is
// added to p here -- confirm which base the returned offsets are relative to.
206 startWord
= p
+ endPos
+ 1; // skip over the url
207 p
= startWord
; // reset p
209 // now recursively call FindNextWord to search for the next word now that we have skipped the url
210 return FindNextWord(word
, length
, startWord
- word
, begin
, end
);
215 while((p
> startWord
)&&(*(p
-1) == '\'')){ // trim trailing apostrophes
// startWord reaching endbuf means the scan ran off the buffer without
// finding a word.
222 if(startWord
== endbuf
){
// Otherwise report the word start as an index into `word`.
227 *begin
= startWord
-word
;
// Classifies the capitalization pattern of `word` by comparing it against
// case-converted copies of itself, in this order:
//   1. identical to its upper-cased form;
//   2. identical to its lower-cased form;
//   3. everything after the first code unit identical to the lower-cased form;
//   4. none of the above.
// The value returned for each outcome is on lines not visible in this
// excerpt; only the HuhCap result for a missing case converter is shown.
// The scratch buffer is freed on every path.
233 mozEnglishWordUtils::myspCapitalization
234 mozEnglishWordUtils::captype(const nsString
&word
)
236 if(!mCaseConv
) return HuhCap
; //punt
// Scratch copy of the word, converted in place below.
// NOTE(review): the result of ToNewUnicode is not checked for null before use.
237 PRUnichar
* lword
=ToNewUnicode(word
);
// Test 1: does upper-casing leave the word unchanged?
238 mCaseConv
->ToUpper(lword
,lword
,word
.Length());
239 if(word
.Equals(lword
)){
240 nsMemory::Free(lword
);
// Test 2: does lower-casing leave the word unchanged?
244 mCaseConv
->ToLower(lword
,lword
,word
.Length());
245 if(word
.Equals(lword
)){
246 nsMemory::Free(lword
);
// Test 3: ignoring the first code unit, is the remainder already lower case?
249 PRInt32 length
=word
.Length();
250 if(Substring(word
,1,length
-1).Equals(lword
+1)){
251 nsMemory::Free(lword
);
// No pattern matched; release the scratch copy before the final return.
254 nsMemory::Free(lword
);
258 // Convert the list of words in iwords to the same capitalization aWord and
259 // return them in owords.
260 NS_IMETHODIMP
mozEnglishWordUtils::FromRootForm(const PRUnichar
*aWord
, const PRUnichar
**iwords
, PRUint32 icount
, PRUnichar
***owords
, PRUint32
*ocount
)
262 nsAutoString
word(aWord
);
266 PRUnichar
**tmpPtr
= (PRUnichar
**)nsMemory::Alloc(sizeof(PRUnichar
*)*icount
);
268 return NS_ERROR_OUT_OF_MEMORY
;
270 mozEnglishWordUtils::myspCapitalization ct
= captype(word
);
271 for(PRUint32 i
= 0; i
< icount
; ++i
) {
272 length
= nsCRT::strlen(iwords
[i
]);
273 tmpPtr
[i
] = (PRUnichar
*) nsMemory::Alloc(sizeof(PRUnichar
) * (length
+ 1));
274 if (NS_UNLIKELY(!tmpPtr
[i
])) {
275 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i
, tmpPtr
);
276 return NS_ERROR_OUT_OF_MEMORY
;
278 memcpy(tmpPtr
[i
], iwords
[i
], (length
+ 1) * sizeof(PRUnichar
));
280 nsAutoString
capTest(tmpPtr
[i
]);
281 mozEnglishWordUtils::myspCapitalization newCt
=captype(capTest
);
289 rv
= mCaseConv
->ToUpper(tmpPtr
[i
],tmpPtr
[i
],length
);
292 rv
= mCaseConv
->ToUpper(tmpPtr
[i
],tmpPtr
[i
],1);
295 rv
= NS_ERROR_FAILURE
; // should never get here;
301 if (NS_SUCCEEDED(rv
)){