Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / extensions / universalchardet / src / base / nsSBCSGroupProber.cpp
blob65afdfef016523e82ada6b62a15d34efd7b6e135
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is Mozilla Universal charset detector code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2001
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Shy Shalom <shooshX@gmail.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include <stdio.h>
40 #include "prmem.h"
42 #include "nsSBCharSetProber.h"
43 #include "nsSBCSGroupProber.h"
45 #include "nsHebrewProber.h"
47 nsSBCSGroupProber::nsSBCSGroupProber()
49 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
50 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
51 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
52 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
53 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
54 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
55 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
56 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
57 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
58 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
60 nsHebrewProber *hebprober = new nsHebrewProber();
61 // Notice: Any change in these indexes - 10,11,12 must be reflected
62 // in the code below as well.
63 mProbers[10] = hebprober;
64 mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
65 mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
66 // Tell the Hebrew prober about the logical and visual probers
67 if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
69 hebprober->SetModelProbers(mProbers[11], mProbers[12]);
71 else // One or more is null. avoid any Hebrew probing, null them all
73 for (PRUint32 i = 10; i <= 12; ++i)
75 delete mProbers[i];
76 mProbers[i] = 0;
80 // disable latin2 before latin1 is available, otherwise all latin1
81 // will be detected as latin2 because of their similarity.
82 //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
83 //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
85 Reset();
88 nsSBCSGroupProber::~nsSBCSGroupProber()
90 for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
92 delete mProbers[i];
97 const char* nsSBCSGroupProber::GetCharSetName()
99 //if we have no answer yet
100 if (mBestGuess == -1)
102 GetConfidence();
103 //no charset seems positive
104 if (mBestGuess == -1)
105 //we will use default.
106 mBestGuess = 0;
108 return mProbers[mBestGuess]->GetCharSetName();
111 void nsSBCSGroupProber::Reset(void)
113 mActiveNum = 0;
114 for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
116 if (mProbers[i]) // not null
118 mProbers[i]->Reset();
119 mIsActive[i] = PR_TRUE;
120 ++mActiveNum;
122 else
123 mIsActive[i] = PR_FALSE;
125 mBestGuess = -1;
126 mState = eDetecting;
130 nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
132 nsProbingState st;
133 PRUint32 i;
134 char *newBuf1 = 0;
135 PRUint32 newLen1 = 0;
137 //apply filter to original buffer, and we got new buffer back
138 //depend on what script it is, we will feed them the new buffer
139 //we got after applying proper filter
140 //this is done without any consideration to KeepEnglishLetters
141 //of each prober since as of now, there are no probers here which
142 //recognize languages with English characters.
143 if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
144 goto done;
146 if (newLen1 == 0)
147 goto done; // Nothing to see here, move on.
149 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
151 if (!mIsActive[i])
152 continue;
153 st = mProbers[i]->HandleData(newBuf1, newLen1);
154 if (st == eFoundIt)
156 mBestGuess = i;
157 mState = eFoundIt;
158 break;
160 else if (st == eNotMe)
162 mIsActive[i] = PR_FALSE;
163 mActiveNum--;
164 if (mActiveNum <= 0)
166 mState = eNotMe;
167 break;
172 done:
173 PR_FREEIF(newBuf1);
175 return mState;
178 float nsSBCSGroupProber::GetConfidence(void)
180 PRUint32 i;
181 float bestConf = 0.0, cf;
183 switch (mState)
185 case eFoundIt:
186 return (float)0.99; //sure yes
187 case eNotMe:
188 return (float)0.01; //sure no
189 default:
190 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
192 if (!mIsActive[i])
193 continue;
194 cf = mProbers[i]->GetConfidence();
195 if (bestConf < cf)
197 bestConf = cf;
198 mBestGuess = i;
202 return bestConf;
#ifdef DEBUG_chardet
// Debug helper: prints the status of every prober in the group followed by
// the current best match and the group's confidence.
//
// Inactive probers are listed by charset name; active ones dump their own
// status. Slots holding no prober at all are skipped: Reset() marks null
// slots as inactive, so they must not be dereferenced here. Likewise the
// best-match line falls back to a placeholder name when no guess has been
// recorded (mBestGuess == -1) instead of indexing mProbers out of bounds.
void nsSBCSGroupProber::DumpStatus()
{
  // Called first because it may update mBestGuess as a side effect.
  float cf = GetConfidence();

  printf(" SBCS Group Prober --------begin status \r\n");
  for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    if (!mProbers[i])
      continue; // empty slot, nothing to report

    if (!mIsActive[i])
      printf("  inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
    else
      mProbers[i]->DumpStatus();
  }

  const char* bestName = "none";
  if (mBestGuess != -1 && mProbers[mBestGuess])
    bestName = mProbers[mBestGuess]->GetCharSetName();

  printf(" SBCS Group found best match [%s] confidence %f.\r\n",
         bestName, cf);
}
#endif