/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Universal charset detector code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *          Shy Shalom <shooshX@gmail.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include <stdio.h>
#include "prmem.h"

#include "nsSBCharSetProber.h"
#include "nsSBCSGroupProber.h"

#include "nsHebrewProber.h"
47 nsSBCSGroupProber::nsSBCSGroupProber()
49 mProbers
[0] = new nsSingleByteCharSetProber(&Win1251Model
);
50 mProbers
[1] = new nsSingleByteCharSetProber(&Koi8rModel
);
51 mProbers
[2] = new nsSingleByteCharSetProber(&Latin5Model
);
52 mProbers
[3] = new nsSingleByteCharSetProber(&MacCyrillicModel
);
53 mProbers
[4] = new nsSingleByteCharSetProber(&Ibm866Model
);
54 mProbers
[5] = new nsSingleByteCharSetProber(&Ibm855Model
);
55 mProbers
[6] = new nsSingleByteCharSetProber(&Latin7Model
);
56 mProbers
[7] = new nsSingleByteCharSetProber(&Win1253Model
);
57 mProbers
[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel
);
58 mProbers
[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel
);
60 nsHebrewProber
*hebprober
= new nsHebrewProber();
61 // Notice: Any change in these indexes - 10,11,12 must be reflected
62 // in the code below as well.
63 mProbers
[10] = hebprober
;
64 mProbers
[11] = new nsSingleByteCharSetProber(&Win1255Model
, PR_FALSE
, hebprober
); // Logical Hebrew
65 mProbers
[12] = new nsSingleByteCharSetProber(&Win1255Model
, PR_TRUE
, hebprober
); // Visual Hebrew
66 // Tell the Hebrew prober about the logical and visual probers
67 if (mProbers
[10] && mProbers
[11] && mProbers
[12]) // all are not null
69 hebprober
->SetModelProbers(mProbers
[11], mProbers
[12]);
71 else // One or more is null. avoid any Hebrew probing, null them all
73 for (PRUint32 i
= 10; i
<= 12; ++i
)
80 // disable latin2 before latin1 is available, otherwise all latin1
81 // will be detected as latin2 because of their similarity.
82 //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
83 //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
88 nsSBCSGroupProber::~nsSBCSGroupProber()
90 for (PRUint32 i
= 0; i
< NUM_OF_SBCS_PROBERS
; i
++)
97 const char* nsSBCSGroupProber::GetCharSetName()
99 //if we have no answer yet
100 if (mBestGuess
== -1)
103 //no charset seems positive
104 if (mBestGuess
== -1)
105 //we will use default.
108 return mProbers
[mBestGuess
]->GetCharSetName();
111 void nsSBCSGroupProber::Reset(void)
114 for (PRUint32 i
= 0; i
< NUM_OF_SBCS_PROBERS
; i
++)
116 if (mProbers
[i
]) // not null
118 mProbers
[i
]->Reset();
119 mIsActive
[i
] = PR_TRUE
;
123 mIsActive
[i
] = PR_FALSE
;
130 nsProbingState
nsSBCSGroupProber::HandleData(const char* aBuf
, PRUint32 aLen
)
135 PRUint32 newLen1
= 0;
137 //apply filter to original buffer, and we got new buffer back
138 //depend on what script it is, we will feed them the new buffer
139 //we got after applying proper filter
140 //this is done without any consideration to KeepEnglishLetters
141 //of each prober since as of now, there are no probers here which
142 //recognize languages with English characters.
143 if (!FilterWithoutEnglishLetters(aBuf
, aLen
, &newBuf1
, newLen1
))
147 goto done
; // Nothing to see here, move on.
149 for (i
= 0; i
< NUM_OF_SBCS_PROBERS
; i
++)
153 st
= mProbers
[i
]->HandleData(newBuf1
, newLen1
);
160 else if (st
== eNotMe
)
162 mIsActive
[i
] = PR_FALSE
;
178 float nsSBCSGroupProber::GetConfidence(void)
181 float bestConf
= 0.0, cf
;
186 return (float)0.99; //sure yes
188 return (float)0.01; //sure no
190 for (i
= 0; i
< NUM_OF_SBCS_PROBERS
; i
++)
194 cf
= mProbers
[i
]->GetConfidence();
// NOTE(review): this diagnostic is expected to be compiled only in
// DEBUG_chardet builds - confirm the class declares it under the same macro.
#ifdef DEBUG_chardet
// Prints the status of every prober in the group, followed by the current
// best match and the group confidence.
void nsSBCSGroupProber::DumpStatus()
{
  PRUint32 i;
  float cf;

  cf = GetConfidence();
  printf(" SBCS Group Prober --------begin status \r\n");
  for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    // Slots emptied by the constructor are null and have nothing to report.
    if (!mProbers[i])
      continue;

    if (!mIsActive[i])
      printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
    else
      mProbers[i]->DumpStatus();
  }
  // Go through GetCharSetName() rather than indexing mProbers directly:
  // mBestGuess may still be -1 here, and GetCharSetName() falls back to the
  // default prober in that case.
  printf(" SBCS Group found best match [%s] confidence %f.\r\n",
         GetCharSetName(), cf);
}
#endif