1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * Pierre Phaneuf <pp@ludusdesign.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
38 #include "nsISupports.h"
39 #include "nsIComponentManager.h"
40 #include "nsICharsetDetector.h"
41 #include "nsICharsetDetectionObserver.h"
42 #include "nsComponentManagerUtils.h"
43 #include "nsServiceManagerUtils.h"
47 #if defined(XP_WIN) || defined(XP_OS2)
// Fragment of the nsStatis interface; its class header is not among the
// visible lines. All three operations below are pure virtual, so every
// concrete collector has to implement them.
58 virtual ~nsStatis() { };
// Hands aLen bytes starting at aBuf to the collector.
// NOTE(review): what the PRBool result signals is not visible here — confirm.
59 virtual PRBool
HandleData(const char* aBuf
, PRUint32 aLen
) = 0;
// NOTE(review): presumably invoked once after the final HandleData call —
// confirm against the caller.
60 virtual void DataEnd() = 0;
// The implementations visible in this file print their results with printf.
61 virtual void Report()=0;
// Statistics collector implementing nsStatis.
// NOTE(review): parts of this class body are not visible in this excerpt
// (access specifiers and several members the methods use, e.g. mR,
// mNumOfLChar, mNumOfLWord, mTailByte, mLastLChar).
64 class nsBaseStatis
: public nsStatis
{
// NOTE(review): aL / aH / aR presumably initialise mLWordLo / mLWordHi / mR;
// the initializer list is not visible — confirm.
66 nsBaseStatis(unsigned char aL
, unsigned char aH
, float aR
) ;
67 virtual ~nsBaseStatis() {};
68 virtual PRBool
HandleData(const char* aBuf
, PRUint32 aLen
);
69 virtual void DataEnd() ;
70 virtual void Report();
// Inclusive byte range: HandleData tests (mLWordLo <= a) && (a <= mLWordHi).
72 unsigned char mLWordHi
;
73 unsigned char mLWordLo
;
// Denominator of the ratio that Report() prints.
75 PRUint32 mNumOf2Bytes
;
// Selects which mLWordLen bucket gets incremented.
// NOTE(review): presumably the length of the run currently being counted — confirm.
78 PRUint32 mLWordLength
;
// Length histogram: index k is reported as length k+1; index 9 also receives
// every length above 10.
79 PRUint32 mLWordLen
[10];
// Constructor: resets the counters and state flags.
// NOTE(review): the member-initializer list, the braces and the loop body are
// not visible in this excerpt.
84 nsBaseStatis::nsBaseStatis(unsigned char aL
, unsigned char aH
, float aR
)
// One chained assignment zeroes all four counters.
86 mNumOf2Bytes
= mNumOfLWord
= mLWordLength
= mNumOfLChar
= 0;
// Both state flags start out false.
87 mTailByte
= mLastLChar
= PR_FALSE
;
// NOTE(review): this loop runs i from 0 to 19, but mLWordLen is declared with
// only 10 elements. If the (not visible) loop body writes mLWordLen[i], it
// writes past the end of the array — confirm and lower the bound to 10.
88 for(PRUint32 i
=0;i
< 20; i
++)
// Scans aBuf byte by byte and updates the collector's counters.
// NOTE(review): the branch conditions, braces and return statement of this
// function are not visible in this excerpt; the comments below describe only
// the statements that are.
94 PRBool
nsBaseStatis::HandleData(const char* aBuf
, PRUint32 aLen
)
96 for(PRUint32 i
=0; i
< aLen
; i
++)
// mTailByte becomes true exactly when the current byte has its top bit (0x80) set.
102 mTailByte
= (0x80 == ( aBuf
[i
] & 0x80));
// Cast to unsigned char so the range comparison below is done on 0..255 values.
106 unsigned char a
= (unsigned char) aBuf
[i
];
// True when the byte lies inside the inclusive range [mLWordLo, mLWordHi].
107 PRBool thisLChar
= (( mLWordLo
<= a
) && (a
<= mLWordHi
));
// Histogram update: lengths above 10 go to index 9, otherwise index length-1
// (so a length of exactly 10 also lands on index 9).
// NOTE(review): if mLWordLength can be 0 here, (mLWordLength-1) wraps around
// for an unsigned PRUint32 and indexes far outside mLWordLen; the guarding
// condition is not visible — confirm.
115 mLWordLen
[ (mLWordLength
> 10) ? 9 : (mLWordLength
-1)]++;
// Remember the classification of this byte for the next iteration.
119 mLastLChar
= thisLChar
;
// Same histogram update as above, on a different (not visible) branch.
123 mLWordLen
[ (mLWordLength
> 10) ? 9 : (mLWordLength
-1)]++;
125 mLastLChar
= PR_FALSE
;
// Performs one more histogram update using the same bucket rule as
// HandleData: lengths above 10 go to index 9, otherwise index length-1.
// NOTE(review): any condition guarding this statement is not visible; with
// mLWordLength == 0 the index (mLWordLength-1) would wrap — confirm.
132 void nsBaseStatis::DataEnd()
136 mLWordLen
[ (mLWordLength
> 10) ? 9 : (mLWordLength
-1)]++;
// Prints the collected statistics to stdout with printf.
// NOTE(review): several argument lines and the loop body's setup of `ac` are
// not visible in this excerpt.
// NOTE(review): no zero check is visible before the float divisions by
// mNumOf2Bytes, mNumOfLChar and mNumOfLWord — confirm behaviour on empty input.
139 void nsBaseStatis::Report()
144 printf("LChar Ratio = %d : %d ( %5.3f)\n",
147 ((float)mNumOfLChar / (float)mNumOf2Bytes) * 100);
// Observed ratio of mNumOfLChar to mNumOf2Bytes.
149 float rate
= (float) mNumOfLChar
/ (float) mNumOf2Bytes
;
// Relative deviation of the observed ratio from mR.
150 float delta
= (rate
- mR
) / mR
;
// delta becomes delta * delta * 1000, i.e. the squared deviation scaled by 1000.
151 delta
*= delta
* 1000;
153 printf("Exp = %f \n",delta
);
160 printf("LWord Word = %d : %d (%5.3f)\n",
163 ((float)mNumOfLWord / (float)mNumOfLChar) * 100);
// One output line per histogram bucket, labelled i+1.
167 for(PRUint32 i=0;i<10;i++)
170 printf("LWord Word Length[%d]= %d -> %5.3f%% %5.3f%%\n", i+1,
172 (((float)mLWordLen[i] / (float)mNumOfLWord) * 100),
173 (((float)ac / (float)mNumOfLWord) * 100));
// Second nsStatis implementation; compared with nsBaseStatis its constructor
// additionally takes a charset name.
// NOTE(review): parts of this class body are not visible in this excerpt
// (access specifiers and members the methods use, e.g. mR and mTailByte).
180 class nsSimpleStatis
: public nsStatis
{
// NOTE(review): aL / aH / aR / aCharset presumably initialise mLWordLo /
// mLWordHi / mR / mCharset; the initializer list is not visible — confirm.
182 nsSimpleStatis(unsigned char aL
, unsigned char aH
, float aR
,const char* aCharset
) ;
183 virtual ~nsSimpleStatis() {};
184 virtual PRBool
HandleData(const char* aBuf
, PRUint32 aLen
);
185 virtual void DataEnd() ;
186 virtual void Report();
// Inclusive byte range: HandleData tests (mLWordLo <= a) && (a <= mLWordHi).
188 unsigned char mLWordHi
;
189 unsigned char mLWordLo
;
// Denominator and numerator of the ratio computed in Report().
191 PRUint32 mNumOf2Bytes
;
192 PRUint32 mNumOfLChar
;
// Name printed by Report() in its "This is %s" line; the pointer is stored,
// not copied, as far as this declaration shows.
194 const char* mCharset
;
// Constructor: zeroes both counters and clears the tail-byte flag.
// NOTE(review): the member-initializer list and braces are not visible in
// this excerpt.
197 nsSimpleStatis::nsSimpleStatis(unsigned char aL
, unsigned char aH
, float aR
, const char* aCharset
)
199 mNumOf2Bytes
= mNumOfLChar
= 0;
200 mTailByte
= PR_FALSE
;
// Scans aBuf byte by byte and updates the collector's state.
// NOTE(review): the branch conditions, counter updates, braces and return
// statement are not visible in this excerpt; the comments below describe only
// the statements that are.
206 PRBool
nsSimpleStatis::HandleData(const char* aBuf
, PRUint32 aLen
)
208 for(PRUint32 i
=0; i
< aLen
; i
++)
// On one (not visible) branch the flag is simply cleared...
211 mTailByte
= PR_FALSE
;
// ...on another it is set exactly when the current byte has its top bit (0x80) set.
214 mTailByte
= (0x80 == ( aBuf
[i
] & 0x80));
// Cast to unsigned char so the range comparison below is done on 0..255 values.
218 unsigned char a
= (unsigned char) aBuf
[i
];
// True when the byte lies inside the inclusive range [mLWordLo, mLWordHi].
219 PRBool thisLChar
= (( mLWordLo
<= a
) && (a
<= mLWordHi
));
// nsSimpleStatis's implementation of nsStatis::DataEnd.
// NOTE(review): the body is not visible in this excerpt.
227 void nsSimpleStatis::DataEnd()
// Prints the deviation of the observed ratio from mR and a line naming mCharset.
// NOTE(review): no zero check on mNumOf2Bytes is visible before the division,
// and the condition (if any) guarding the "This is" line is not visible —
// confirm both.
230 void nsSimpleStatis::Report()
// Observed ratio of mNumOfLChar to mNumOf2Bytes.
234 float rate
= (float) mNumOfLChar
/ (float) mNumOf2Bytes
;
// Relative deviation of the observed ratio from mR.
235 float delta
= (rate
- mR
) / mR
;
// Squared deviation scaled by 100 (nsBaseStatis::Report scales by 1000 instead).
236 delta
= delta
* delta
* (float)100;
238 printf("Exp = %f \n",delta
);
240 printf("This is %s\n" ,mCharset
);
245 //==========================================================
// Largest accepted blocksize: 1L << 13 = 8192. main() reports any blocksize
// above this value (or below 1) as out of range.
248 #define MAXBSIZE (1L << 13)
// Usage text: names the two expected arguments (detector, blocksize), lists
// detector names and states that the data comes from STDIN.
// NOTE(review): the enclosing function header, some detector names and the
// printf arguments are not visible in this excerpt.
251 printf("Usage: DetectFile detector blocksize\n"
253 "ja_parallel_state_machine,"
254 "ko_parallel_state_machine,"
255 "zhcn_parallel_state_machine,"
256 "zhtw_parallel_state_machine,"
257 "zh_parallel_state_machine,"
258 "cjk_parallel_state_machine,"
261 "\n blocksize: 1 ~ %ld\n"
262 " Data are passed in from STDIN\n"
// Observer that prints the detector's verdict to stdout.
// NOTE(review): the class braces, access specifiers, constructor and the
// return statement of Notify are not visible in this excerpt.
266 class nsReporter
: public nsICharsetDetectionObserver
271 virtual ~nsReporter() { };
// Prints the reported charset name and the confidence value (formatted with %d).
273 NS_IMETHOD
Notify(const char* aCharset
, nsDetectionConfident aConf
)
275 printf("RESULT CHARSET : %s\n", aCharset
);
276 printf("RESULT Confident : %d\n", aConf
);
// XPCOM macro supplying the nsISupports implementation for this one-interface class.
282 NS_IMPL_ISUPPORTS1(nsReporter
, nsICharsetDetectionObserver
)
// Creates a charset detector instance and stores it in *det; returns the
// result of CallCreateInstance.
// NOTE(review): the declaration of buf and the statement that uses `key` are
// not visible in this excerpt. If key (main passes argv[1]) is appended to a
// fixed-size buf without a length check, an overlong argument overflows it —
// confirm.
284 nsresult
GetDetector(const char* key
, nsICharsetDetector
** det
)
// Start the contract ID with the common detector prefix.
287 strcpy(buf
, NS_CHARSET_DETECTOR_CONTRACTID_BASE
);
289 return CallCreateInstance(buf
, det
);
// Allocates an nsReporter and returns it through aRes as an
// nsICharsetDetectionObserver.
// NOTE(review): the null check on `rep` and the remaining QueryInterface
// arguments are not visible in this excerpt.
293 nsresult
GetObserver(nsICharsetDetectionObserver
** aRes
)
296 nsReporter
* rep
= new nsReporter();
298 return rep
->QueryInterface(NS_GET_IID(nsICharsetDetectionObserver
) ,
// Returned on the path where the QueryInterface return above is not taken.
301 return NS_ERROR_OUT_OF_MEMORY
;
// Entry point of the detector test driver: validates the arguments, creates
// the detector and observer, then feeds blocks read from file descriptor 0
// to the detector and to two nsSimpleStatis collectors.
// NOTE(review): many lines of this function (declarations of bs, buf, sz,
// loop headers, returns, #endif lines, the closing brace) are not visible in
// this excerpt; the comments below describe only the statements that are.
304 int main(int argc
, char** argv
) {
310 printf("Need 2 arguments\n");
// blocksize must lie in 1..MAXBSIZE.
314 if((bs
<1)||(bs
>MAXBSIZE
))
317 printf("blocksize out of range - %s\n", argv
[2]);
320 nsresult rev
= NS_OK
;
// argv[1] selects the detector to instantiate.
321 nsICharsetDetector
*det
= nsnull
;
322 rev
= GetDetector(argv
[1], &det
);
// Treat both a failure code and a null detector as an invalid detector name.
323 if(NS_FAILED(rev
) || (nsnull
== det
) ){
325 printf("Invalid Detector - %s\n", argv
[1]);
326 printf("XPCOM ERROR CODE = %x\n", rev
);
329 nsICharsetDetectionObserver
*obs
= nsnull
;
330 rev
= GetObserver(&obs
);
// Register the observer with the detector only if it was created successfully.
331 if(NS_SUCCEEDED(rev
)) {
332 rev
= det
->Init(obs
);
336 printf("XPCOM ERROR CODE = %x\n", rev
);
340 printf("XPCOM ERROR CODE = %x\n", rev
);
345 PRBool done
= PR_FALSE
;
// Two collectors, constructed as (aL, aH, aR, aCharset): byte bounds, a
// reference ratio and the name printed by Report().
346 nsSimpleStatis
ks(0xb0,0xc8, (float)0.95952, "EUC-KR");
347 nsSimpleStatis
js(0xa4,0xa5, (float)0.45006, "EUC-JP");
348 nsStatis
* stat
[2] = {&ks
, &js
};
// Read up to bs bytes from file descriptor 0 (the usage text says data comes from STDIN).
352 sz
= read(0, buf
, bs
);
355 #ifdef DEBUG_DetectCharset
356 printf("call DoIt %d\n",sz
);
// Hand the block to the detector; it reports completion through `done`.
358 rev
= det
->DoIt( buf
, sz
, &done
);
359 #ifdef DEBUG_DetectCharset
360 printf("DoIt return Done = %d\n",done
);
364 printf("XPCOM ERROR CODE = %x\n", rev
);
// The same block is also passed to each statistics collector.
369 stat
[i
]->HandleData(buf
, sz
);
371 // } while((sz > 0) && (!done) );
375 #ifdef DEBUG_DetectCharset
376 printf("Done = %d\n",done
);
377 printf("call Done %d\n",sz
);
382 printf("XPCOM ERROR CODE = %x\n", rev
);
390 #ifdef DEBUG_DetectCharset
395 #ifdef DEBUG_DetectCharset