1 #ifndef __SOFTCOUNT_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __SOFTCOUNT_H__
4 #ifndef __DICTIONARY_H__
5 #include "dictionary.h"
20 #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
21 #include <boost/shared_ptr.hpp>
24 #include <libsrilm/NgramStats.h>
26 typedef FloatCount NgramFractionalCount
;
28 class NgramFractionalStats
: public NgramCounts
<NgramFractionalCount
>
31 NgramFractionalStats(Vocab
&vocab
, unsigned int order
) :
32 NgramCounts
<NgramFractionalCount
>(vocab
, order
) {};
33 virtual ~NgramFractionalStats() {};
36 class NgramsFractionalIter
: public NgramCountsIter
<NgramFractionalCount
>
39 NgramsFractionalIter(NgramFractionalStats
&ngrams
, VocabIndex
*keys
, unsigned order
= 1,
40 int (*sort
)(VocabIndex
, VocabIndex
) = 0) :
41 NgramCountsIter
<NgramFractionalCount
>(ngrams
, keys
, order
, sort
) {};
42 NgramsFractionalIter(NgramFractionalStats
&ngrams
, const VocabIndex
*start
,
43 VocabIndex
*keys
, unsigned order
= 1,
44 int (*sort
)(VocabIndex
, VocabIndex
) = 0) :
45 NgramCountsIter
<NgramFractionalCount
>(ngrams
, start
, keys
, order
, sort
) {};
// Based on the article "Discovering Chinese Words from Unsegmented Text".
// The idea is to count every possible word in a sentence with a fractional
// count, instead of counting only the words of the best segmentation (count = 1).
58 void count(const Lattice
&words
,NgramFractionalStats
&stats
);