1 #ifndef __SOFTCOUNT_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __SOFTCOUNT_H__
4 #ifndef __DICTIONARY_H__
5 #include "dictionary.h"
23 #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
24 #include <boost/shared_ptr.hpp>
27 #include <NgramStats.h>
29 typedef FloatCount NgramFractionalCount
;
31 class NgramFractionalStats
: public NgramCounts
<NgramFractionalCount
>
34 NgramFractionalStats(Vocab
&vocab
, unsigned int order
) :
35 NgramCounts
<NgramFractionalCount
>(vocab
, order
) {};
36 virtual ~NgramFractionalStats() {};
39 class NgramsFractionalIter
: public NgramCountsIter
<NgramFractionalCount
>
42 NgramsFractionalIter(NgramFractionalStats
&ngrams
, VocabIndex
*keys
, unsigned order
= 1,
43 int (*sort
)(VocabIndex
, VocabIndex
) = 0) :
44 NgramCountsIter
<NgramFractionalCount
>(ngrams
, keys
, order
, sort
) {};
45 NgramsFractionalIter(NgramFractionalStats
&ngrams
, const VocabIndex
*start
,
46 VocabIndex
*keys
, unsigned order
= 1,
47 int (*sort
)(VocabIndex
, VocabIndex
) = 0) :
48 NgramCountsIter
<NgramFractionalCount
>(ngrams
, start
, keys
, order
, sort
) {};
// Based on the article "Discovering Chinese Words from Unsegmented Text".
// The idea is to count every possible word in a sentence with a fractional
// count, instead of counting only the words of the best segmentation of the
// sentence (count = 1).
61 void count(const Lattice
&words
,NgramFractionalStats
&stats
);
62 void count(const DAG
&words
,NgramFractionalStats
&stats
);
63 void count(const DAG
&words
,NgramStats
&stats
);