libvspell/softcount.h

   1 #ifndef __SOFTCOUNT_H__ // -*- tab-width: 2 mode: c++ -*-
   2 #define __SOFTCOUNT_H__
   3
   4 #ifndef __DICTIONARY_H__
   5 #include "dictionary.h"
   6 #endif
   7
   8 #ifndef __WORDNODE_H__
   9 #include "wordnode.h"
  10 #endif
  11
  12 #ifndef __SPELL_H__
  13 #include "spell.h"
  14 #endif
  15
  16 #ifndef __VECTOR__
  17 #include <vector>
  18 #endif
  19
  20 #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
  21 #include <boost/shared_ptr.hpp>
  22 #endif
  23
  24 #include <libsrilm/NgramStats.h>
  25
  26 typedef FloatCount NgramFractionalCount;
  27
  28 class NgramFractionalStats: public NgramCounts<NgramFractionalCount>
  29 {
  30 public:
  31   NgramFractionalStats(Vocab &vocab, unsigned int order) :
  32     NgramCounts<NgramFractionalCount>(vocab, order) {};
  33   virtual ~NgramFractionalStats() {};
  34 };
  35
  36 class NgramsFractionalIter: public NgramCountsIter<NgramFractionalCount>
  37 {
  38 public:
  39   NgramsFractionalIter(NgramFractionalStats &ngrams, VocabIndex *keys, unsigned order = 1,
  40                        int (*sort)(VocabIndex, VocabIndex) = 0) :
  41     NgramCountsIter<NgramFractionalCount>(ngrams, keys, order, sort) {};
  42   NgramsFractionalIter(NgramFractionalStats &ngrams, const VocabIndex *start,
  43                        VocabIndex *keys, unsigned order = 1,
  44                        int (*sort)(VocabIndex, VocabIndex) = 0) :
  45     NgramCountsIter<NgramFractionalCount>(ngrams, start, keys, order, sort) {};
  46 };
  47
  48 /**
  49    Soft counter.
  50    Based on the article "Discovering Chinese Words from Unsegmented Text".
  51    The idea is that we count all possible words in a sentence with a fraction count
  52    instead of just count the words of the best segmentation in the sentence (count=1)
  53  */
  54
  55 class SoftCounter
  56 {
  57 public:
  58         void count(const Lattice &words,NgramFractionalStats &stats);
  59 };
  60
  61 #endif