softcount: tolerate zero ngrams
[vspell.git] / libvspell / wfst.h
blob61fbeb9b5f0b17b137deaba3c2d4834c7ab969c1
1 #ifndef __WFST_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __WFST_H__
4 #ifndef __DICTIONARY_H__
5 #include "dictionary.h"
6 #endif
8 #ifndef __WORDNODE_H__
9 #include "wordnode.h"
10 #endif
12 #ifndef __SPELL_H__
13 #include "spell.h"
14 #endif
16 #ifndef __VECTOR__
17 #include <vector>
18 #endif
20 #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
21 #include <boost/shared_ptr.hpp>
22 #endif
24 #define SEGM_SEPARATOR 1
25 #define NGRAM_LENGTH 2
27 struct Section {
28 uint segment;
29 uint start;
30 uint len;
31 void segment_best(const Lattice&,Segmentation &final_seg);
34 class Sections: public std::vector<Section> {
35 public:
36 Sentence const * st; /// This is used for operator<< only
37 void construct(const Lattice &words);
38 friend std::ostream& operator << (std::ostream &os,const Sections &s);
42 /**
43 Segmentor takes a Sentence, a Lattice and a range, then try to generate
44 all possible Segmentation.
47 class Segmentor
49 private:
50 struct Trace
52 Segmentation s;
53 int next_syllable;
54 Trace(boost::shared_ptr<WordEntries> _we):s(_we),next_syllable(0) {}
56 int nr_syllables;
57 std::vector<Trace> segs;
58 Lattice const *_words;
59 int from,to;
61 public:
62 void init(const Lattice &words,
63 int from,
64 int to);
65 bool step(Segmentation &seg);
66 void done();
69 class WFST
71 protected:
72 bool ngram_enabled;
74 public:
75 WFST():ngram_enabled(false) {}
77 void enable_ngram(bool enable = true) { ngram_enabled = enable; }
79 void segment_best(const Lattice &words,Segmentation &seps);
80 void segment_best_no_fuzzy(const Lattice &words,Segmentation &seps);
81 void segment_all(const Sentence &sent,std::vector<Segmentation> &result);
83 //private:
84 public: // for testing purpose
85 void generate_misspelled_words(const std::vector<uint> &pos,
86 int len,
87 Segmentation& final_seg);
88 // variables needed when run wfst
89 Lattice const *p_words;
92 #endif