Shhh... don't know what I did
[vspell.git] / wfst.h
blobd3392ff0cf941eb9f7deaa9c7dbb3f9dc92937a4
1 #ifndef __WFST_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __WFST_H__
4 #include <vector>
5 #include <string>
6 #include <iostream>
7 #include "dictionary.h"
9 #define SEGM_SEPARATOR 1
11 class Sentence
13 public:
14 class Syllable
16 private:
17 public:
18 int start;
19 Dictionary::strid id,cid;
20 //std::string::iterator start,end;
21 Sentence *sent_;
22 int category;
23 int span;
24 int sid;
26 Dictionary::strid get_id() const { return sid >= 0 ? sid : id; }
27 Dictionary::strid get_cid() const { return sid >= 0 ? sid : cid; }
30 private:
31 std::string sent_;
32 std::vector<Syllable> syllables;
33 friend class Syllable;
35 void tokenize_punctuation(const std::string &s,std::vector<std::string> &ss);
37 public:
38 Sentence(const std::string &st):sent_(st) {}
39 void set_sentence(const std::string &st) { sent_ = st; syllables.clear(); }
40 void tokenize();
41 void standardize();
42 int get_syllable_count() const { return syllables.size(); }
43 // void get_word_number() { return word.size(); }
44 Syllable& operator[] (int i) { return syllables[i]; }
45 Syllable operator[] (int i) const { return syllables[i]; }
46 // Syllable& operator[] (int i) { return syllables[i]; }
49 struct Segmentation
51 struct Item {
52 int flags; // Separator mark
53 int distance; // from ed() or fuzzy syllable
54 Dictionary::WordNodePtr state; // used to get prob.
56 Item():flags(0),distance(0),state(NULL) {}
59 std::vector<Item> items;
60 float prob; // total prob
61 int distance; // total distance
63 Segmentation():prob(0),distance(0) {}
64 void print(std::ostream &os,const Sentence &st);
67 struct WordInfo {
68 Dictionary::WordNode::DistanceNode exact_match;
69 std::vector<Dictionary::WordNode::DistanceNode> fuzzy_match;
71 typedef std::vector<WordInfo> WordInfos;
72 typedef std::vector<WordInfos> Words;
74 class WFST
76 protected:
77 Dictionary::WordNodePtr wl;
78 bool ngram_enabled;
80 public:
81 WFST():wl(NULL),ngram_enabled(false) {}
83 bool set_wordlist(Dictionary::WordNodePtr _wl) {
84 wl = _wl;
85 return true;
88 void enable_ngram(bool enable = true) { ngram_enabled = enable; }
90 void get_all_words(const Sentence &sent,
91 Words &words);
92 void segment_best(const Sentence &sent,
93 const std::vector<WordInfos> &words,
94 Segmentation &seps);
95 void segment_all(const Sentence &sent,
96 std::vector<Segmentation> &result);
97 private:
98 void segment_all1(const Sentence &sent,
99 const std::vector<WordInfos> &words,
100 int from,int to,
101 std::vector<Segmentation> &result);
105 #endif