softcount: tolerate zero ngrams
[vspell.git] / libvspell / dictionary.h
blob0a7cc7b27d49c2667416063ffd83d0f45a917338
1 #ifndef __DICTIONARY_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __DICTIONARY_H__
4 #ifndef __STRING__
5 #include <string>
6 #endif
7 #ifndef __VECTOR__
8 #include <vector>
9 #endif
10 #ifndef __MAP__
11 #include <map>
12 #endif
13 extern "C" {
14 #include "lm/lm_3g.h"
16 #include "debug.h"
// Shorthand macro: every occurrence of the token `sarch` expands to the call
// expression `get_ngram()`.
// NOTE(review): get_ngram() appears to be declared inside namespace Dictionary
// further down, so `sarch` presumably only resolves where that namespace is
// in scope -- confirm at the use sites.
18 #define sarch get_ngram()
20 namespace Dictionary {
// Integer id types used by the declarations in this header. Both are plain
// aliases of int32, so the compiler treats them as the same type.
// NOTE(review): int32 is not defined in this header; presumably it comes from
// "lm/lm_3g.h" -- confirm.
23 typedef int32 VocabIndex;
24 typedef int32 strid; // for easy migration
25 #ifndef __MYSTRING_H__
26 #include "mystring.h"
27 #endif
// Two string ids bundled together: the real id and the one used for
// case-insensitive comparison.
29 typedef struct {
30 strid id; // real id
31 strid cid; // case-insensitive comparison
32 } strpair;
// Class holding an lm_t handle together with the string <-> strid lookup
// operators declared below.
// Only declarations (and three one-line inline bodies) are visible in this
// header; anything below marked "presumably" is an assumption to confirm
// against the implementation file.
35 class LM {
36 private:
// Underlying lm_t handle (type presumably declared in "lm/lm_3g.h").
37 lm_t *lm;
// NOTE(review): presumably a hash table and a string list for
// out-of-vocabulary entries, judging by the names -- confirm.
38 hash_t oov_ht;
39 std::vector<const char *> oov;
// NOTE(review): declared as VocabIndex, but set_blocked() below takes a
// bool -- confirm which type is intended.
40 VocabIndex blocked;
42 public:
// Presumably loads the model from the file named by f and reports success
// through the bool result -- confirm.
43 bool read(const char *f);
// Returns a double for word w1 and the id array wcontext.
// NOTE(review): whether the value is a log-probability or a linear
// probability, and how wcontext is terminated, is not visible here.
44 double wordProb(VocabIndex w1, const VocabIndex *wcontext);
// Read-only access to the underlying lm_t handle.
45 const lm_t* get_lm() const { return lm; }
47 public:
48 LM();
49 ~LM();
50 void set_blocked(bool _blocked);
// Presumably empties the out-of-vocabulary bookkeeping -- confirm.
51 void clear_oov();
// String -> strid lookup. The std::string overload simply forwards to the
// const char* overload.
53 strid operator[] (const char* s);
54 strid operator[] (const std::string &s) { return (*this)[s.c_str()]; }
// strid -> string lookup.
55 const char* operator[] (strid i);
// Membership tests. The std::string overload forwards to the const char*
// overload; the strid overload returns true when i is below
// lm3g_word_str_size(lm).
// NOTE(review): the strid overload shows no lower-bound check on i --
// confirm that negative ids cannot reach it.
56 bool in_dict(const char* s);
57 bool in_dict(const std::string &s) { return in_dict(s.c_str()); }
58 bool in_dict(strid i) { return i < lm3g_word_str_size(lm); }
// Presumably a debugging dump of the object's contents -- confirm.
60 void dump();
63 //strpair make_strpair(strid id);
// Judging by the name, converts a log-probability value v into a probability.
// NOTE(review): the logarithm base and any scaling are not visible in this
// header -- confirm against the implementation.
65 double LogPtoProb(double v);
// Character conversion and classification helpers. The signatures mirror the
// <cctype> functions of the same names (toupper, tolower, isupper, ...),
// except that the predicates return bool rather than int.
// NOTE(review): presumably these are Vietnamese-aware variants, and `ch` is
// presumably a character code in the project's internal encoding -- neither
// is visible in this header; confirm.
67 int viet_toupper(int ch);
68 int viet_tolower(int ch);
69 bool viet_isupper(int ch);
70 bool viet_islower(int ch);
71 bool viet_isalpha(int ch);
72 bool viet_isdigit (int ch);
73 bool viet_isxdigit(int ch);
74 bool viet_isspace(int ch);
75 bool viet_ispunct(int ch);
// Dictionary set-up and tear-down.
// NOTE(review): presumably dic_init() must succeed before the lookups below
// are used and dic_clean() releases what it set up -- confirm.
77 bool dic_init();
78 void dic_clean();
// Syllable lookups, by strid or by string. The std::string overload is also
// defined inline later in this header.
79 bool is_syllable_exist(strid);
80 bool is_syllable_exist(const std::string &syll);
// NOTE(review): the meaning of the float result (probability, count, ...)
// is not visible here -- confirm.
81 float get_syllable(const std::string &syll);
// Word lookups by string; the same caveat applies to the float result of
// get_word().
82 bool is_word_exist(const std::string &word);
83 float get_word(const std::string &word);
// Fixed small integer ids, numbered consecutively from 0 to 7. TOTAL_ID (8)
// equals the number of ids defined above it.
// NOTE(review): presumably these are reserved strid values for special token
// classes (unknown, punctuation, proper name, ...), judging by the names --
// confirm where they are used.
85 #define UNK_ID 0
86 #define PUNCT_ID 1
87 #define PROPER_NAME_ID 2
88 #define START_ID 3
89 #define STOP_ID 4
90 #define POEM_ID 5
91 #define NUMBER_ID 6
92 #define LEAF_ID 7
93 #define TOTAL_ID 8
// Returns a read-only reference to a map from strid to strid_string.
// NOTE(review): strid_string is not defined in this header (presumably it
// comes from "mystring.h"), and "pnames" presumably means proper names --
// confirm.
94 const std::map<strid,strid_string>& get_pnames();
// Accessors returning references to LM objects. get_ngram() is also what the
// `sarch` macro expands to.
// NOTE(review): the difference between the "ngram" and "syngram" models is
// not visible here -- confirm.
95 LM& get_ngram();
96 LM& get_syngram();
// Maps an int to a strid.
// NOTE(review): presumably the argument is one of the *_ID constants above
// -- confirm.
97 strid get_id(int id);
// Inline definition of the std::string overload: looks the syllable up
// through get_ngram()[syll] (string -> strid) and defers to the strid
// overload of is_syllable_exist().
99 inline bool is_syllable_exist(const std::string &syll) {
100 return is_syllable_exist(get_ngram()[syll]);
107 #endif