fixed bugs in Text::penalty2_construct(), Penalty2DAG::set_syllable_weights()
[vspell.git] / libvspell / dictionary.cpp
blob ed892655c0097f033e97de0697d4c6012ef54bda
1 #include "config.h" // -*- tab-width: 2 -*-
2 #include <iostream>
3 #include <stdlib.h>
4 #include <libsrilm/File.h>
5 #include <fstream>
6 #include <algorithm>
7 #include <vector>
8 #include "dictionary.h"
9 #include "wordnode.h"
10 #include "distance.h"
11 #include "propername.h"
12 #include <math.h>
13 #include <iterator>
14 #ifndef _SArray_cc_
15 #include <libsrilm/SArray.cc>
16 #endif
18 #define ED_THRESOLD1 1
19 #define ED_THRESOLD2 2
21 using namespace std;
24 namespace Dictionary {
// Syllable table: maps a string id to a float value. It is queried by
// is_syllable_exist() and get_syllable() below.
27 typedef SArray<strid,float> syllable_dict_type;
28 typedef SArrayIter<strid,float> syllable_dict_iterator;
29 static syllable_dict_type syllable_dict;
// Ids and leaf nodes of the special tokens; both arrays are filled in by
// dic_init() and read by get_id() / get_special_node().
30 static strid special_ids[TOTAL_ID];
31 static LeafNode* special_leaves[TOTAL_ID];
// Drop any macro called `sarch` so the name can be used for the variable
// defined just below.
33 #ifdef sarch
34 #undef sarch
35 #endif
// The string archive handed out by get_sarch(). It must be defined before
// ngram/syngram, whose constructors are passed sarch.get_dict().
37 static StringArchive sarch;
// NOTE(review): the second constructor argument (3 and 2) is presumably the
// n-gram order -- confirm against the Ngram class.
38 static Ngram ngram(sarch.get_dict(),3);
39 static Ngram syngram(sarch.get_dict(),2);
// Table returned (read-only) by get_pnames().
40 static map<strid,strid_string> pnames;
// Word archive; unlike the objects above it is not file-static.
41 WordArchive warch;
// Forward declarations of initialisers called from dic_init().
43 bool syllable_init();
44 void viet_init();
46 bool dic_init()
48 viet_init();
49 syllable_init();
50 ed_init();
51 sarch["<reserved>"]; // 0, don't use
52 int i;
53 char *specials[TOTAL_ID] = {"<opaque>","<punct>","<prop>","<s>","</s>","<poem>","<digit>","<leaf>"};
54 for (i = 0;i < TOTAL_ID;i ++) {
55 special_ids[i] = sarch[specials[i]];
56 special_leaves[i] = warch.add_special_entry(special_ids[i]);
58 proper_name_init();
59 return true;
62 void StringArchive::dump()
64 FILE *fp = fopen("dic.dump","wt");
65 int i,n = dict.numWords();
66 for (i = 0;i < n;i ++)
67 fprintf(fp,"%d %s\n",i,sarch[i]);
68 fclose(fp);
// Clean-up hook of the dictionary module. The body is empty: calling it has
// no effect.
void dic_clean()
{
}
75 bool is_syllable_exist(strid syll)
77 float* pprob = syllable_dict.find(syll);
78 return (pprob != NULL);
81 float get_syllable(strid syll)
83 float* pprob = syllable_dict.find(syll);
84 if(pprob == NULL)
85 return 0;
86 return *pprob;
// Stub: no word lookup is performed. The argument is ignored and the
// function reports false for every input.
bool is_word_exist(const std::string &word)
{
  (void)word; // intentionally unused
  return false;
}
// Stub: no word lookup is performed. The argument is ignored and the
// function yields 0 for every input.
float get_word(const std::string &word)
{
  (void)word; // intentionally unused
  return 0;
}
100 strid StringArchive::operator[] (VocabString s)
102 VocabIndex vi = dict.getIndex(s);
103 if (vi != Vocab_None)
104 return vi;
105 if (blocked) {
106 vi = rest->getIndex(s);
107 if (vi == Vocab_None) {
108 int i = rest->addWord(s)+dict.numWords();
109 //cerr << "New word " << s << " as " << i << endl;
110 return i;
112 return vi+dict.numWords();
114 return dict.addWord(s);
117 VocabString StringArchive::operator[] (strid i)
119 if (i >= dict.numWords())
120 return rest->getWord(i-dict.numWords());
121 return dict.getWord(i);
124 void StringArchive::set_blocked(bool _blocked)
126 blocked = _blocked;
127 if (blocked && rest == NULL)
128 rest = new Vocab;
129 if (!blocked && rest != NULL) {
130 delete rest;
131 rest = NULL;
135 void StringArchive::clear_rest()
137 if (rest) {
138 delete rest;
139 rest = new Vocab;
143 bool StringArchive::in_dict(VocabString s)
145 VocabIndex vi = dict.getIndex(s);
146 return vi != Vocab_None;
149 strpair make_strpair(strid str)
151 const char *s = sarch[str];
152 string st(s);
153 int i,len = st.size();
154 for (i = 0;i < len;i ++) {
155 st[i] = viet_tolower(st[i]);
157 strpair pair;
158 pair.id = str;
159 pair.cid = sarch[st];
160 return pair;
162 StringArchive& get_sarch()
164 return sarch;
167 Ngram& get_ngram()
169 return ngram;
172 Ngram& get_syngram()
174 return syngram;
177 strid get_id(int id)
179 if (id < TOTAL_ID)
180 return special_ids[id];
181 else
182 return special_ids[UNK_ID];
185 LeafNode* get_special_node(int id)
187 if (id < TOTAL_ID)
188 return special_leaves[id];
189 else
190 return special_leaves[UNK_ID];
193 const std::map<strid,strid_string>& get_pnames()
195 return pnames;