1 #include "config.h" // -*- tab-width: 2 -*-
4 #include <libsrilm/File.h>
8 #include "dictionary.h"
11 #include "propername.h"
15 #include <libsrilm/SArray.cc>
// Two numeric thresholds (1 and 2). The ED_ prefix presumably stands for
// "edit distance" -- TODO confirm against the code that uses them.
// NOTE(review): "THRESOLD" looks like a misspelling of "THRESHOLD"; a rename
// would have to touch every user of these macros.
18 #define ED_THRESOLD1 1
19 #define ED_THRESOLD2 2
24 namespace Dictionary {
// SArray keyed by strid with float values, and the matching iterator type.
27 typedef SArray
<strid
,float> syllable_dict_type
;
28 typedef SArrayIter
<strid
,float> syllable_dict_iterator
;
// File-local syllable table; queried by is_syllable_exist() and get_syllable().
29 static syllable_dict_type syllable_dict
;
// String ids and leaf nodes of the special tokens, both indexed by special id
// (0 .. TOTAL_ID-1).
30 static strid special_ids
[TOTAL_ID
];
31 static LeafNode
* special_leaves
[TOTAL_ID
];
// Archive that maps strings to strid values and back
// (see the two StringArchive::operator[] overloads).
37 static StringArchive sarch
;
// Two Ngram objects built over sarch's dictionary, with second constructor
// arguments 3 and 2 (presumably the n-gram order -- TODO confirm).
38 static Ngram
ngram(sarch
.get_dict(),3);
39 static Ngram
syngram(sarch
.get_dict(),2);
// Map from strid to strid_string; get_pnames() returns a const reference of
// this same type.
40 static map
<strid
,strid_string
> pnames
;
// Looked up first so that it takes id 0 (per the original note), which is
// then left unused.
51 sarch
["<reserved>"]; // 0, don't use
// Names of the special tokens, in special-id order. The initializer lists 8
// names; TOTAL_ID is assumed to match -- TODO confirm.
// NOTE(review): string literals are bound to non-const char* here;
// const char* would be the type-correct element type.
53 char *specials
[TOTAL_ID
] = {"<opaque>","<punct>","<prop>","<s>","</s>","<poem>","<digit>","<leaf>"};
// For every special id: obtain the strid of its name from sarch, remember it
// in special_ids, then store the value returned by warch.add_special_entry()
// for that strid in special_leaves.
54 for (i
= 0;i
< TOTAL_ID
;i
++) {
55 special_ids
[i
] = sarch
[specials
[i
]];
56 special_leaves
[i
] = warch
.add_special_entry(special_ids
[i
]);
// Writes one "<index> <string>" line for every index in 0 .. dict.numWords()-1
// to the text file "dic.dump".
// NOTE(review): no check of the fopen() result is visible before fp is
// passed to fprintf() -- confirm, and confirm the file is closed afterwards.
// NOTE(review): the strings are fetched through the file-level sarch rather
// than through this object -- confirm that is intended.
62 void StringArchive::dump()
64 FILE *fp
= fopen("dic.dump","wt");
65 int i
,n
= dict
.numWords();
66 for (i
= 0;i
< n
;i
++)
67 fprintf(fp
,"%d %s\n",i
,sarch
[i
]);
75 bool is_syllable_exist(strid syll
)
77 float* pprob
= syllable_dict
.find(syll
);
78 return (pprob
!= NULL
);
// Looks up syll in syllable_dict. A NULL pprob is what is_syllable_exist()
// treats as "no entry".
// NOTE(review): presumably returns the float stored for syll -- confirm how
// a missing entry (NULL pprob) is handled.
81 float get_syllable(strid syll
)
83 float* pprob
= syllable_dict
.find(syll
);
// NOTE(review): presumably the word-level counterpart of is_syllable_exist(),
// taking the word as text rather than as a strid -- confirm against the
// implementation.
89 bool is_word_exist(const std::string
&word
)
// NOTE(review): presumably the word-level counterpart of get_syllable(),
// returning a float associated with `word` -- confirm against the
// implementation, including what is returned for an unknown word.
94 float get_word(const std::string
&word
)
// Maps the string s to a strid.
// Lookup order as far as visible: dict is consulted first; then rest, whose
// indices are shifted by dict.numWords() so they cannot collide with dict's;
// a string unknown to rest is added to rest; the last statement adds the
// string to dict.
// NOTE(review): the rest branch presumably applies only while the archive is
// blocked (see set_blocked()) -- confirm.
100 strid
StringArchive::operator[] (VocabString s
)
102 VocabIndex vi
= dict
.getIndex(s
);
103 if (vi
!= Vocab_None
)
// Look the string up in the secondary vocabulary.
106 vi
= rest
->getIndex(s
);
107 if (vi
== Vocab_None
) {
// New to rest as well: add it; its id is rest's index shifted past dict's
// range.
108 int i
= rest
->addWord(s
)+dict
.numWords();
109 //cerr << "New word " << s << " as " << i << endl;
// Already known to rest: return its shifted id.
112 return vi
+dict
.numWords();
// Otherwise the string becomes a new dict entry.
114 return dict
.addWord(s
);
117 VocabString
StringArchive::operator[] (strid i
)
119 if (i
>= dict
.numWords())
120 return rest
->getWord(i
-dict
.numWords());
121 return dict
.getWord(i
);
// Switches the archive's blocked state and keeps rest in step with it.
// NOTE(review): presumably _blocked is stored into blocked before the checks
// below -- confirm.
124 void StringArchive::set_blocked(bool _blocked
)
// Blocked but rest does not exist yet (presumably where rest is created --
// confirm).
127 if (blocked
&& rest
== NULL
)
// Not blocked but rest still exists (presumably where rest is discarded --
// confirm).
129 if (!blocked
&& rest
!= NULL
) {
// NOTE(review): presumably discards the entries held in rest, the secondary
// vocabulary used by operator[] -- confirm against the implementation.
135 void StringArchive::clear_rest()
143 bool StringArchive::in_dict(VocabString s
)
145 VocabIndex vi
= dict
.getIndex(s
);
146 return vi
!= Vocab_None
;
// Builds a strpair for the string id str.
// Visible steps: fetch the text of str from sarch, lower-case st one element
// at a time with viet_tolower(), then store the strid that sarch gives for
// the lower-cased text in pair.cid.
// NOTE(review): st is presumably a string copy of s, and pair presumably also
// records str itself -- confirm.
149 strpair
make_strpair(strid str
)
151 const char *s
= sarch
[str
];
153 int i
,len
= st
.size();
154 for (i
= 0;i
< len
;i
++) {
155 st
[i
] = viet_tolower(st
[i
]);
// Id of the lower-cased form.
159 pair
.cid
= sarch
[st
];
// Accessor returning a StringArchive by reference.
// NOTE(review): presumably the file-level sarch -- confirm.
162 StringArchive
& get_sarch()
// Two alternative results: the strid stored for id, or the entry at UNK_ID.
// NOTE(review): presumably the UNK_ID entry is the fallback when id is not a
// valid special id -- confirm against the guarding condition.
180 return special_ids
[id
];
182 return special_ids
[UNK_ID
];
// Returns the leaf node registered for the special id `id`, taken from
// special_leaves; the alternative result is the entry at UNK_ID.
// NOTE(review): presumably the UNK_ID entry is the fallback when id is not a
// valid special id -- confirm against the guarding condition.
185 LeafNode
* get_special_node(int id
)
188 return special_leaves
[id
];
190 return special_leaves
[UNK_ID
];
193 const std::map
<strid
,strid_string
>& get_pnames()