1 #include "wordnode.h" // -*- tab-width: 2 coding: viscii mode: c++ -*-
// File-local string ids used as map keys for the two kinds of leaf nodes.
// WordArchive's constructor fills them in from sarch["<mainleaf>"] and
// sarch["<caseleaf>"].
6 static strid mainleaf_id
,caseleaf_id
;
// Returns the node stored under key `leaf` in this branch's `nodes` map,
// cast to LeafNode*, when nodes.find() locates an entry.
// NOTE(review): the statement executed when the key is absent is not visible
// in this listing -- confirm what callers receive in that case.
8 LeafNode
* BranchNode::get_leaf(strid leaf
) const
10 node_map::const_iterator iter
;
11 iter
= nodes
.find(leaf
);
// Found: return the pointer obtained from the entry's get().
12 if (iter
!= nodes
.end())
13 return((LeafNode
*)iter
->second
.get());
18 void BranchNode::get_leaves(std::vector
<LeafNode
*> &_nodes
) const
20 const vector
<strid
> leaf_id
= warch
.get_leaf_id();
21 node_map::const_iterator iter
;
22 uint i
,n
= leaf_id
.size();
23 for (i
= 0;i
< n
;i
++) {
24 iter
= nodes
.find(leaf_id
[i
]);
25 if (iter
!= nodes
.end())
26 _nodes
.push_back((LeafNode
*)iter
->second
.get());
// Appends to `_nodes` every entry stored under key `_id` for which is_leaf()
// is false, cast to BranchNode*. Entries are visited in the order of the
// range returned by nodes.equal_range(_id); `_nodes` is only appended to.
// NOTE(review): the declaration of `range` is not visible in this listing.
30 void BranchNode::get_branches(strid _id
,std::vector
<BranchNode
*> &_nodes
) const
33 range
= nodes
.equal_range(_id
);
34 node_map::const_iterator iter
;
35 for (iter
= range
.first
;iter
!= range
.second
; ++iter
)
// Leaves can share the same key; keep only the non-leaf entries.
36 if (!iter
->second
->is_leaf())
37 _nodes
.push_back((BranchNode
*)iter
->second
.get());
// Returns the first entry stored under key `_id` for which is_leaf() is
// false, cast to BranchNode*, scanning the range from nodes.equal_range(_id).
// NOTE(review): neither the declaration of `range` nor the value returned
// when no such entry exists is visible in this listing -- confirm.
40 BranchNode
* BranchNode::get_branch(strid _id
) const
43 range
= nodes
.equal_range(_id
);
44 node_map::const_iterator iter
;
45 for (iter
= range
.first
;iter
!= range
.second
; ++iter
)
// Stop at the first non-leaf entry.
46 if (!iter
->second
->is_leaf())
47 return (BranchNode
*)iter
->second
.get();
51 void BranchNode::add(strid _id
,NodeRef _branch
)
53 nodes
.insert(make_pair(_id
,_branch
));
// Walks the token sequence `toks` starting from this node: for each token the
// current node `me` is asked for its child branch via get_branch(). The
// visible lines also create a fresh BranchNode, add() it to `me` under the
// token, and use it as `next`.
// NOTE(review): the condition guarding the creation, the step that advances
// `me`, and the return statement are not visible in this listing --
// presumably a branch is created only when get_branch() finds none and the
// last branch reached is returned; confirm against the full source.
56 BranchNode
* BranchNode::add_path(std::vector
<strid
> toks
)
58 uint i
,n
= toks
.size();
59 BranchNode
*me
= this;
60 for (i
= 0;i
< n
;i
++) {
// Try to reuse an existing child branch for this token.
61 BranchNode
*next
= me
->get_branch(toks
[i
]);
// Build a new child, attach it under the token and continue from it.
63 NodeRef
branch(new BranchNode());
64 me
->add(toks
[i
],branch
);
65 next
= (BranchNode
*)branch
.get();
// Constructs the archive with a new, empty BranchNode as root, resolves the
// ids of the marker strings "<mainleaf>" and "<caseleaf>" through sarch into
// the file-level statics, and registers both ids as leaf ids.
72 WordArchive::WordArchive():root(new BranchNode
)
74 mainleaf_id
= sarch
["<mainleaf>"];
75 caseleaf_id
= sarch
["<caseleaf>"];
// Registration order: main leaf first, then case leaf.
76 register_leaf(mainleaf_id
);
77 register_leaf(caseleaf_id
);
// Loads word entries from the text file `filename`. The file is opened in
// "rt" mode and read one line at a time with getline(); '#' characters
// (located with strchr) delimit the tokens pushed into `toks`, and the token
// list is handed to add_case_entry().
// NOTE(review): many statements of this function (open/error checks, the
// inner tokenising loop, the conditions guarding the defaults below, and the
// return value) are not visible in this listing -- confirm against the full
// source before relying on this description.
80 bool WordArchive::load(const char* filename
)
82 File
ifs(filename
,"rt");
89 int start
,len
,tmp_char
;
90 string::size_type pos
;
// Process the file until getline() yields a false value.
93 while ((line
= ifs
.getline())) {
// Find the next '#' at or after `start`.
98 str_pos
= strchr(line
+start
,'#');
102 pos
= str_pos
- line
;
// The character at `pos` is saved here and restored after the token has been
// pushed (presumably it is overwritten with a terminator in between -- that
// statement is not visible).
103 tmp_char
= line
[pos
];
105 toks
.push_back(line
+start
);
106 line
[pos
] = tmp_char
;
111 continue; // unrecoverable error
// Defaults appended for missing fields; the guarding conditions are not visible.
113 toks
.push_back("N"); // assume N
115 toks
.push_back("0"); // assume 0
118 add_case_entry(toks
);
// Creates a new LeafNode, wraps it in a NodeRef and attaches it directly to
// the root branch under key `tok`.
// NOTE(review): the statements between the allocation and the add() call, and
// the return statement, are not visible in this listing -- confirm how the
// leaf is initialised and what is returned.
126 LeafNode
* WordArchive::add_special_entry(strid tok
)
128 LeafNode
*leaf
= new LeafNode
;
129 NodeRef
noderef(leaf
);
133 //leaf->set_mask(MAIN_LEAF);
134 get_root()->add(tok
,noderef
);
// Adds the word in toks[0] to the archive. toks[0] is scanned for ' '
// separators; each piece is normalised with get_std_syllable() and mapped to
// an id through sarch. The id sequence is used as the path passed to
// add_path() on the root, a new LeafNode is attached to the resulting branch
// under mainleaf_id, and the leaf's id is set to the same id sequence.
// Only toks[0] is read in the visible lines.
// NOTE(review): `pos` is declared `unsigned` but receives the result of
// string::find() and is compared with string::npos. Where size_t is wider
// than unsigned, the not-found value is truncated on assignment and the
// comparison can never be true -- consider declaring start/len/pos as
// string::size_type.
// NOTE(review): the initialisation of `start`, the body of the npos branch
// and the step advancing `start` are not visible in this listing.
138 void WordArchive::add_entry(vector
<string
> toks
)
140 unsigned start
,len
,pos
;
142 len
= toks
[0].size();
143 vector
<VocabIndex
> syllables
;
// Split toks[0] at spaces and translate every piece to a syllable id.
144 while (start
< len
) {
145 pos
= toks
[0].find(' ',start
);
146 if (pos
== string::npos
)
148 string s
= toks
[0].substr(start
,pos
-start
);
149 VocabIndex id
= sarch
[get_std_syllable(s
)];
150 syllables
.push_back(id
);
// Create (or reuse) the branch for this syllable path and hang the leaf on it.
154 vector
<strid
> path
= syllables
;
155 BranchNode
* branch
= get_root()->add_path(path
);
156 NodeRef
noderef(new LeafNode
);
157 LeafNode
*leaf
= (LeafNode
*)noderef
.get();
158 //leaf->set_mask(MAIN_LEAF);
159 branch
->add(mainleaf_id
,noderef
);
160 leaf
->set_id(syllables
);
// Adds a case-variant entry for the word in toks2[0]. A copy of the tokens is
// made and its first element is converted with viet_tolower; the converted
// word is split at ' ' into `syllables` (used as the tree path) and the
// original word is split the same way into `real_syllables` (stored in the
// leaf). The new LeafNode is attached to the path's branch under caseleaf_id.
// NOTE(review): what happens when the converted word equals the original
// (the `==` test below) is not visible in this listing -- presumably the
// entry is handled differently or skipped; confirm.
// NOTE(review): `pos` is declared `unsigned` but receives the result of
// string::find() and is compared with string::npos. Where size_t is wider
// than unsigned the comparison can never be true -- consider
// string::size_type for start/len/pos.
// NOTE(review): in the visible lines `len` is only assigned from the
// converted word, yet it also bounds the loop over toks2[0].
164 void WordArchive::add_case_entry(vector
<string
> toks2
)
166 vector
<string
> toks
= toks2
;
167 unsigned start
,len
,pos
;
169 len
= toks
[0].size();
// Convert the working copy in place; toks2 keeps the original spelling.
171 transform(toks
[0].begin(),toks
[0].end(),toks
[0].begin(),viet_tolower
);
173 if (toks
[0] == toks2
[0])
// Syllable ids of the converted word: these form the lookup path.
177 vector
<VocabIndex
> syllables
;
178 while (start
< len
) {
179 pos
= toks
[0].find(' ',start
);
180 if (pos
== string::npos
)
182 string s
= toks
[0].substr(start
,pos
-start
);
183 VocabIndex id
= sarch
[get_std_syllable(s
)];
184 syllables
.push_back(id
);
// Syllable ids of the original spelling: these are stored in the leaf.
188 start
= 0; // real syllable
189 vector
<VocabIndex
> real_syllables
;
190 while (start
< len
) {
191 pos
= toks2
[0].find(' ',start
);
192 if (pos
== string::npos
)
194 string s
= toks2
[0].substr(start
,pos
-start
);
195 VocabIndex id
= sarch
[get_std_syllable(s
)];
196 real_syllables
.push_back(id
);
// Attach a new leaf under caseleaf_id at the end of the converted-word path.
200 vector
<strid
> path
= syllables
;
201 BranchNode
* branch
= get_root()->add_path(path
);
202 NodeRef
noderef(new LeafNode
);
203 LeafNode
*leaf
= (LeafNode
*)noderef
.get();
204 //leaf->set_mask(CASE_LEAF);
205 branch
->add(caseleaf_id
,noderef
);
206 leaf
->set_id(real_syllables
);
209 void WordArchive::register_leaf(strid id
)
211 if (find(leaf_id
.begin(),leaf_id
.end(),id
) == leaf_id
.end())
212 leaf_id
.push_back(id
);
// NOTE(review): only the signature of set_mask (a mask value plus a bool
// flag) is visible in this listing; its body could not be reviewed here.
215 void LeafNode::set_mask(uint maskval
,bool mask
)
// Stores a copy of `_syllables` in `syllables`, then walks the stored ids and
// appends the string sarch[] returns for each one to `word`.
// NOTE(review): some statements (before the loop and inside it, ahead of the
// append) are not visible in this listing -- e.g. whether `word` is reset
// first or a separator is inserted between syllables; confirm.
223 void LeafNode::set_id(const vector
<strid
> &_syllables
)
225 syllables
= _syllables
;
227 int i
,nr_syllables
= syllables
.size();
228 for (i
= 0;i
< nr_syllables
;i
++) {
// Append the text of the i-th syllable.
231 word
+= sarch
[syllables
[i
]];
// Stream output for a LeafNode: fetches the node's syllable ids with
// get_syllables() and, in the visible line, writes each one as
// `<sarch text>(<id>) `.
// NOTE(review): lines inside the loop ahead of the write, and the end of the
// function (including its return statement), are not visible in this listing.
236 std::ostream
& operator << (std::ostream
&os
,const LeafNode
&node
)
238 std::vector
<strid
> syll
;
239 node
.get_syllables(syll
);
240 for (std::vector
<strid
>::size_type i
= 0;i
< syll
.size();i
++) {
243 os
<< sarch
[syll
[i
]] << "(" << syll
[i
] << ") ";