1 #include "wordnode.h" // -*- tab-width: 2 coding: viscii mode: c++ -*-
6 #include <boost/format.hpp>
// File-local ids for the two leaf kinds. WordArchive::init() assigns them from
// sarch["<mainleaf>"] and sarch["<caseleaf>"].
9 static strid mainleaf_id
,caseleaf_id
;
// Definition of the static index from a string id to its LeafNNode.
// LeafNNode::set_id() writes entries; LeafNNode::find_leaf() reads them.
10 std::map
<strid
,LeafNNode
*> LeafNNode::leaf_index
;
// Looks up the child stored under `leaf` in `nodes`. When an entry exists, its
// node pointer is returned cast to LeafNNode*.
// NOTE(review): the not-found return is not visible here -- presumably NULL; confirm.
12 LeafNNode
* BranchNNode::get_leaf(strid leaf
) const
14 node_map::const_iterator iter
;
15 iter
= nodes
.find(leaf
);
16 if (iter
!= nodes
.end())
// Unchecked C-style downcast: is_leaf() is not tested before the cast.
17 return((LeafNNode
*)iter
->second
.get());
// Appends to `_nodes` the children of this branch that are stored under any of
// the ids returned by warch.get_leaf_id(). Ids with no entry in `nodes` are
// skipped. Each hit is pushed back as a LeafNNode* (unchecked C-style cast).
22 void BranchNNode::get_leaves(std::vector
<LeafNNode
*> &_nodes
) const
// Local copy of the registered leaf ids.
24 const vector
<strid
> leaf_id
= warch
.get_leaf_id();
25 node_map::const_iterator iter
;
26 uint i
,n
= leaf_id
.size();
27 for (i
= 0;i
< n
;i
++) {
// find() yields a single entry per id, even if several share the key.
28 iter
= nodes
.find(leaf_id
[i
]);
29 if (iter
!= nodes
.end())
30 _nodes
.push_back((LeafNNode
*)iter
->second
.get());
// Appends to `_nodes` every child stored under `_id` whose is_leaf() is false,
// cast to BranchNNode*. All entries sharing the key are visited via
// equal_range().
34 void BranchNNode::get_branches(strid _id
,std::vector
<BranchNNode
*> &_nodes
) const
// `range` holds the [first, second) iterator pair of entries keyed by _id.
37 range
= nodes
.equal_range(_id
);
38 node_map::const_iterator iter
;
39 for (iter
= range
.first
;iter
!= range
.second
; ++iter
)
// Leaves stored under the same key are filtered out.
40 if (!iter
->second
->is_leaf())
41 _nodes
.push_back((BranchNNode
*)iter
->second
.get());
// Returns the first child stored under `_id` whose is_leaf() is false, cast to
// BranchNNode*. Entries are scanned in equal_range() order.
// NOTE(review): the result when no such child exists is not visible here --
// presumably NULL (add_path depends on it); confirm.
44 BranchNNode
* BranchNNode::get_branch(strid _id
) const
47 range
= nodes
.equal_range(_id
);
48 node_map::const_iterator iter
;
49 for (iter
= range
.first
;iter
!= range
.second
; ++iter
)
50 if (!iter
->second
->is_leaf())
51 return (BranchNNode
*)iter
->second
.get();
// Inserts `_branch` into `nodes` under key `_id`.
// The (_id, _branch) pair is handed straight to nodes.insert(); nothing is
// checked beforehand.
55 void BranchNNode::add(strid _id
,NNodeRef _branch
)
57 nodes
.insert(make_pair(_id
,_branch
));
// Walks the path `toks` starting at this node. For each token it asks the
// current node for a branch child via get_branch().
// The visible statements create a fresh BranchNNode, add it under that token
// and use it as `next`.
// NOTE(review): the condition guarding the creation, the step that advances
// `me`, and the return statement are not visible here. Presumably a branch is
// created only when get_branch() finds none and the last node is returned;
// confirm.
60 BranchNNode
* BranchNNode::add_path(const std::vector
<strid
> &toks
)
62 uint i
,n
= toks
.size();
63 BranchNNode
*me
= this;
64 for (i
= 0;i
< n
;i
++) {
65 BranchNNode
*next
= me
->get_branch(toks
[i
]);
// The new branch is wrapped in an NNodeRef before being stored in the tree.
67 NNodeRef
branch(new BranchNNode());
68 me
->add(toks
[i
],branch
);
69 next
= (BranchNNode
*)branch
.get();
// Resolves the ids of the "<mainleaf>" and "<caseleaf>" marker strings through
// sarch, stores them in the file-local mainleaf_id / caseleaf_id, and
// registers both with register_leaf().
76 void WordArchive::init()
78 mainleaf_id
= sarch
["<mainleaf>"];
79 caseleaf_id
= sarch
["<caseleaf>"];
80 register_leaf(mainleaf_id
);
81 register_leaf(caseleaf_id
);
// Populates the archive with words. With a non-NULL `filename`, an ifstream is
// opened on it, and add_entry() / add_case_entry() are called with
// word.c_str(). The visible lines also walk the lm->ucount entries of
// get_ngram().get_lm() and add each lm->word_str[i] the same way.
// NOTE(review): the file-reading loop, how the two sources relate (else-branch
// or unconditional), and the returned value are not visible here; confirm.
84 bool WordArchive::load(const char* filename
)
86 if (filename
!= NULL
) {
87 ifstream
ifs(filename
);
// Each word is added twice: as a main entry and as a case entry.
94 add_entry(word
.c_str());
95 add_case_entry(word
.c_str());
// Vocabulary taken from the n-gram language model.
99 const lm_t
* lm
= get_ngram().get_lm();
100 for (int i
= 0;i
< lm
->ucount
;i
++) {
101 add_entry(lm
->word_str
[i
]);
102 add_case_entry(lm
->word_str
[i
]);
// Creates a new LeafNNode, wraps it in an NNodeRef and adds it directly to the
// root under `tok`.
// NOTE(review): the return statement is not visible here -- presumably the new
// leaf; confirm.
108 LeafNNode
* WordArchive::add_special_entry(strid tok
)
110 LeafNNode
*leaf
= new LeafNNode
;
111 NNodeRef
noderef(leaf
);
115 //leaf->set_mask(MAIN_LEAF);
116 get_root()->add(tok
,noderef
);
// Adds the word `w` as a main-leaf entry. Pieces of `w` delimited by '_' are
// copied into `buf` and mapped to ids through sarch. The resulting id list is
// used as the path below the root, and a new LeafNNode is attached at the end
// of that path under mainleaf_id.
// NOTE(review): the loop around the split, the initialisation of `len` and
// `start`, and any free() of `buf` are not visible here; confirm that `buf` is
// terminated before the sarch lookup and released.
120 void WordArchive::add_entry(const char *w
)
123 const char *pos
,*start
;
127 buf
= (char *)malloc(len
+1);
128 vector
<VocabIndex
> syllables
;
// Locate the next '_' separator, if any.
130 pos
= strchr(start
,'_');
// Piece length: up to the separator, or the remainder of w when none is left.
131 wlen
= pos
? pos
- start
: len
- (start
- w
);
132 memcpy(buf
,start
,wlen
);
134 VocabIndex id
= sarch
[buf
];
135 syllables
.push_back(id
);
// The syllable ids double as the tree path.
140 vector
<strid
> path
= syllables
;
141 BranchNNode
* branch
= get_root()->add_path(path
);
142 NNodeRef
noderef(new LeafNNode
);
143 LeafNNode
*leaf
= (LeafNNode
*)noderef
.get();
144 //leaf->set_mask(MAIN_LEAF);
145 branch
->add(mainleaf_id
,noderef
);
146 leaf
->set_id(syllables
);
// Adds a case-leaf entry for the word `w2`. A copy `w` is built by passing
// every byte of `w2` through viet_tolower(). `w` is split at '_' into
// `syllables` and `w2` is split the same way into `real_syllables`. The leaf is
// attached under caseleaf_id at the path given by `syllables`, and is given
// `real_syllables` as its id.
// NOTE(review): the role of `same` (presumably "w equals w2", possibly used to
// skip the entry), the loops around the two splits, and any free() of `w` /
// `buf` are not visible here; confirm.
149 void WordArchive::add_case_entry(const char *w2
)
151 unsigned i
,same
,len
,wlen
;
152 const char *pos
,*start
;
156 w
= (char *)malloc(len
+1);
// Convert byte by byte and compare against the original.
158 for (i
= 0;i
< len
;i
++) {
159 w
[i
] = (char)viet_tolower(w2
[i
]);
160 if (same
&& w
[i
] != w2
[i
])
168 buf
= (char *)malloc(len
+1);
169 vector
<VocabIndex
> syllables
,real_syllables
;
// First split: pieces of the converted copy `w` (offset measured from w).
172 pos
= strchr(start
,'_');
173 wlen
= pos
? pos
- start
: len
- (start
- w
);
174 memcpy(buf
,start
,wlen
);
176 VocabIndex id
= sarch
[buf
];
177 syllables
.push_back(id
);
// Second split: pieces of the original `w2` (offset measured from w2).
184 pos
= strchr(start
,'_');
185 wlen
= pos
? pos
- start
: len
- (start
- w2
);
186 memcpy(buf
,start
,wlen
);
188 VocabIndex id
= sarch
[buf
];
189 real_syllables
.push_back(id
);
// The path uses the converted syllables; the leaf keeps the original ones.
194 vector
<strid
> path
= syllables
;
195 BranchNNode
* branch
= get_root()->add_path(path
);
196 NNodeRef
noderef(new LeafNNode
);
197 LeafNNode
*leaf
= (LeafNNode
*)noderef
.get();
198 //leaf->set_mask(CASE_LEAF);
199 branch
->add(caseleaf_id
,noderef
);
200 leaf
->set_id(real_syllables
);
// Appends `id` to leaf_id unless it is already present. Presence is tested
// with a linear find() over leaf_id.
203 void WordArchive::register_leaf(strid id
)
205 if (find(leaf_id
.begin(),leaf_id
.end(),id
) == leaf_id
.end())
206 leaf_id
.push_back(id
);
// Takes a mask value and a boolean flag.
// NOTE(review): the body is not visible here -- presumably sets or clears the
// `maskval` bits in the node's bitmask according to `mask`; confirm.
209 void LeafNNode::set_mask(uint maskval
,bool mask
)
// Stores `_syllables` as this leaf's syllable list, builds `word` by appending
// sarch[syllables[i]] for every syllable, and records this leaf in the static
// leaf_index under `id`.
// NOTE(review): the declaration of `word`, any separator placed between
// syllables, and how `id` is derived are not visible here. find_leaf() uses
// sarch[word], so presumably the same; confirm.
217 void LeafNNode::set_id(const vector
<strid
> &_syllables
)
219 syllables
= _syllables
;
221 int i
,nr_syllables
= syllables
.size();
222 for (i
= 0;i
< nr_syllables
;i
++) {
225 word
+= sarch
[syllables
[i
]];
// An existing index entry with the same id is overwritten.
228 leaf_index
[id
] = this;
// Looks up the leaf registered for the given syllable sequence. `word` is
// built by appending sarch[syllables[i]] for every syllable and is resolved to
// an id with sarch[word]. Returns the leaf_index entry for that id, or NULL
// when there is none.
// NOTE(review): the declaration of `word` and any separator placed between
// syllables are not visible here; they must match set_id(). Confirm.
231 LeafNNode
* LeafNNode::find_leaf(const vector
<strid
> &syllables
)
234 int i
,nr_syllables
= syllables
.size();
235 for (i
= 0;i
< nr_syllables
;i
++) {
238 word
+= sarch
[syllables
[i
]];
240 strid id
= sarch
[word
];
241 map
<strid
,LeafNNode
*>::iterator iter
= leaf_index
.find(id
);
242 return iter
!= leaf_index
.end() ? iter
->second
: NULL
;
// Writes a LeafNNode to `os`. node.bitmask and the syllable count are written
// first using the boost::format pattern "%04x %d". The text sarch[syll[i]] of
// each syllable follows.
// NOTE(review): separators written between syllables and the return statement
// are not visible here; confirm against operator>>.
245 std::ostream
& operator << (std::ostream
&os
,const LeafNNode
&node
)
247 std::vector
<strid
> syll
;
248 node
.get_syllables(syll
);
249 os
<< boost::format("%04x %d") % node
.bitmask
% syll
.size();
250 for (std::vector
<strid
>::size_type i
= 0;i
< syll
.size();i
++) {
252 os
<< sarch
[syll
[i
]];
// Reads a leaf reference from `is`: a hexadecimal bitmask followed by a
// decimal count `n`. Each syll[i] is then set to get_ngram()[s], and `node` is
// set to the result of LeafNNode::find_leaf(syll), whatever that lookup yields.
// NOTE(review): the declarations of `bitmask`, `n` and `s`, the sizing of
// `syll` before the loop (the loop bound is syll.size(), not n), the read of
// `s`, and the return statement are not visible here; confirm.
257 std::istream
& operator >> (std::istream
&is
,LeafNNode
* &node
)
259 std::vector
<strid
> syll
;
262 is
>> hex
>> bitmask
>> dec
>> n
;
264 for (std::vector
<strid
>::size_type i
= 0;i
< syll
.size();i
++) {
267 syll
[i
] = get_ngram()[s
];
269 node
= LeafNNode::find_leaf(syll
);