1 // Implementation of the cMT class
2 // Copyright © 2009 The University of Chicago
6 #include <Q3TextStream>
8 #include <Q3SortedList>
10 #include "cMTModel2Norm.h"
11 #include "cMTModel1.h"
16 //////////////////////////////////////////////////////////////////////
17 // Construction/Destruction
18 //////////////////////////////////////////////////////////////////////
// Constructor. The visible initializers record the project directory in
// m_projectDirectory and set the log file name m_MTLog to
// "LinguisticaMTLog.txt"; the constructor body is empty.
// NOTE(review): no visible initializer consumes `parent` — confirm against
// the complete initializer list.
20 cMT::cMT(LinguisticaMainWindow
* parent
, QString projectDirectory
)
22 m_projectDirectory(projectDirectory
),
26 m_MTLog("LinguisticaMTLog.txt") { }
// Builds the training-corpus object and reports its word counts.
// Allocates a new mTVolca for this object and m_projectDirectory, stores it
// in m_Volca, then calls initVolList(), readSentences() and
// setFastSearchPairsForT() on it, in that order.
// NOTE(review): m_Volca is overwritten with a fresh allocation on every
// call; no release of a previous object is visible here — confirm ownership.
35 void cMT::readTrainingCorpus()
38 m_Volca
= new mTVolca(this, m_projectDirectory
);
40 m_Volca
->initVolList();
42 m_Volca
->readSentences();
44 m_Volca
->setFastSearchPairsForT();
// Show the total word counts of both languages in modal status dialogs.
46 QMessageBox::information(NULL
, "Status", QString("Words # in language 1 is %1!").arg(m_Volca
->m_language1TotalWords
), "OK");
47 QMessageBox::information(NULL
, "Status", QString("Words # in language 2 is %1!").arg(m_Volca
->m_language2TotalWords
), "OK");
// Trains Model 1.
// Allocates a new cMTModel1 for this object with the requested iteration
// count, stores it in m_Model1, and runs EMLoops() with the model's own
// m_Iterations value.
//   model1Iterations - iteration count passed to the cMTModel1 constructor.
// NOTE(review): m_Model1 is overwritten with a fresh allocation on every
// call; no release of a previous model is visible here — confirm ownership.
51 void cMT::trainModel1(int model1Iterations
)
54 m_Model1
= new cMTModel1(this, model1Iterations
);
58 m_Model1
->EMLoops(m_Model1
->m_Iterations
);
// NOTE(review): the two dialogs below announce the start and end of logging,
// but no call to logTAfterModel1() is visible between them — confirm whether
// the call is intentionally disabled.
60 QMessageBox::information(NULL
, "Status", "Logging T After Model1...!", "OK");
62 QMessageBox::information(NULL
, "Status", "Done logging T After Model1...!", "OK");
// Appends the Model 1 T table (m_Model1->m_T) to the log file named by
// m_MTLog.
// For every outer entry of m_T, the language-1 word is written as a heading;
// the inner entries are then collected into a list of mTForSortingItem and
// written as "index : word value" lines, stopping once outputIndex reaches
// outputTopLimit.
// NOTE(review): declarations of language1WordId, language2WordId, TValue,
// oneList and outputIndex, and any update of outputIndex, are not visible in
// this view — confirm against the full file.
67 void cMT::logTAfterModel1()
71 QString language1Word
;
72 QString language2Word
;
74 IntToIntToDouble::iterator IntToIntToDoubleIt
;
76 IntToDouble::iterator IntToDoubleIt
;
77 mTSortedList sortPlatForm
;
78 mTForSortingItem
* oneSortItem
;
// Maximum number of entries written per language-1 word.
79 int outputTopLimit
= 20;
81 QFile
file( m_MTLog
);
// Open write-only in append mode, so earlier log content is kept.
83 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
// NOTE(review): what happens after this error dialog (e.g. an early return)
// is not visible here — confirm.
85 QMessageBox::information(NULL
, "Error", "Can't Open the MT Log file!", "OK");
89 Q3TextStream
outf( &file
);
91 outf
<< "******T Tables After Model1******" << endl
<<endl
;
// Presumably makes the list own its items so that clear() frees the objects
// allocated with new below — confirm mTSortedList is a Q3PtrList-based type.
94 sortPlatForm
.setAutoDelete(TRUE
);
// Outer loop: one entry of m_T per language-1 word id.
96 for ( IntToIntToDoubleIt
= (m_Model1
->m_T
).begin(); IntToIntToDoubleIt
!= (m_Model1
->m_T
).end(); IntToIntToDoubleIt
++)
98 language1WordId
= IntToIntToDoubleIt
.key();
99 language1Word
= (m_Volca
->m_language1WordIndex
)[language1WordId
] ;
100 outf
<< language1Word
<< " :"<<endl
;
// The mapped value is a pointer to the inner (language-2 id -> value) table.
102 oneList
= IntToIntToDoubleIt
.data();
// Inner loop: wrap each (language-2 word, T value) pair in a list item.
104 for (IntToDoubleIt
= oneList
->begin(); IntToDoubleIt
!= oneList
->end(); IntToDoubleIt
++)
106 language2WordId
= IntToDoubleIt
.key();
107 language2Word
= (m_Volca
->m_language2WordIndex
)[language2WordId
] ;
109 TValue
= IntToDoubleIt
.data();
111 oneSortItem
= new mTForSortingItem(language2Word
, TValue
, 1);
113 sortPlatForm
.append(oneSortItem
);
// Write the collected items in list order.
// NOTE(review): no sort() call is visible before this loop — confirm the list
// is ordered as intended at this point.
119 for ( oneSortItem
=sortPlatForm
.first(); oneSortItem
!= 0; oneSortItem
=sortPlatForm
.next())
123 language2Word
= oneSortItem
->m_name
;
124 TValue
= oneSortItem
->m_doubleValue
;
126 outf
<<" "<< outputIndex
<<" : " <<language2Word
<< " " << TValue
<<endl
;
// Stop after outputTopLimit entries for this word.
128 if ( outputIndex
>= outputTopLimit
) break;
// Empty the list before processing the next language-1 word.
132 sortPlatForm
.clear();
// Trains the normalized Model 2, computes Viterbi alignments and logs the
// results.
// Steps, in order: allocate a new cMTModel2Norm and store it in m_Model2Norm,
// call initTandA(), run EMLoops() with the model's own m_Iterations, call
// viterbiAll(), then logTandAAfterModel2Norm(). Modal status dialogs are
// shown before the Viterbi step and around the logging step.
//   model2Iterations - iteration count passed to the cMTModel2Norm constructor.
//   getTFromModel1   - forwarded to the cMTModel2Norm constructor; presumably
//                      selects whether T is seeded from Model 1 — confirm.
// NOTE(review): m_Model2Norm is overwritten with a fresh allocation on every
// call; no release of a previous model is visible here — confirm ownership.
141 void cMT::trainModel2Norm(int model2Iterations
, bool getTFromModel1
)
144 m_Model2Norm
= new cMTModel2Norm(this, model2Iterations
, getTFromModel1
);
146 m_Model2Norm
->initTandA();
148 m_Model2Norm
->EMLoops(m_Model2Norm
->m_Iterations
);
150 QMessageBox::information(NULL
, "Status", "Doing Viterbi for Model2Norm...", "OK");
152 m_Model2Norm
->viterbiAll();
154 QMessageBox::information(NULL
, "Status", "Logging TandA After Model2Norm...!", "OK");
156 logTandAAfterModel2Norm();
158 QMessageBox::information(NULL
, "Status", "Done logging TandA After Model2Norm...!", "OK");
// Appends three sections to the log file named by m_MTLog:
//   1. the T table of m_Model2Norm (m_T), one heading per language-1 word;
//   2. the A table of m_Model2Norm (m_A), one heading per outer key;
//   3. the per-sentence alignments stored in m_sentenceAlignments.
// NOTE(review): declarations of language1WordId, language2WordId, TValue,
// AValue, outputIndex, i, l, m, myVolca, oneWordStr and twoWordStr, and any
// update of outputIndex, are not visible in this view — confirm against the
// full file.
163 void cMT::logTandAAfterModel2Norm()
167 QString language1Word
;
168 QString language2Word
;
169 int language1ChunkId
;
170 int language2ChunkId
;
171 QString language1ChunkStr
;
172 QString language2ChunkStr
;
175 IntToIntToDouble::iterator IntToIntToDoubleIt
;
176 IntToDouble
* oneList
;
177 IntToDouble::iterator IntToDoubleIt
;
178 mTSortedList sortPlatForm
;
179 mTForSortingItem
* oneSortItem
;
// Maximum number of entries written per heading; raised to 100 for the A
// tables further down.
180 int outputTopLimit
= 20;
182 QFile
file( m_MTLog
);
// Open write-only in append mode, so earlier log content is kept.
184 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
// NOTE(review): what happens after this error dialog (e.g. an early return)
// is not visible here — confirm.
186 QMessageBox::information(NULL
, "Error", "Can't Open the MT Log file!", "OK");
190 Q3TextStream
outf( &file
);
// ---- Section 1: T tables ----
193 outf
<< "******T Tables After Model2Norm******" << endl
<<endl
;
// Presumably makes the list own its items so that clear() frees the objects
// allocated with new below — confirm mTSortedList is a Q3PtrList-based type.
196 sortPlatForm
.setAutoDelete(TRUE
);
// Outer loop: one entry of m_T per language-1 word id.
198 for ( IntToIntToDoubleIt
= (m_Model2Norm
->m_T
).begin(); IntToIntToDoubleIt
!= (m_Model2Norm
->m_T
).end(); IntToIntToDoubleIt
++)
200 language1WordId
= IntToIntToDoubleIt
.key();
201 language1Word
= (m_Volca
->m_language1WordIndex
)[language1WordId
] ;
202 outf
<< language1Word
<< " :"<<endl
;
// The mapped value is a pointer to the inner (language-2 id -> value) table.
204 oneList
= IntToIntToDoubleIt
.data();
// Inner loop: wrap each (language-2 word, T value) pair in a list item.
206 for (IntToDoubleIt
= oneList
->begin(); IntToDoubleIt
!= oneList
->end(); IntToDoubleIt
++)
208 language2WordId
= IntToDoubleIt
.key();
209 language2Word
= (m_Volca
->m_language2WordIndex
)[language2WordId
] ;
211 TValue
= IntToDoubleIt
.data();
213 oneSortItem
= new mTForSortingItem(language2Word
, TValue
, 1);
215 sortPlatForm
.append(oneSortItem
);
// Write the collected items in list order.
// NOTE(review): no sort() call is visible before this loop — confirm the list
// is ordered as intended at this point.
221 for ( oneSortItem
=sortPlatForm
.first(); oneSortItem
!= 0; oneSortItem
=sortPlatForm
.next())
225 language2Word
= oneSortItem
->m_name
;
226 TValue
= oneSortItem
->m_doubleValue
;
228 outf
<<" "<< outputIndex
<<" : " <<language2Word
<< " " << TValue
<<endl
;
// Stop after outputTopLimit entries for this word.
230 if ( outputIndex
>= outputTopLimit
) break;
// Empty the list before processing the next language-1 word.
234 sortPlatForm
.clear();
// ---- Section 2: A tables ----
238 outf
<< "******A Tables After Model2Norm******" << endl
<<endl
;
241 sortPlatForm
.setAutoDelete(TRUE
);
// Outer loop: one entry of m_A per outer key, written as a decimal heading.
243 for ( IntToIntToDoubleIt
= (m_Model2Norm
->m_A
).begin(); IntToIntToDoubleIt
!= (m_Model2Norm
->m_A
).end(); IntToIntToDoubleIt
++)
245 language2ChunkId
= IntToIntToDoubleIt
.key();
246 language2ChunkStr
= QString("%1").arg(language2ChunkId
);
247 outf
<< language2ChunkStr
<< " :"<<endl
;
249 oneList
= IntToIntToDoubleIt
.data();
// Inner loop: wrap each (inner key as text, A value) pair in a list item.
251 for (IntToDoubleIt
= oneList
->begin(); IntToDoubleIt
!= oneList
->end(); IntToDoubleIt
++)
253 language1ChunkId
= IntToDoubleIt
.key();
254 language1ChunkStr
= QString("%1").arg(language1ChunkId
);
256 AValue
= IntToDoubleIt
.data();
258 oneSortItem
= new mTForSortingItem(language1ChunkStr
, AValue
, 1);
260 sortPlatForm
.append(oneSortItem
);
// The A tables use a larger per-heading limit than the T tables.
266 outputTopLimit
= 100;
267 for ( oneSortItem
=sortPlatForm
.first(); oneSortItem
!= 0; oneSortItem
=sortPlatForm
.next())
271 language1ChunkStr
= oneSortItem
->m_name
;
272 AValue
= oneSortItem
->m_doubleValue
;
// Unlike the T section, the running index is not written here.
274 outf
<<" " <<language1ChunkStr
<< " " << AValue
<<endl
;
276 if ( outputIndex
>= outputTopLimit
) break;
// Empty the list before processing the next outer key.
280 sortPlatForm
.clear();
// ---- Section 3: alignments ----
284 // Log Viterbi Alignments after model 2
285 outf
<< "******Viterbi Alignments After Model2Norm******" << endl
<<endl
;
290 IntToInt
* oneLan1Sentence
;
291 IntToInt
* oneLan2Sentence
;
292 IntToInt
* oneAlignment
;
293 int language1SentenceLen
;
294 int language2SentenceLen
;
// One iteration per sentence pair, indexed by i.
// NOTE(review): sentence data is read through myVolca while word strings are
// read through m_Volca; presumably both refer to the same object — confirm.
302 for ( i
=0; i
< myVolca
->m_countOfSentences
; i
++)
304 outf
<<"Sentence " << i
<< " : " << endl
;
306 oneLan1Sentence
= myVolca
->m_language1Sentences
[i
];
307 oneLan2Sentence
= myVolca
->m_language2Sentences
[i
];
309 language1SentenceLen
= oneLan1Sentence
->size();
310 language2SentenceLen
= oneLan2Sentence
->size();
312 // Output sentence in lan1
313 outf
<< "Sentence 1 -> ";
// Write each position and its word string for the language-1 sentence.
314 for ( l
=0; l
< language1SentenceLen
; l
++)
316 language1WordId
= (*oneLan1Sentence
)[l
];
317 oneWordStr
= (m_Volca
->m_language1WordIndex
)[language1WordId
] ;
318 outf
<< l
<< " : " << oneWordStr
<< " ";
323 // Output sentence in lan2
324 outf
<< "Sentence 2 -> ";
// Write each position and its word string for the language-2 sentence.
325 for ( m
=0; m
< language2SentenceLen
; m
++)
327 language2WordId
= (*oneLan2Sentence
)[m
];
328 oneWordStr
= (m_Volca
->m_language2WordIndex
)[language2WordId
] ;
329 outf
<< m
<< " : " << oneWordStr
<< " ";
334 // Output the alignment
335 outf
<< "Alignments are language2Word -> language1Word" << endl
;
336 oneAlignment
= (myVolca
->m_sentenceAlignments
)[i
];
// For each language-2 position m, (*oneAlignment)[m] is used as the position
// in the language-1 sentence whose word is written on the right of "-->".
338 for ( m
=0; m
< language2SentenceLen
; m
++)
340 language2WordId
= (*oneLan2Sentence
)[m
];
341 twoWordStr
= (m_Volca
->m_language2WordIndex
)[language2WordId
] ;
343 language1WordId
= (*oneLan1Sentence
)[(*oneAlignment
)[m
]];
344 oneWordStr
= (m_Volca
->m_language1WordIndex
)[language1WordId
] ;
346 outf
<< " " << twoWordStr
<< " --> " << oneWordStr
<< endl
;