1 // Implementation of CStem phonology methods
2 // Copyright © 2009 The University of Chicago
6 #include "BiphoneCollection.h"
7 #include "PhoneCollection.h"
8 #include "WordCollection.h"
// Computes this stem's phonological scores from the phone and biphone
// statistics reachable through Words:
//   - sums of unigram and bigram log-frequency terms over the tier-1 pieces,
//     and those sums divided by (tier-1 size - 1);
//   - when tier 2 is non-empty, a "local" mutual-information sum over tier 2
//     and a "distant" sum over all ordered pairs of tier-2 pieces, each pair
//     weighted by 1 / (distance between the two positions).
// NOTE(review): this listing is a lossy extraction. Braces and some
// statements (the original lines between the numbered ones) are not visible,
// so the comments below describe only the statements that are shown.
11 void CStem::ComputeProbabilities(CWordCollection
* Words
)
// Reset the tier-2 MI scores and the two Boltzmann totals.
// NOTE(review): no reset of m_UnigramLogProb / m_BigramLogProb is visible
// here, although both are accumulated with += below -- confirm.
16 m_Tier2_LocalMI_Score
= 0;
17 m_Tier2_DistantMI_Score
= 0;
19 m_LocalMI_TotalBoltzmannScore
= 0;
20 m_DistantMI_TotalBoltzmannScore
= 0;
// Left-hand neighbour passed to the biphone lookup; starts out null.
// Its per-iteration update is not visible in this listing.
22 CPhone
* prevPhone
= 0;
// Tier-1 pass: pieces are addressed 1..Size() inclusive.
23 for (int i
= 1; i
<= m_Phonology_Tier1
.Size(); ++i
) {
// Look up the CPhone for piece i in the collection's phone inventory
// (operator^= applied to *Words->GetPhones()).
24 CPhone
* pPhone
= *Words
->GetPhones() ^=
25 m_Phonology_Tier1
.GetPiece(i
);
26 Q_ASSERT(pPhone
!= 0);
// Unigram term: add this phone's m_LogFreq.
32 m_UnigramLogProb
+= pPhone
->m_LogFreq
;
// Biphone record for the pair (previous phone, current phone).
33 CBiphone
* pBiphone
= Words
->GetPhones()->GetMyBiphones()
34 ->GetBiphone(prevPhone
, pPhone
);
// Bigram term: the phone's m_LogFreq minus the pair's m_MI.
38 m_BigramLogProb
+= pPhone
->m_LogFreq
- pBiphone
->m_MI
;
// Per-piece averages. The divisor is Size()-1, not Size().
// NOTE(review): the divisor is zero when tier 1 holds exactly one piece;
// no guard is visible -- confirm callers never reach that case.
42 m_UnigramComplexity
= m_UnigramLogProb
/ (m_Phonology_Tier1
.Size()-1);
43 m_BigramComplexity
= m_BigramLogProb
/ (m_Phonology_Tier1
.Size()-1);
// Empty tier 2: the statement controlled by this test is not visible
// (presumably an early return -- confirm).
45 if (m_Phonology_Tier2
.Size() == 0)
// Tier-2 local pass over pieces 1..Size() inclusive.
48 for (int i
= 1; i
<= m_Phonology_Tier2
.Size(); ++i
) {
// NOTE(review): the phone is looked up through GetPhones(), while the
// biphone just below comes from GetPhones_Tier2() -- confirm this mix is
// intended.
49 CPhone
* pPhone
= *Words
->GetPhones() ^=
50 m_Phonology_Tier2
.GetPiece(i
);
55 CBiphone
* pBiphone
= Words
->GetPhones_Tier2()->GetMyBiphones()
56 ->GetBiphone(prevPhone
, pPhone
);
// Accumulate the pair's m_MI into the local tier-2 score.
60 m_Tier2_LocalMI_Score
+= pBiphone
->m_MI
;
// Local model total: bigram log-probability sum minus the local tier-2 MI.
64 m_LocalMI_TotalBoltzmannScore
= m_BigramLogProb
- m_Tier2_LocalMI_Score
;
// Reset the distant score (again) before the pairwise pass.
65 m_Tier2_DistantMI_Score
= 0;
// Tier-2 distant pass: every ordered pair (i, j) with i < j.
67 for (int i
= 1; i
<= m_Phonology_Tier2
.Size(); ++i
) {
68 CPhone
* pPhone
= *Words
->GetPhones() ^=
69 m_Phonology_Tier2
.GetPiece (i
);
70 for (int j
= i
+1; j
<= m_Phonology_Tier2
.Size(); ++j
) {
71 CPhone
* qPhone
= *Words
->GetPhones() ^=
72 m_Phonology_Tier2
.GetPiece (j
);
// Here the tier-2 phone collection is reached through the m_Phones_Tier2
// member directly rather than through GetPhones_Tier2().
73 CBiphone
* pBiphone
= Words
->m_Phones_Tier2
->GetMyBiphones()
74 ->GetBiphone(pPhone
, qPhone
);
// Weight the pair's m_MI by the inverse of the distance j - i (always >= 1).
76 m_Tier2_DistantMI_Score
+= pBiphone
->m_MI
/ (j
-i
);
// Distant model total: bigram log-probability sum minus the distant MI.
79 m_DistantMI_TotalBoltzmannScore
= m_BigramLogProb
-
80 m_Tier2_DistantMI_Score
;
// Derives the "plog" values of the two tier-2 models from scores computed
// earlier and the supplied normalisation constants Z and ZStar.
// NOTE(review): this listing is a lossy extraction; the function's braces
// and the end of its last statement are not visible.
83 void CStem::ComputeBoltzmannProbabilities(double Z
, double ZStar
)
85 // XXX. what if Z or Zstar == 0.0?
// Local model: total Boltzmann score plus log2(Z).
88 m_LocalMI_Plog
= m_LocalMI_TotalBoltzmannScore
+ log2(Z
);
// Distant model: bigram log-probability sum minus the distant tier-2 MI,
// plus a final term that is cut off in this listing (presumably
// log2(ZStar), by analogy with the local model -- confirm).
91 m_DistantMI_Plog
= m_BigramLogProb
- m_Tier2_DistantMI_Score
+
// Collects per-piece tier-1 data into member containers: the piece's display
// string goes into m_phonologies, its m_LogFreq into m_unigrams, and a
// value `mi` into m_mis. It also tracks running extremes in m_maxpositive /
// m_maxnegative and sets m_donephonology when finished.
// (The names suggest the containers feed a graph display; nothing in the
// visible code shows the consumer.)
// NOTE(review): this listing is a lossy extraction. The declarations of i,
// ugram, mi and pBiphone, the assignment of mi, the braces, and the bodies
// of two of the `if` tests below are not visible.
95 void CStem::GetPhonogyTier1InfoForGraph(CWordCollection
* Words
)
// NOTE(review): prevPhone is declared without an initialiser here; its
// initialisation and per-iteration update are not visible -- confirm it is
// set before the GetBiphone call below.
98 CPhone
* pPhone
, *prevPhone
;
// Start from an empty label map and a zero unigram counter.
105 m_phonologies
.clear();
109 m_countofunigrams
= 0;
// Tier-1 pieces are addressed 1..Size() inclusive.
114 for (i
= 1; i
<= m_Phonology_Tier1
.Size(); i
++)
// Display string of piece i.
116 QString temp2
= m_Phonology_Tier1
.GetPiece (i
).Display();
// CPhone for piece i, looked up in the collection's phone inventory.
117 pPhone
= *Words
->GetPhones() ^= m_Phonology_Tier1
.GetPiece (i
);
124 ugram
= pPhone
->m_LogFreq
;
// Biphone record for the pair (previous phone, current phone).
126 pBiphone
= Words
->GetPhones()->GetMyBiphones()->GetBiphone (prevPhone
, pPhone
);
// Store label and unigram value under the same key (m_countofunigrams);
// the mi value is stored under its own counter (m_countofmis).
133 m_phonologies
.insert(m_countofunigrams
, temp2
);
134 m_unigrams
.insert(m_countofunigrams
, ugram
);
135 m_mis
.insert(m_countofmis
, mi
);
// Raise the running maximum when the unigram value exceeds it.
144 if ( ugram
> m_maxpositive
)
146 m_maxpositive
= ugram
;
// Same comparisons for mi against the running maximum and minimum; the
// statements these tests control are not visible in this listing.
152 if ( mi
> m_maxpositive
)
159 if ( mi
< m_maxnegative
)
// Flag that the tier-1 information has been collected.
168 m_donephonology
= true;
// Returns a multi-line, human-readable summary of this stem's phonological
// scores, built from a single format string with numbered place markers
// %1..%9 filled in by chained QString::arg() calls.
// NOTE(review): this listing is a lossy extraction; the function's braces
// are not visible.
171 QString
CStem::GetProbabilityInformation()
173 return QString("\nUnigram log probability %1"
174 "\nUnigram complexity %2"
175 "\nBigram log probability %3"
176 "\nBigram complexity %4"
177 "\nTier 2 MI score %5"
178 "\nLocal tier 2 model score: %6"
179 "\nLocal tier 2 model log probability: %7"
180 "\nDistant tier 2 MI: %8"
181 "\nDistant tier 2 model score %9")
182 .arg(m_UnigramLogProb
)
183 .arg(m_UnigramComplexity
)
184 .arg(m_BigramLogProb
)
185 .arg(m_BigramComplexity
)
186 .arg(m_Tier2_LocalMI_Score
)
187 .arg(m_LocalMI_TotalBoltzmannScore
)
// NOTE(review): the format has nine place markers but only eight .arg()
// calls are visible; original line 188 is missing from this listing and
// presumably supplies %7 (the local model log probability) -- confirm.
189 .arg(m_Tier2_DistantMI_Score
)
190 .arg(m_DistantMI_TotalBoltzmannScore
);
// Rebuilds tier 2 from tier 1: every tier-1 piece that PhonesToMove
// contains is appended, in order, to a freshly cleared tier 2.
//   Type         - when equal to Split_LeaveSlot, the moved piece's position
//                  in tier 1 is overwritten with the dummy symbol "*".
//                  What happens for other values is not visible here.
//   PhonesToMove - the set of pieces to project onto tier 2.
// NOTE(review): this listing is a lossy extraction; the closing braces and
// any statements on the missing original lines are not visible.
193 void CStem::SplitPhonologyToTiers(enum ePhonologySplitType Type
,
194 CParse
& PhonesToMove
)
// Placeholder written into tier 1 where a piece was moved out.
196 const QString DummySymbol
= "*";
198 m_Phonology_Tier2
.ClearParse();
// Tier-1 pieces are addressed 1..Size() inclusive.
199 for (int i
= 1; i
<= m_Phonology_Tier1
.Size(); ++i
)
200 if (PhonesToMove
.Contains(m_Phonology_Tier1
.GetPiece(i
))) {
// Copy the matching piece onto tier 2.
201 m_Phonology_Tier2
.Append(m_Phonology_Tier1
.GetPiece(i
));
// Keep tier 1 the same length by leaving a "*" in the vacated slot.
203 if (Type
== Split_LeaveSlot
) {
204 CStringSurrogate dummy
= DummySymbol
;
205 m_Phonology_Tier1
.Replace(i
, dummy
);
210 // this is here specifically to do probabilistic tests on projections to C and V.
// Builds m_Phonology_Tier1_Skeleton from tier 1: a "#" piece, then one "V"
// or "C" piece per interior tier-1 piece, then a closing "#" piece.
//   Vowels - pieces to be classed as "V"; a copy is extended with '*' so
//            that '*' pieces are classed as "V" as well.
// NOTE(review): this listing is a lossy extraction; braces and the `else`
// that presumably separates the "V" and "C" appends are not visible.
211 void CStem::CreateCVTemplate(CParse
* Vowels
)
213 QString
V ("V"), C("C"); QString
boundary ("#");
// Start the skeleton afresh with the opening boundary.
215 m_Phonology_Tier1_Skeleton
.ClearParse();
216 m_Phonology_Tier1_Skeleton
.Append ( boundary
);
// Work on a copy of *Vowels extended with '*'.
218 CParse
VowelsAndAsterisk ( *Vowels
);
219 VowelsAndAsterisk
.Append ('*');
// Visit pieces 2..Size()-1 only: the first and last tier-1 pieces are
// skipped (presumably they are the '#' boundary pieces -- confirm).
222 for (int i
= 2; i
< m_Phonology_Tier1
.Size(); i
++)
// b receives the piece's display string; no visible line reads it.
224 QString b
; b
= m_Phonology_Tier1
.GetPiece(i
).Display();
225 if (VowelsAndAsterisk
.Contains ( m_Phonology_Tier1
.GetPiece(i
) ) )
227 m_Phonology_Tier1_Skeleton
.Append ( V
);
// Presumably the else-branch (the `else` itself is not visible): any
// other piece is classed as "C".
231 m_Phonology_Tier1_Skeleton
.Append ( C
);
// Closing boundary.
234 m_Phonology_Tier1_Skeleton
.Append ( boundary
);
237 void CStem::CreatePhonologyFromOrthography(eAddBoundarySymbols AddBoundaries
)
239 if (m_Phonology_Tier1
.GetKeyLength() > 0 ) return;
241 if (AddBoundaries
== BOUNDARIES
)
242 m_Phonology_Tier1
.Append(QChar('#'));
244 for (int i
= 0; i
< GetKeyLength(); ++i
)
245 m_Phonology_Tier1
.Append(CStringSurrogate(m_Key
, i
, 1));
247 if (AddBoundaries
== BOUNDARIES
)
248 m_Phonology_Tier1
.Append(QChar('#'));
251 void CStem::SetPhonology_Tier1(CParse
* PhonoRep
)
253 m_Phonology_Tier1
.Append(QChar('#'));
254 m_Phonology_Tier1
.Append(*PhonoRep
);
255 m_Phonology_Tier1
.Append(QChar('#'));