1 // Implementation of AffixAlignment, SignatureAlignment methods
2 // Copyright © 2009 The University of Chicago
3 #include "Allomorphy.h"
7 #include <QtAlgorithms>
9 #include "StringSurrogate.h"
11 #include "StringFunc.h"
14 //---------------------------------------------------------------------//
16 QStringList m_stringlist
;
18 LxStringList(CParse
*);
19 void SortBySizeRightAlphabetical();
20 int size() { return m_stringlist
.size(); }
21 QString
at(int stringno
);
22 QString
operator[](int i
){ return m_stringlist
[i
];};
23 QString
LongestCommonPrefix();
25 bool contains (QString string
) { return m_stringlist
.contains(string
); }
29 //---------------------------------------------------------------------//
30 LxStringList::LxStringList(CParse
* parse
)
32 parse
->CreateQStringList (m_stringlist
);
33 for (int affixno
= 0; affixno
< m_stringlist
.size(); affixno
++)
35 if (m_stringlist
[affixno
]=="NULL")
36 m_stringlist
[affixno
].clear();
43 //---------------------------------------------------------------------//
44 // str1 is ranked before str2 if str1 is longer than str2; if they are the same length, then if it is alphabetically prior,
45 // scanning both strings from right to left.
46 bool comparefunction1(QString str1
, QString str2
){
47 if (str1
.length() > str2
.length() ) return TRUE
;
48 if (str2
.length() > str2
.length() ) return FALSE
;
49 for (int i
= str1
.length()-1; i
>= 0 ; i
--)
51 if (str1
[i
] < str2
[i
] ) return TRUE
;
52 if (str1
[i
] > str2
[i
] ) return FALSE
;
56 //---------------------------------------------------------------------//
57 void LxStringList::SortBySizeRightAlphabetical()
59 qSort(m_stringlist
.begin(), m_stringlist
.end(), comparefunction1
);
61 //---------------------------------------------------------------------//
67 //---------------------------------------------------------------------//
68 QString
LxStringList::at(int stringno
)
70 return m_stringlist
[stringno
];
73 //---------------------------------------------------------------------//
74 bool StringSubtractFromRight(QString string1
, QString string2
, QString
& difference
)
76 // If string2 is not a suffix of string1, return FALSE;
77 // Else, difference = string1.left (string1.length - string2.length )
79 if ( string1
.endsWith(string2
) ){
80 difference
= string1
.left( string1
.length() - string2
.length() );
87 //---------------------------------------------------------------------//
88 QString
LxStringList::LongestCommonPrefix()
90 QString prefix
= m_stringlist
[0];
91 prefixlength
= prefix
.length();
93 for (int morphno
= 1; morphno
< m_stringlist
.size(); morphno
++)
95 QString morph
= m_stringlist
[morphno
];
96 int morphlength
= morph
.length();
98 for (letterno
= 0; letterno
< morph
.length() && letterno
< prefix
.length(); letterno
++)
100 if ( prefix
[letterno
] == morph
[letterno
] ) continue;
101 if ( letterno
== 0) { prefix
.clear(); return prefix
; }
103 if (letterno
== morphlength
-1) // so match is as good as it can get...
105 prefix
= prefix
.left(letterno
+1);
106 prefixlength
= prefix
.length();
108 else if (letterno
== prefixlength
-1) // also, match is as good as it can get
112 else // we have a non-zero match with this morph; we'll note that and go on to next morph.
114 prefix
= prefix
.left(letterno
+1);
115 prefixlength
= prefix
.length();
117 } // end of loop over morphs in StringList
120 //---------------------------------------------------------------------//
121 void LxStringList::Remove(QString string
)
123 m_stringlist
.remove(string
);
128 //---------------------------------------------------------------------//
129 AffixAlignment::AffixAlignment(QString Affix1
, QString Affix2
)
130 : m_OriginalAffix1(), m_OriginalAffix2(),
131 m_Affix1(Affix1
), m_Affix2(Affix2
),
132 m_Margin1(), m_Margin2(),
133 m_Shift1(), m_Shift2(),
134 m_Status(Affix1
== Affix2
? IDENTICAL
: DIFFERENT
),
135 m_Agreement_unigram(0.0),
136 m_Agreement_bigram(0.0),
137 m_Disagreement_unigram(0.0),
138 m_Disagreement_bigram(0.0) { }
140 AffixAlignment::AffixAlignment(QString Margin1
, QString Affix1
,
141 QString Margin2
, QString Affix2
)
142 : m_OriginalAffix1(), m_OriginalAffix2(),
143 m_Affix1(Affix1
), m_Affix2(Affix2
),
144 m_Margin1(Margin1
), m_Margin2(Margin2
),
145 m_Shift1(), m_Shift2(),
146 m_Status(Affix1
== Affix2
? IDENTICAL
: DIFFERENT
),
147 m_Agreement_unigram(0.0),
148 m_Agreement_bigram(0.0),
149 m_Disagreement_unigram(0.0),
150 m_Disagreement_bigram(0.0) { }
151 //---------------------------------------------------------------------//
153 //---------------------------------------------------------------------//
154 CSignatureAlignment::CSignatureAlignment(CSignature
* Sig1
, CSignature
* Sig2
)
155 : m_SigPtr1(Sig1
), m_SigPtr2(Sig2
),
157 m_Sig1AlignedAffixes(), m_Sig2AlignedAffixes()
158 { LxStringList
TempList1(Sig1
), TempList2(Sig2
);
159 int smallersize
= TempList1
.size();
160 if (TempList2
.size() < smallersize
)
162 smallersize
= TempList2
.size();
165 int sig1length
=0, sig2length
= 0;
166 for (int affixno
= 0; affixno
< smallersize
; affixno
++)
168 sig1length
+= TempList1
[affixno
].length();
169 sig2length
+= TempList2
[affixno
].length();
171 if (sig1length
<= sig2length
)
181 //---------------------------------------------------------------------//
182 bool CSignatureAlignment::FindWhetherOneIsSuffixOfTheOther()
184 // First, sort both by Size and Right-side alphabetical:
186 // We don't need to keep these local variables here, now that the signatures are kept in the SigAlignment. Fix? JG.
187 LxStringList
* ShorterSig
, *LongerSig
;
188 QString suffix1
, suffix2
, ShorterSuffix
, LongerSuffix
;
190 LxStringList
SigList1 (m_SigPtr1
);
191 LxStringList
SigList2 (m_SigPtr2
);
192 SigList1
.SortBySizeRightAlphabetical();
193 SigList2
.SortBySizeRightAlphabetical();
194 suffix1
= SigList1
.at(0);
195 suffix2
= SigList2
.at(0);
196 int size
= SigList1
.size();
197 if (suffix1
.length() == suffix2
.length()) {return FALSE
;}
198 else if (suffix1
.length() < suffix2
.length()) {
199 LongerSig
= &SigList2
;
200 ShorterSig
= &SigList1
;
202 m_LongerSig
= m_SigPtr2
;
203 m_ShorterSig
= m_SigPtr1
;
205 LongerSuffix
= suffix2
;
206 ShorterSuffix
= suffix1
;
208 LongerSig
= &SigList1
;
209 ShorterSig
= &SigList2
;
211 m_LongerSig
= m_SigPtr1
;
212 m_ShorterSig
= m_SigPtr2
;
214 LongerSuffix
= suffix1
;
215 ShorterSuffix
= suffix2
;
219 bool b_diff
= StringSubtractFromRight(LongerSuffix
, ShorterSuffix
,m_difference
);
220 if (b_diff
== FALSE
) {
223 // this checks that the difference between the corresponding suffixes is identical across the signature's suffixes:
224 for (int suffixno
= 1; suffixno
< size
; suffixno
++){
225 if ( LongerSig
->at(suffixno
) != m_difference
+ ShorterSig
->at(suffixno
) )
236 void CSignatureAlignment::FindBestAlignment()
239 QString Morph
, OtherMorph
, MarginPiece
;
240 AffixAlignment
* pAlign
;
241 CParse Margins1
, Margins2
, Suffixes1
, Suffixes2
;
242 int morphlength
, othermorphlength
;
243 CSS ssMorph
, ssOtherMorph
;
245 LxStringList
SigList1 (m_SigPtr1
);
246 LxStringList
SigList2 (m_SigPtr2
);
247 SigList1
.SortBySizeRightAlphabetical();
248 SigList2
.SortBySizeRightAlphabetical();
250 QMap
<QString
,int> AffixesMatched1
, AffixesMatched2
, MarginsFound1
, MarginsFound2
;
253 //-------------------------------- Step 1 --------------------------------------------------------//
254 // first, look for identical affixes in the sigs;
255 for (int i
= 0; i
< SigList1
.size(); i
++)
258 if (AffixesMatched1
.contains(Morph
) ) continue;
259 for (int j
= 0; j
< SigList2
.size(); j
++)
261 OtherMorph
= SigList2
[j
];
262 if ( AffixesMatched2
.contains(OtherMorph
) ) continue;
263 if (Morph
==OtherMorph
)
265 if (Morph
.length() == 0) { continue; }
266 pAlign
= new AffixAlignment (Morph
, Morph
);
267 m_AffixAlignments
.append( pAlign
);
268 AffixesMatched1
[Morph
] = 1;
269 AffixesMatched2
[Morph
] = 1;
275 //-------------------------------- Step 2 --------------------------------------------------------//
276 // now look for non-identical but end-matching affix pairs, like "es/s"
277 // Don't consider any suffixes which have been identified in Step 1 above (where there was an identical homolog)
279 for (int i
= 0; i
< SigList1
.size(); i
++)
282 if (AffixesMatched1
.contains(Morph
) ) continue;
283 morphlength
= Morph
.length();
284 if (morphlength
== 0) continue;
285 for (int j
= 0; j
< SigList2
.size(); j
++)
287 OtherMorph
= SigList2
[j
];
288 if ( AffixesMatched2
.contains(OtherMorph
) ) continue;
289 othermorphlength
= OtherMorph
.length();
290 if (morphlength
== 0) continue;
291 if ( OtherMorph
.right( morphlength
) == Morph
)
293 MarginPiece
= OtherMorph
.left(othermorphlength
- morphlength
);
294 pAlign
= new AffixAlignment (TheStringNULL
, Morph
,
295 MarginPiece
, Morph
);
296 m_AffixAlignments
.append(pAlign
);
297 AffixesMatched1
[Morph
] = 1;
298 AffixesMatched2
[OtherMorph
] = 1;
299 MarginsFound1
[TheStringNULL
] = 1;
300 MarginsFound2
[MarginPiece
] = 1;
303 if (morphlength
>0 && othermorphlength
> 0 && Morph
.right(othermorphlength
) == OtherMorph
)
305 MarginPiece
= Morph
.left( morphlength
- othermorphlength
);
306 pAlign
= new AffixAlignment ( MarginPiece
, OtherMorph
,
307 TheStringNULL
, OtherMorph
);
308 m_AffixAlignments
.append(pAlign
);
309 AffixesMatched1
[Morph
] = 1;
310 AffixesMatched2
[OtherMorph
] = 1;
311 MarginsFound1
[MarginPiece
] = 1;
312 MarginsFound2
[TheStringNULL
] =1 ;
318 //-------------------------------- Step 3 --------------------------------------------------------//
319 // if one of the signatures has an X in its margin region, and it also has an X as an affix and the other
320 // signature has a NULL, then we can align the X and the NULL:
322 //if (m_SigPtr1->ContainsNULL())
323 if (SigList1
.contains("") )
325 for (int suffixno
= 0; suffixno
< SigList2
.size(); suffixno
++)
327 Morph
= SigList2
[suffixno
];
329 if (Morph
.length() == 0) continue;
330 if ( AffixesMatched2
.contains(Morph
) ) continue;
331 if ( MarginsFound2
.contains( Morph
) )
333 pAlign
= new AffixAlignment ( TheStringNULL
, TheStringNULL
, Morph
, TheStringNULL
);
334 m_AffixAlignments
.append(pAlign
);
335 AffixesMatched1
[""] = 1;
336 AffixesMatched2
[Morph
];
341 if (SigList2
.contains("") && ! AffixesMatched2
.contains("") )
343 for (int suffixno
= 0; suffixno
< SigList1
.size(); suffixno
++)
345 Morph
= SigList1
[suffixno
] ;
346 if (Morph
.length() == 0) continue;
347 if (AffixesMatched1
.contains(Morph
) ) continue;
348 if ( MarginsFound1
.contains( Morph
) )
350 pAlign
= new AffixAlignment ( Morph
, TheStringNULL
, TheStringNULL
, TheStringNULL
);
351 m_AffixAlignments
.append(pAlign
);
352 AffixesMatched2
[""] = 1;
353 AffixesMatched1
[Morph
] = 1;
358 //-------------------------------- Step 4 --------------------------------------------------------//
360 // match NULL against NULL
361 if (SigList1
.contains("") && ! AffixesMatched1
.contains("") && SigList2
.contains("") && !AffixesMatched2
.contains("") )
363 pAlign
= new AffixAlignment ( TheStringNULL
, TheStringNULL
, TheStringNULL
, TheStringNULL
);
364 m_AffixAlignments
.append(pAlign
);
365 //MarginsFound1[TheStringNULL] = 1 ;
366 AffixesMatched1
[TheStringNULL
] = 1;
367 AffixesMatched2
[TheStringNULL
] = 1;
374 // Deprecated: (use following function)
375 void CSignatureAlignment::Display(QTextStream
& LogStream
)
377 AffixAlignment
* pAlign
;
379 if (m_AffixAlignments
.count() > 0)
385 TableData (m_SigPtr1
->Display('-')) <<
386 TableData (m_SigPtr2
->Display('-')) << EndTableRow
<<
388 MakeTableHeader ("Shift region 1") <<
389 MakeTableHeader ("Margin region 1") <<
390 MakeTableHeader ("Affix 1") <<
391 MakeTableHeader ("Shift region 2") <<
392 MakeTableHeader ("Margin region 2") <<
393 MakeTableHeader ("Affix 2") <<
396 for (int i
= 0; i
< m_AffixAlignments
.size(); i
++)
398 pAlign
= m_AffixAlignments
.at(i
);
401 TableData( pAlign
->GetShift1() ) <<
402 TableData( pAlign
->GetMargin1()) <<
403 TableData( pAlign
->GetAffix1() ) <<
404 TableData( pAlign
->GetShift2() ) <<
405 TableData( pAlign
->GetMargin2()) <<
406 TableData( pAlign
->GetAffix2() ) <<
412 LogStream
<< EndTable
;
416 void CSignatureAlignment::Display(CMiniLexicon
* mini
)
418 AffixAlignment
* pAlign
;
420 if (m_AffixAlignments
.count() > 0)
422 mini
->LogFileStartTable();
423 mini
->LogFileStartRow();
424 mini
->LogFile (m_SigPtr1
->Display('-'));
425 mini
->LogFile (m_SigPtr2
->Display('-'));
426 mini
->LogFileEndRow();
427 mini
->LogFileStartRow();
428 mini
->LogFileHeader ("Shift region 1");
429 mini
->LogFileHeader ("Margin region 1");
430 mini
->LogFileHeader ("Affix 1");
431 mini
->LogFileHeader ("Shift region 2");
432 mini
->LogFileHeader ("Margin region 2");
433 mini
->LogFileHeader ("Affix 2");
434 mini
->LogFileEndRow();
437 for (int i
= 0; i
< m_AffixAlignments
.size(); i
++)
439 pAlign
= m_AffixAlignments
.at(i
);
440 mini
->LogFileStartRow();
441 mini
->LogFileSimpleString( pAlign
->GetShift1() );
442 mini
->LogFileSimpleString( pAlign
->GetMargin1() );
443 mini
->LogFileSimpleString( pAlign
->GetAffix1() );
444 mini
->LogFileSimpleString( pAlign
->GetShift2() );
445 mini
->LogFileSimpleString( pAlign
->GetMargin2() );
446 mini
->LogFileSimpleString( pAlign
->GetAffix2() );
447 mini
->LogFileEndRow();