1 // Implementation of CTemplate methods
2 // Copyright © 2009 The University of Chicago
6 #include <Q3TextStream>
10 #include "Alignment.h"
11 #include "TemplateCollection.h"
12 #include "WordCollection.h"
13 #include "generaldefinitions.h"
// Externally defined global weight. In this file it multiplies the
// m_NumberOfColumns * (m_NumberOfColumns - 1) / 2 term that
// CTemplate::ComputeComplexity() adds to m_Complexity.
17 extern double g_Lambda
;
// Constructs a template with NumberOfColumns columns.
// m_Columns becomes a heap array of NumberOfColumns CParse pointers, and every
// slot is filled with a freshly allocated, default-constructed CParse.
19 CTemplate::CTemplate(int NumberOfColumns
)
21 m_NumberOfColumns
= NumberOfColumns
;
22 m_Columns
= new CParse
*[ m_NumberOfColumns
];
24 for (int i
= 0; i
< NumberOfColumns
; i
++)
26 m_Columns
[i
] = new CParse();
// Cached word complexity starts at 0; GetWordsTotalComplexity() treats a
// value <= 0 as "not computed yet".
29 m_WordsTotalComplexity
= 0;
// Bookkeeping members start in their "unset" state: -1 for the two column
// indices, false for the two flags.
// NOTE(review): m_Complexity, m_TemplateNumber and m_IsDeleted are not assigned
// in the statements shown here - confirm they are initialized.
33 m_ModifiedColumn
= -1;
34 m_IsNewAfterCollapse1
= false;
35 m_StemColumnInCollapse1
=-1;
36 m_SwitchOfSortingValue
= false;
// Copy constructor. The CStem base is built from Template.Display(), and each
// column is duplicated into a newly allocated CParse, so the two templates do
// not share column objects.
// NOTE(review): the parameter is a non-const reference, and the complexity
// values are read through GetWordsTotalComplexity() / GetComplexity();
// GetWordsTotalComplexity() recomputes when its cached value is <= 0, so
// copying can update the cached values of the source template.
40 CTemplate::CTemplate(CTemplate
& Template
) : CStem ( Template
.Display() )
43 m_NumberOfColumns
= Template
.m_NumberOfColumns
;
44 m_Columns
= new CParse
* [ m_NumberOfColumns
];
45 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
48 m_Columns
[i
] = new CParse ( *Template
.m_Columns
[i
]);
// Every copied column is expected to hold at least one piece.
49 Q_ASSERT ( m_Columns
[i
]->Size() > 0 );
// Copy the cached complexity figures via the source's getters.
52 m_WordsTotalComplexity
= Template
.GetWordsTotalComplexity();
53 m_Complexity
= Template
.GetComplexity();
// Remaining members are copied field by field.
54 m_TemplateNumber
= Template
.m_TemplateNumber
;
56 m_IsDeleted
= Template
.m_IsDeleted
;
57 m_ModifiedColumn
= Template
.m_ModifiedColumn
;
58 m_IsNewAfterCollapse1
= Template
.m_IsNewAfterCollapse1
;
59 m_StemColumnInCollapse1
= Template
.m_StemColumnInCollapse1
;
60 m_SwitchOfSortingValue
= Template
.m_SwitchOfSortingValue
;
// Builds a template from an alignment of two strings (pAlign->m_Str1 and
// pAlign->m_Str2). The CStem base is constructed from pAlign->SpellOut().
// Only alignments with m_Slips == 1 are handled; the number of columns is
// taken from pAlign->m_Spans.
// NOTE(review): the declarations and updates of loc1, loc2 and col, and the
// bodies of the scanning loops, are not among the statements shown here.
64 CTemplate::CTemplate(CAlignment
* pAlign
) : CStem ( pAlign
->SpellOut() )
68 int StartLoc1
, StartLoc2
;
// NOTE(review): this early return happens before m_NumberOfColumns and
// m_Columns are assigned below, so the object is left with those members
// unset - confirm callers never use or destroy such a template.
71 if ( pAlign
->m_Slips
!= 1 ) { return; }
// The two keys are rendered as strings and their ascii() pointers are kept in
// locals; nothing in the visible statements reads them afterwards
// (presumably debugger aids).
73 QString debugstring1
, debugstring2
;
74 const char* CCDebugString1
, *CCDebugString2
;
76 debugstring1
= pAlign
->m_Str1
->GetKey().Display();
77 CCDebugString1
= debugstring1
.ascii();
78 debugstring2
= pAlign
->m_Str2
->GetKey().Display();
79 CCDebugString2
= debugstring2
.ascii();
// One empty CParse per span of the alignment.
82 m_NumberOfColumns
= pAlign
->m_Spans
;
83 m_Columns
= new CParse
*[ m_NumberOfColumns
];
85 for (int c
= 0; c
< m_NumberOfColumns
; c
++)
87 m_Columns
[c
] = new CParse();
91 // it always does this loop at least once, because of initial "#" which is shared
92 while ( pAlign
->PerfectMatch (loc1
, loc2
) )
// Shared-prefix case: the first loc1 characters of string 1 go into the
// current column (arguments are presumably buffer, start offset, length).
99 { // there is an initial shared span...
100 m_Columns
[col
]->Append( CStringSurrogate (pAlign
->m_Str1
->GetKeyPointer(),0 , loc1
) );
105 else // there is no initial shared span, and we'll back up to put the # in each piece
107 StartLoc1
= loc1
- 1;
108 StartLoc2
= loc2
- 1;
// Scan condition on string 1: stay inside the string while the character of
// string 2 at position m_Match1[loc1] differs from string 1's character at loc1.
113 while ( loc1
< pAlign
->m_Length1
&&
114 pAlign
->m_Str2
->GetChar( pAlign
->m_Match1
[loc1
] ) != pAlign
->m_Str1
->GetChar( loc1
)
// Symmetric scan condition on string 2, using m_Match2.
120 while ( loc2
< pAlign
->m_Length2
&&
121 pAlign
->m_Str1
->GetChar( pAlign
->m_Match2
[loc2
] ) != pAlign
->m_Str2
->GetChar(loc2
) )
// The differing stretch of each string, from its StartLoc up to the current loc.
127 CStringSurrogate
Piece1 ( pAlign
->m_Str1
->GetKeyPointer(), StartLoc1
, loc1
- StartLoc1
);
128 CStringSurrogate
Piece2 ( pAlign
->m_Str2
->GetKeyPointer(), StartLoc2
, loc2
- StartLoc2
);
// An empty piece is represented in the column by the literal text "NULL".
// NOTE(review): the placeholder surrogate points into a temporary QString;
// this is only safe if Append() copies the characters - confirm.
131 if ( Piece1
.GetLength () == 0 )
133 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate(QString("NULL").unicode(), 0, 4));
134 m_Columns
[col
]->Append(CStringSurrogate(QString("NULL").unicode(), 0, 4));
138 m_Columns
[col
]->Append(Piece1
);
139 // m_Columns[col]->AppendInAlphabeticalOrder( Piece1, true);
143 if ( Piece2
.GetLength () == 0 )
145 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate(QString("NULL").unicode(), 0, 4), true );
146 m_Columns
[col
]->Append(
147 CStringSurrogate(QString("NULL").unicode(), 0, 4));
151 // m_Columns[col]->AppendInAlphabeticalOrder( Piece2, true);
152 m_Columns
[col
]->Append(Piece2
);
155 // yuhuask when slip == 1 Only have three cases: yn or yny
// If another column remains, the rest of string 1 (from loc1 to the end of
// its key) is appended as the trailing piece.
156 if ( col
+ 1 < m_NumberOfColumns
)
159 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate (pAlign->m_Str1->GetKeyPointer(), loc1,pAlign->m_Str1->GetKeyLength() - loc1 ), true );
160 m_Columns
[col
]->Append(CStringSurrogate (pAlign
->m_Str1
->GetKeyPointer(), loc1
,pAlign
->m_Str1
->GetKeyLength() - loc1
));
// Sanity checks: every column from index 1 on must be non-empty, and the
// first column must start with the '#' character.
163 for (int i
= 1; i
< m_NumberOfColumns
; i
++)
165 Q_ASSERT(m_Columns
[i
]->Size() > 0 );
169 Q_ASSERT (m_Columns
[0]->GetChar(0) == '#');
// Bookkeeping members start in the same "unset" state used by the
// CTemplate(int) constructor.
170 m_TemplateNumber
= 0;
173 m_ModifiedColumn
= -1;
175 m_IsNewAfterCollapse1
= false;
176 m_StemColumnInCollapse1
=-1;
177 m_SwitchOfSortingValue
= false;
// Re-initializes this template from an alignment, mirroring the
// CTemplate(CAlignment*) constructor. Returns void, so assignments cannot be
// chained.
// NOTE(review): the declarations and updates of loc1, loc2 and col, and the
// bodies of the scanning loops, are not among the statements shown here.
183 void CTemplate::operator= (CAlignment
* pAlign
)
187 int StartLoc1
, StartLoc2
;
// Release the CParse objects currently held in the columns.
// NOTE(review): the m_Columns array itself is not visibly released with
// delete[] before it is reallocated below - looks like a leak; confirm.
190 if ( m_NumberOfColumns
)
192 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
194 if ( m_Columns
[i
] ) delete m_Columns
[i
];
// NOTE(review): returning here leaves m_NumberOfColumns unchanged while the
// column objects have just been deleted, so m_Columns holds dangling
// pointers - confirm this path cannot lead to a double delete.
198 if ( pAlign
->m_Slips
!= 1 ) { return; }
// One empty CParse per span of the alignment.
200 m_NumberOfColumns
= pAlign
->m_Spans
;
202 m_Columns
= new CParse
*[ m_NumberOfColumns
];
204 for (int c
= 0; c
< m_NumberOfColumns
; c
++)
206 m_Columns
[c
] = new CParse();
// Unlike the constructor, the shared prefix is detected by comparing the
// characters of the two strings directly rather than via PerfectMatch().
210 while ( pAlign
->m_Str2
->GetChar( loc2
) == pAlign
->m_Str1
->GetChar( loc1
) )
216 if ( loc1
> 1 ) // they agree at the beginning, up to loc1 - 1;
218 m_Columns
[col
]->Append( CStringSurrogate (pAlign
->m_Str1
->GetKeyPointer(), 0, loc1
) ); // yuhuask should be loc1 not loc1 -1
// Scan condition on string 1: stay inside the string while the character of
// string 2 at position m_Match1[loc1] differs from string 1's character at loc1.
229 while ( loc1
< pAlign
->m_Length1
&&
230 pAlign
->m_Str2
->GetChar( pAlign
->m_Match1
[loc1
] ) != pAlign
->m_Str1
->GetChar( loc1
)
// Symmetric scan condition on string 2, using m_Match2.
236 while ( loc2
< pAlign
->m_Length2
&&
237 pAlign
->m_Str1
->GetChar( pAlign
->m_Match2
[loc2
] ) != pAlign
->m_Str2
->GetChar( loc2
) )
// The differing stretch of each string, from its StartLoc up to the current loc.
// NOTE(review): no assignment to StartLoc1 / StartLoc2 is visible in this
// function before they are read here - confirm they are initialized.
243 CStringSurrogate
Piece1 ( pAlign
->m_Str1
->GetKeyPointer(), StartLoc1
, loc1
- StartLoc1
); // yuhuask same problem StartLoc1 - 1 ?
244 CStringSurrogate
Piece2 ( pAlign
->m_Str2
->GetKeyPointer(), StartLoc2
, loc2
- StartLoc2
);
// An empty piece is represented in the column by the literal text "NULL".
248 if ( Piece1
.GetLength () == 0 )
250 m_Columns
[col
]->Append( CStringSurrogate(QString("NULL").unicode(), 0, 4) );
254 m_Columns
[col
]->Append( Piece1
);
258 if ( Piece2
.GetLength () == 0 )
260 m_Columns
[col
]->Append( CStringSurrogate(QString("NULL").unicode(), 0, 4) );
264 m_Columns
[col
]->Append( Piece2
);
// The rest of string 1 (from loc1 to the end of its key) is appended as the
// trailing piece.
269 m_Columns
[col
]->Append( CStringSurrogate (pAlign
->m_Str1
->GetKeyPointer(), loc1
, pAlign
->m_Str1
->GetKeyLength() - loc1
) );
271 m_TemplateNumber
= 0;
// Destructor: walks all m_NumberOfColumns columns.
// NOTE(review): the loop body is not shown here - presumably it deletes each
// m_Columns[i]; confirm the m_Columns array itself is also released.
279 CTemplate::~CTemplate(void)
282 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
290 //------------------------------------------------------
// Returns a pointer to a column by 0-based index n. The index is checked
// against the range [0, m_NumberOfColumns).
// NOTE(review): what is returned for an out-of-range n is not shown here -
// presumably a null pointer; confirm before dereferencing the result.
292 CParse
* CTemplate::GetColumn(int n
)
298 if ( n
< m_NumberOfColumns
&& n
>= 0 )
306 //------------------------------------------------------
// Builds a single-string rendering of the template: the Display() text of
// each column, in column order, each followed by "_" (so the accumulated
// text also ends with "_").
313 QString
CTemplate::Display()
317 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
319 Return
+= m_Columns
[i
]->Display() + "_";
325 /* From JG's original:
327 void CTemplate::Display(CListCtrl& List, int& LineNumber)
329 int NumberOfRows = 0;
330 for (int col = 0; col < m_NumberOfColumns; col++)
332 if (m_Columns[col]->Size() > NumberOfRows ) { NumberOfRows = m_Columns[col]->Size(); }
335 for (int row = 0; row < NumberOfRows+1; row++)
337 List.InsertItem (LineNumber + row, CString ("") );
340 List.SetItemText ( LineNumber, 0, IntToString( (int) GetSortingQuantity() ) );
345 for ( row = 0; row < NumberOfRows; row++)
347 for (int col = 0; col < m_NumberOfColumns; col++)
349 if ( m_Columns[col]->Size() > row )
351 List.SetItemText(LineNumber+row, col + 1, m_Columns[col]->GetAtCString(row+1) );
363 QString CTemplate::DisplayFirstLine()
367 for (int i = 0; i < m_NumberOfColumns; i++)
369 Return += m_Columns[i]->GetPiece(1).Display() + "_";
376 void CTemplate::Display(CListCtrl& List, int& LineNumber)
379 int NumberOfRows = 0;
380 for (int col = 0; col < m_NumberOfColumns; col++)
382 if (m_Columns[col]->Size() > NumberOfRows ) { NumberOfRows = m_Columns[col]->Size(); }
385 for (int row = 0; row < NumberOfRows+1; row++)
387 List.InsertItem (LineNumber + row, CString ("") );
390 // if ( GetSortingQuantity() > 15000 || GetSortingQuantity() < 0 )
392 // ofstream out ("c:\\4_tests\\IntToString.txt", ios::app );
393 // out << endl << endl; OutputForFile (out);
394 // out << endl << GetSortingQuantity();
395 // out << " " << IntToString ( (int) GetSortingQuantity () );
397 List.SetItemText ( LineNumber, 0, IntToString( (int) GetSortingQuantity() ) );
399 for ( row = 0; row < NumberOfRows; row++)
401 for (int col = 0; col < m_NumberOfColumns; col++)
403 if ( m_Columns[col]->Size() > row )
405 List.SetItemText(LineNumber+row, col + 1, m_Columns[col]->GetAtCString(row+1) );
// Adds this template to a Q3ListView by creating a CTemplateListViewItem
// for it.
416 void CTemplate::ListDisplay(Q3ListView
* List
)
// The return value is discarded; the call is presumably made for its side
// effect of refreshing the cached complexity/sorting members.
418 GetSortingQuantity();
// The new item's pointer is deliberately dropped (cast to void).
// NOTE(review): presumably the list view takes ownership of the item - confirm.
419 static_cast<void>(new CTemplateListViewItem(List
, this));
// Returns the 0-based index of the first column that holds more than one
// piece.
// NOTE(review): the value returned when no column qualifies is not shown here.
422 int CTemplate::GetVerticalColumn()
425 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
427 if ( m_Columns
[i
]->Size() > 1 ) return i
;
// Appends every piece of Morphemes to column n, in order. Pieces are
// addressed 1-based (1 .. Morphemes.Size()). n is used as a direct index
// into m_Columns; no range check is visible here.
432 void CTemplate::AddToColumn(CParse
& Morphemes
, int n
)
435 for (int i
= 1; i
<= Morphemes
.Size(); i
++)
437 // m_Columns[n]->AppendInAlphabeticalOrder ( Morphemes.GetPiece(i), true );
438 m_Columns
[n
]->Append(Morphemes
.GetPiece(i
));
// Appends the single string SS to column n. n is used as a direct index
// into m_Columns; no range check is visible here.
442 void CTemplate::AddToColumn(CStringSurrogate
& SS
, int n
)
444 // m_Columns[n]->AppendInAlphabeticalOrder ( SS, true );
445 m_Columns
[n
]->Append(SS
);
// Returns the value used to sort templates, derived from
// m_WordsTotalComplexity - m_Complexity.
// Although declared const, the function assigns m_CurrentSortComplexity and
// m_NewSortComplexity (presumably declared mutable - confirm).
452 float CTemplate::GetSortingQuantity() const
// Refresh both complexity figures.
// NOTE(review): any conditions guarding these two calls are not shown here.
456 { ComputeComplexity(); }
458 { ComputeWordsTotalComplexity(); }
461 Q_ASSERT ( m_WordsTotalComplexity
- m_Complexity
< 55000);
464 m_CurrentSortComplexity
= m_WordsTotalComplexity
- m_Complexity
;
466 // I set up a switch flag to indicate that we switch the sorting value
467 if ( m_SwitchOfSortingValue
)
469 int NumberOfStems
=0;
475 // Roughly get the number of stems
// The estimate is the size of the largest column.
476 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
478 OneColumn
= m_Columns
[i
];
479 if ( OneColumn
->Size() > NumberOfStems
)
481 NumberOfStems
= OneColumn
->Size();
// Switched mode: the complexity difference is divided by the stem estimate.
// NOTE(review): NumberOfStems stays 0 if every column is empty, which makes
// this a division by zero - confirm that cannot happen.
486 m_NewSortComplexity
= (m_WordsTotalComplexity
- m_Complexity
)/(float)NumberOfStems
;
// Presumably the else branch (flag not set): the plain difference is used.
490 m_NewSortComplexity
= m_WordsTotalComplexity
- m_Complexity
;
495 return float ( m_NewSortComplexity
);
496 //return float ( m_WordsTotalComplexity - m_Complexity );
504 float CTemplate::GetSortingQuantity()
507 // if ( m_Complexity <= 0 )
508 { ComputeComplexity(); }
510 // if ( m_WordsTotalComplexity <= 0 )
511 { ComputeWordsTotalComplexity(); }
513 // ofstream out ("c:\\4_tests\\TemplateDisplay.txt", ios::app );
515 Q_ASSERT ( m_WordsTotalComplexity - m_Complexity < 55000);
518 // if ( m_WordsTotalComplexity < 0 || m_WordsTotalComplexity > 15000)
520 // OutputForFile(out);
522 // if ( m_Complexity < 0 || m_Complexity > 15000)
524 // OutputForFile(out);
527 m_SortingComplexity = m_WordsTotalComplexity - m_Complexity;
529 return float ( m_WordsTotalComplexity - m_Complexity );
// Not implemented: always throws an instance of the function-local type
// not_implemented. Because that type is local to this function, callers
// cannot name it in a catch clause.
535 void CTemplate::AddAlignment(CAlignment
* pAlignment
)
537 struct not_implemented
{ };
538 throw not_implemented();
// Unreachable after the throw; it only references the parameter
// (presumably to silence an unused-parameter warning).
539 static_cast<void>(pAlignment
);
541 // m_Alignments.AddTail( pAlignment);
// Decides whether this template and pAlignment (another template, despite
// the parameter name) share enough columns to be merged.
// Templates with different column counts are rejected immediately with FALSE.
// NOTE(review): the statements that set ConflateColumn and the return
// values of the individual branches are not shown here.
// NOTE(review): each comparison has a dereferenced CParse on the left and a
// CParse* on the right, so it relies on an operator== overload that accepts
// a pointer - confirm.
545 bool CTemplate::ShouldConflate ( CTemplate
* pAlignment
, int& ConflateColumn
)
548 /* Works only with templates of 2 or 3 columns.
549 If two alignments share a column, they are collapsed. */
552 if ( m_NumberOfColumns
!= pAlignment
->m_NumberOfColumns
) { return FALSE
; }
// Two columns: test column 0 first, then column 1.
553 if ( m_NumberOfColumns
== 2 )
555 if (// m_Columns[0]->Size() > 1 &&
556 *m_Columns
[0] == pAlignment
->m_Columns
[0] )
561 else if (// m_Columns[1]->Size() > 1 &&
562 *m_Columns
[1] == pAlignment
->m_Columns
[1] )
571 } // end of Size = 2;
// Three columns: two of the three must match. The pairs are tried in the
// order (0,2), (1,2), (0,1).
572 else if ( m_NumberOfColumns
== 3 )
574 if ( //m_Columns[0]->Size() > 1 &&
575 *m_Columns
[0] == pAlignment
->m_Columns
[0] &&
576 //m_Columns[2]->Size() > 1 &&
577 *m_Columns
[2] == pAlignment
->m_Columns
[2]
583 else if (// m_Columns[1]->Size() > 1 &&
584 *m_Columns
[1] == pAlignment
->m_Columns
[1] &&
585 // m_Columns[2]->Size() > 1 &&
586 *m_Columns
[2] == pAlignment
->m_Columns
[2] )
591 else if (// m_Columns[0]->Size() > 1 &&
592 *m_Columns
[0] == pAlignment
->m_Columns
[0] &&
593 // m_Columns[1]->Size() > 1 &&
594 *m_Columns
[1] == pAlignment
->m_Columns
[1] )
604 } // end of Size = 3;
// Merges one column of pOther into this template: every piece of pOther's
// column `Column` is appended to this template's column with the same index.
// Pieces are addressed 1-based. Only that one column is touched, and
// pOther is not modified. No duplicate filtering is visible here.
611 void CTemplate::ConflateWith ( CTemplate
* pOther
, int Column
)
613 CParse
& their_column
= *pOther
->m_Columns
[Column
];
614 CParse
& my_column
= *m_Columns
[Column
];
616 for (int i
= 1; i
<= their_column
.Size(); ++i
)
617 my_column
.Append(their_column
.GetPiece(i
));
// Tries to lower the complexity of the template by shifting shared material
// across the boundary between adjacent columns. It runs two passes over the
// column pairs (i-1, i):
//   pass 1 moves a common prefix of column i leftward, onto column i-1;
//   pass 2 moves a common suffix of column i-1 rightward, onto column i.
// A move is applied only when the summed ComputeComplexity() of the affected
// columns gets smaller. Pieces that do not share the moved material are split
// off into a copy of this template, which is handed to TempTemplates.
// NOTE(review): the returned bool and several statements (skip/continue
// lines, clearing of Words) are not shown here.
620 bool CTemplate::Readjust(CTemplateCollection
* TempTemplates
, int MinCount
)
622 // bool AllSame = TRUE;
624 CWordCollection Words
;
628 ComputeWordsTotalComplexity();
// ---- Pass 1: move a shared prefix of column i leftward ----
631 for (i
= 1; i
< m_NumberOfColumns
; i
++)
// Column-size test against MinCount (presumably small columns are skipped).
633 if ( (int) m_Columns
[i
]->Size() < MinCount
)
// Collect the pieces of column i so their common prefix can be found.
639 for (int w
= 1; w
<= (int) m_Columns
[i
]->Size(); w
++)
641 Words
<< m_Columns
[i
]->GetPiece(w
).Display();
643 CStringSurrogate ssPrefix
= Words
.FindMaximalMajorityPrefix();
644 if ( ssPrefix
.GetLength() == 0 ) { continue; }
646 //------------------------------------------
647 // if we have some material that might potentially be
648 // moved leftward, let's calculate the complexity in that
651 //------------------------------------------
// Work on copies so the real columns change only if the move pays off.
654 CParse RightColumn
= *m_Columns
[ i
];
655 CParse RightColumnOtherMorphemes
= RightColumn
;
656 CParse LeftColumn
= *m_Columns
[i
-1];
// RightColumn: only the pieces that begin with the prefix, prefix removed.
// RightColumnOtherMorphemes: the pieces that do not begin with it.
// LeftColumn: every piece gets the prefix attached at its end.
658 RightColumn
.RemovePiecesThatDoNotBegin ( ssPrefix
);
659 RightColumn
.RemovePrefixFromAllPieces ( ssPrefix
);
660 RightColumnOtherMorphemes
661 .RemovePiecesThatBegin( ssPrefix
);
663 LeftColumn
.SuffixToAllPieces2 ( ssPrefix
);
665 double OldComplexity
= m_Columns
[i
]->ComputeComplexity() +
666 m_Columns
[i
-1]->ComputeComplexity();
667 double NewComplexity
= RightColumn
.ComputeComplexity() +
668 LeftColumn
.ComputeComplexity();
// Left-over pieces would need a second template; charge its cost as well
// (their own complexity plus the unchanged left column).
670 if (RightColumnOtherMorphemes
.Size() > 0 )
672 NewComplexity
+= RightColumnOtherMorphemes
.ComputeComplexity() +
673 m_Columns
[i
-1]->ComputeComplexity();
678 if ( NewComplexity
< OldComplexity
)
680 if ( RightColumnOtherMorphemes
.Size() > 0 )
// Split the left-over pieces off into a copy of this template that keeps
// the original left column.
// NOTE(review): the address of a stack-local CTemplate is passed to
// AddTemplate(); this is only safe if AddTemplate copies it - confirm.
682 CTemplate
TempTemplate ( *this );
683 *TempTemplate
.GetColumn( i
-1 ) = *m_Columns
[ i
-1 ];
684 *TempTemplate
.GetColumn( i
) = RightColumnOtherMorphemes
;
686 TempTemplates
->AddTemplate ( &TempTemplate
);
// Commit the move.
689 *m_Columns
[i
] = RightColumn
;
690 *m_Columns
[i
-1] = LeftColumn
;
// ---- Pass 2: move a shared suffix of column i-1 rightward ----
696 CStringSurrogate TempSS
;
699 for ( i
= 1; i
< m_NumberOfColumns
; i
++)
// NOTE(review): the size test looks at column i, but the pieces examined
// below come from column i-1 - confirm which column was intended.
702 if ( (int) m_Columns
[i
]->Size() < MinCount
)
// Each piece of the left column is flagged as backwards, presumably so the
// prefix finder can be reused to find a common suffix.
708 for (int w
= 1; w
<= (int) m_Columns
[i
-1]->Size(); w
++)
710 TempSS
= m_Columns
[i
-1]->GetPiece(w
);
711 TempSS
.SetBackwards();
// The prefix found on the reversed pieces is flagged backwards again to
// obtain the suffix.
714 TempSS
= Words
.FindMaximalMajorityPrefix();
715 TempSS
.SetBackwards();
716 CStringSurrogate
ssSuffix ( TempSS
);
718 if ( ssSuffix
.GetLength() == 0 ) { continue; }
720 //------------------------------------------
722 // if we have some material that might potentially be
723 // moved rightward, let's calculate the complexity in that
726 //------------------------------------------
// Work on copies so the real columns change only if the move pays off.
729 CParse RightColumn
= *m_Columns
[ i
];
730 CParse LeftColumn
= *m_Columns
[i
-1];
731 CParse LeftColumnOtherMorphemes
= LeftColumn
;
// LeftColumn: only the pieces that end with the suffix, suffix removed.
// LeftColumnOtherMorphemes: the pieces that do not end with it.
// RightColumn: every piece gets the suffix attached at its front.
733 LeftColumn
.RemovePiecesThatDoNotEnd ( ssSuffix
);
734 LeftColumn
.RemoveSuffixFromAllPieces ( ssSuffix
);
735 LeftColumnOtherMorphemes
736 .RemovePiecesThatEnd ( ssSuffix
);
738 RightColumn
.PrefixToAllPieces2 ( ssSuffix
);
740 double OldComplexity
= m_Columns
[i
] ->ComputeComplexity() +
741 m_Columns
[i
-1]->ComputeComplexity();
743 double NewComplexity
= RightColumn
.ComputeComplexity() +
744 LeftColumn
.ComputeComplexity();
// Left-over pieces would need a second template; charge its cost as well
// (their own complexity plus the unchanged right column).
746 if (LeftColumnOtherMorphemes
.Size() > 0 )
748 NewComplexity
+= LeftColumnOtherMorphemes
.ComputeComplexity() +
749 m_Columns
[i
]->ComputeComplexity();
754 if ( NewComplexity
< OldComplexity
)
756 if ( LeftColumnOtherMorphemes
.Size() > 0 )
// Split the left-over pieces off into a copy of this template that keeps
// the original right column.
// NOTE(review): same stack-local address concern as in pass 1.
758 CTemplate
TempTemplate ( *this );
759 *TempTemplate
.GetColumn( i
-1 ) = LeftColumnOtherMorphemes
;
760 *TempTemplate
.GetColumn( i
) = *m_Columns
[ i
];
762 TempTemplates
->AddTemplate ( &TempTemplate
);
// Commit the move.
765 *m_Columns
[i
] = RightColumn
;
766 *m_Columns
[i
-1] = LeftColumn
;
781 void CTemplate::OutputForFile(ofstream& out )
783 int MaxSize = (int) m_Columns[0]->Size();
785 for ( int i = 1; i < m_NumberOfColumns; i++)
787 if ( (int) m_Columns[i]->Size() > MaxSize )
789 MaxSize = m_Columns[i]->Size();
792 for (int row = 0; row < MaxSize; row++)
795 for (int col = 0; col < m_NumberOfColumns; col++)
798 if ( row < (int) m_Columns[col]->Size() )
800 out << m_Columns[col]->GetAt(row+1) << ' ';
808 out << endl << endl << "Complexity: "<< endl;
809 out << "From length: "<< base2log (m_NumberOfColumns) << endl;
810 for ( i = 0; i < m_NumberOfColumns; i++)
812 out << i << " " << m_Columns[i]->ComputeComplexity() << endl;
// Sums GetKeyLength() over all columns into Total (presumably the value
// returned; the declaration of Total and the return are not shown here).
818 int CTemplate::GetNumberOfLetters()
823 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
825 Total
+= m_Columns
[i
]->GetKeyLength();
// Computes the complexity of the template itself and stores it in
// m_Complexity. It has two parts:
//   a structural term: base2log(m_NumberOfColumns)
//                      + g_Lambda * m_NumberOfColumns * (m_NumberOfColumns - 1) / 2
//   the sum of ComputeComplexity() over all columns.
// Although declared const, the function writes m_Complexity (presumably
// declared mutable - confirm).
// NOTE(review): the first visible statement uses +=; presumably m_Complexity
// is reset to 0 just before it - confirm. The return statement is not shown
// here either.
831 float CTemplate::ComputeComplexity() const
834 m_Complexity
+= float ( base2log (m_NumberOfColumns
) +
835 g_Lambda
* m_NumberOfColumns
* (m_NumberOfColumns
- 1 ) / 2 );
// The structural term alone is expected to stay below 20.
836 Q_ASSERT (m_Complexity
< 20 );
837 for (int i
= 0; i
< m_NumberOfColumns
; i
++)
839 m_Complexity
+= (float) m_Columns
[ i
]->ComputeComplexity();
840 Q_ASSERT ( m_Complexity
< 55000 );
// Final sanity bounds on the total.
842 Q_ASSERT ( m_Complexity
> 0 );
843 Q_ASSERT ( m_Complexity
< 55000 );
// Computes the complexity of the words the template stands for, stores it in
// m_WordsTotalComplexity and returns it.
// The word list starts as a copy of column 0 and is extended column by
// column using the pieces of each later column; the result is
// Words.ComputeComplexity().
// Although declared const, the function writes m_WordsTotalComplexity
// (presumably declared mutable - confirm).
// NOTE(review): the declarations of NewWords / TempWords and the statements
// that reset TempWords and carry NewWords back into Words are not shown here.
850 float CTemplate::ComputeWordsTotalComplexity() const
855 // first figure out the words:
// A template without columns has complexity 0.
856 if (m_NumberOfColumns
< 1)
858 m_WordsTotalComplexity
= 0;
859 //out << " No columns." ;
860 return m_WordsTotalComplexity
;
863 CParse Words
= *m_Columns
[0];
864 for (int col
= 1; col
< m_NumberOfColumns
; col
++)
866 NewWords
.ClearParse();
867 for (int row
= 1; row
<= (int) m_Columns
[col
]->Size(); row
++)
870 CStringSurrogate ss
= m_Columns
[col
]->GetPiece(row
);
// The literal piece "NULL" is the placeholder for an empty piece and is
// tested for separately; any other piece is attached to the end of every
// word built so far.
871 if (ss
== CStringSurrogate(QString("NULL").unicode(), 0, 4) )
873 // TempWords = Words;
876 TempWords
.SuffixToAllPieces2 ( m_Columns
[col
]->GetPiece(row
) );
878 NewWords
.Append (&TempWords
);
884 m_WordsTotalComplexity
= (float) Words
.ComputeComplexity();
886 Q_ASSERT (m_WordsTotalComplexity
< 50000 );
888 return m_WordsTotalComplexity
;
// Getter for the template complexity. A cached m_Complexity <= 0 is tested
// for.
// NOTE(review): the guarded statement and the return are not shown here -
// presumably it calls ComputeComplexity() and returns m_Complexity, like
// GetWordsTotalComplexity(); confirm.
894 float CTemplate:: GetComplexity()
896 if (m_Complexity
<= 0)
// Returns the cached word complexity, computing it first when the cached
// value is <= 0 (which is treated as "not computed yet").
902 float CTemplate:: GetWordsTotalComplexity()
904 if (m_WordsTotalComplexity
<= 0)
906 ComputeWordsTotalComplexity();
908 return m_WordsTotalComplexity
;
// Fills OneWordAndParse with the words this template generates, each mapped
// to a heap-allocated CParse holding its pieces.
// Words are built column by column: the entries from column 0 seed a working
// map, and for each later column every existing entry is combined with every
// piece of that column. "NULL" is the placeholder for an empty piece.
// The CParse objects are created with new and handed out as raw pointers
// through OneWordAndParse.
// NOTE(review): the branch structure (else lines), the declarations of row,
// CurrentSize, Column, TheParse and NewParse, and the assignment of Column
// are not shown here.
913 void CTemplate::GetWordsAndParses(StringToParse
& OneWordAndParse
)
918 StringToParse TempWordsAndParses
;
919 StringToParse SwapWordsAndParses
;
920 StringToParse::Iterator StringToParseIt
;
921 QString TheWord
, OneWord
, NewWord
, DumpWord
;
// ---- Seed the working map from column 0 ----
927 CurrentSize
= (int) m_Columns
[0]->Size();
928 for ( row
= 0; row
<CurrentSize
; row
++)
930 TheWord
= m_Columns
[0]->GetPiece(row
+1).Display();
// Drop the first character of the piece (presumably the leading '#' that
// the alignment constructor asserts on column 0).
932 TheWord
= TheWord
.right((TheWord
.length()-1));
933 if ( TheWord
.length() == 0)
935 TheWord
= QString("NULL");
938 TheParse
= new CParse(CStringSurrogate(TheWord
));
939 TempWordsAndParses
.insert(TheWord
,TheParse
);
// ---- Extend every word with the pieces of each later column ----
943 for ( int i
= 1; i
< m_NumberOfColumns
; i
++)
// NOTE(review): the loop counter is i but the column is indexed with Column;
// presumably Column is set from i in a statement not shown here - confirm.
947 CurrentSize
= (int) m_Columns
[Column
]->Size();
949 for ( StringToParseIt
= TempWordsAndParses
.begin();StringToParseIt
!= TempWordsAndParses
.end(); StringToParseIt
++)
951 TheWord
= StringToParseIt
.key();
952 TheParse
= StringToParseIt
.data();
954 // It only works for first col;
955 if (TheWord
== QString("NULL"))
964 for ( row
= 0; row
<CurrentSize
; row
++)
966 OneWord
= m_Columns
[Column
]->GetPiece(row
+1).Display();
// A "NULL" piece: the visible statement for this case copies the parse
// built so far.
969 if ( OneWord
== QString("NULL"))
971 // Impossible We get "NULL" + "NULL"
973 NewParse
= new CParse(*TheParse
);
// A real piece: the new word is the old word followed by the piece, and the
// new parse is a copy of the old parse with the piece appended.
979 NewWord
= TheWord
+ OneWord
;
980 NewParse
= new CParse(*TheParse
);
981 NewParse
->Append(CStringSurrogate(OneWord
));
// Presumably the case where the word so far is "NULL": the parse starts
// fresh with just this piece.
986 NewParse
= new CParse();
987 NewParse
->Append(CStringSurrogate(OneWord
));
// Each generated word is expected to be new; a repeat is reported through a
// message box.
// NOTE(review): on that path the freshly allocated NewParse is presumably
// not stored anywhere - confirm it is freed.
992 if (! SwapWordsAndParses
.contains(NewWord
))
994 SwapWordsAndParses
.insert(NewWord
,NewParse
);
998 QMessageBox::information( NULL
, "debug","Impossible Here. Please Let Yu Hu Debug it!", "OK") ;
// The words built for this column replace the working set.
// NOTE(review): clear() drops the old CParse pointers; the parses they
// pointed to were copied above, and no delete of the old ones is visible -
// possible leak; confirm.
1006 TempWordsAndParses
.clear();
1008 for ( StringToParseIt
= SwapWordsAndParses
.begin();StringToParseIt
!= SwapWordsAndParses
.end(); StringToParseIt
++)
1010 TheWord
= StringToParseIt
.key();
1011 TheParse
= StringToParseIt
.data();
1013 TempWordsAndParses
.insert(TheWord
, TheParse
);
1016 SwapWordsAndParses
.clear();
// ---- Hand the finished entries to the caller's map ----
1022 for ( StringToParseIt
= TempWordsAndParses
.begin();StringToParseIt
!= TempWordsAndParses
.end(); StringToParseIt
++)
1024 TheWord
= StringToParseIt
.key();
1025 TheParse
= StringToParseIt
.data();
1026 OneWordAndParse
.insert(TheWord
, TheParse
);
1037 void CTemplate::AdjustMeByMovingCommonTailOrHead2(StringToFloat
& MorphemeAndItsComplexity
, int MaximumSizeOfEachColumn
, int MaximumNumberOfMovingLetters
, bool PrintChangedTemplates
,\
1038 int Loopi
, bool ShouldUseStickNess
, StringToStringToFloat
& GlobalStickNess
, \
1039 float TotalGlobalStickNess
, float TotalWords
)
1045 int backletteri
, headletteri
;
1046 QString TheMorpheme
;
1047 bool PassColumnSizeTest
= false;
1048 CParse
* LeftColumn
, *RightColumn
, *OneColumn
;
1049 bool changed
= false;
1051 int MoveWhichColumn
;
1052 int MoveL2RorR2L
; // 0 --> left to right; 1 --> right to left
1053 int MoveHowmanyLetters
= 0;
1054 float OneMorphemeComplexity
, AlternativeMorphemeComplexity
;
1055 QString CommonTail
, CommonHead
, MyTail
, MyHead
;
1056 QString Newmorpheme
;
1057 float NewmorphemeComplexity
, Diff
, BestDiff
, BestStickNessDiff
;
1058 float StickNessDiff
;
1059 float LeftColumnOldStickness
, LeftColumnNewStickness
;
1060 float RightColumnOldStickness
, RightColumnNewStickness
;
1062 QString TempMorphemei
, TempMorphemej
;
1063 StringToFloat
* oneCollection
;
1065 CParse PossibleNewColumn
;
1069 //Through each column
1070 for ( i
= 0; i
< m_NumberOfColumns
-1; i
++)
1073 if ( i
== m_ModifiedColumn
)
1078 LeftColumn
= m_Columns
[i
];
1079 RightColumn
= m_Columns
[i
+1];
1081 if (( LeftColumn
->Size() <= MaximumSizeOfEachColumn
) && (RightColumn
->Size() <= MaximumSizeOfEachColumn
))
1083 PassColumnSizeTest
= true;
1087 PassColumnSizeTest
= false;
1092 LeftColumnOldStickness
= 0;
1093 LeftColumnNewStickness
= 0;
1094 RightColumnOldStickness
= 0;
1095 RightColumnNewStickness
= 0;
1097 if ( ShouldUseStickNess
)
1100 if ( LeftColumn
->Size() != 1)
1103 for ( Tempi
= 1; Tempi
< LeftColumn
->Size(); Tempi
++)
1105 TempMorphemei
= LeftColumn
->GetPiece(Tempi
).Display();
1109 TempMorphemei
= TempMorphemei
.right(TempMorphemei
.length() -1);
1110 if ( TempMorphemei
.length() ==0)
1112 TempMorphemei
= QString("NULL");
1118 for ( Tempj
= Tempi
+ 1; Tempj
<= LeftColumn
->Size(); Tempj
++)
1120 TempMorphemej
= LeftColumn
->GetPiece(Tempj
).Display();
1124 TempMorphemej
= TempMorphemej
.right(TempMorphemej
.length() -1);
1125 if ( TempMorphemej
.length() ==0)
1127 TempMorphemej
= QString("NULL");
1133 if ( TempMorphemei
> TempMorphemej
)
1135 oneCollection
= GlobalStickNess
[TempMorphemei
];
1136 oneFloat
= (*oneCollection
)[TempMorphemej
];
1137 LeftColumnOldStickness
+= oneFloat
;
1141 oneCollection
= GlobalStickNess
[TempMorphemej
];
1142 oneFloat
= (*oneCollection
)[TempMorphemei
];
1143 LeftColumnOldStickness
+= oneFloat
;
1155 if ( RightColumn
->Size() != 1)
1158 for ( Tempi
= 1; Tempi
< RightColumn
->Size(); Tempi
++)
1160 TempMorphemei
= RightColumn
->GetPiece(Tempi
).Display();
1163 for ( Tempj
= Tempi
+1; Tempj
<= RightColumn
->Size(); Tempj
++)
1165 TempMorphemej
= RightColumn
->GetPiece(Tempj
).Display();
1168 if ( TempMorphemei
> TempMorphemej
)
1170 oneCollection
= GlobalStickNess
[TempMorphemei
];
1171 oneFloat
= (*oneCollection
)[TempMorphemej
];
1172 RightColumnOldStickness
+= oneFloat
;
1176 oneCollection
= GlobalStickNess
[TempMorphemej
];
1177 oneFloat
= (*oneCollection
)[TempMorphemei
];
1178 RightColumnOldStickness
+= oneFloat
;
1189 } // ShouldUseStickNess, then get the LeftColumnOldStickness, RightColumnOldStickness
1198 while ( (backletteri
<= MaximumNumberOfMovingLetters
))
1201 LeftColumnNewStickness
= 0;
1202 RightColumnNewStickness
= 0;
1204 // Need experiment on how to deal with "NULL" since "NULL" is too common
1205 if ( (static_cast <int> ( (LeftColumn
->Size() )) == 1) && (LeftColumn
->GetPiece(1).Display().length()<=backletteri
))
1212 PossibleNewColumn
.ClearParse();
1214 // Check whether all words have backletteri tailletter in common
1215 for ( row
=1; row
<= LeftColumn
->Size(); row
++)
1217 TheMorpheme
= LeftColumn
->GetPiece(row
).Display();
1222 TheMorpheme
= TheMorpheme
.right(TheMorpheme
.length() -1);
1224 if ( TheMorpheme
.length() ==0)
1226 TheMorpheme
= QString("NULL");
1231 if ( TheMorpheme
== QString("NULL"))
1237 if ( static_cast <int> ( TheMorpheme
.length() ) < backletteri
)
1243 MyTail
= TheMorpheme
.right(backletteri
);
1247 CommonTail
= MyTail
;
1251 if ( MyTail
!= CommonTail
)
1263 // Now Calculate the Alternative Complexity
1266 for ( row
=1; row
<= LeftColumn
->Size(); row
++)
1268 TheMorpheme
= LeftColumn
->GetPiece(row
).Display();
1272 TheMorpheme
= TheMorpheme
.right(TheMorpheme
.length() -1);
1274 if ( TheMorpheme
.length() ==0)
1276 TheMorpheme
= QString("NULL");
1281 if (!MorphemeAndItsComplexity
.contains(TheMorpheme
)) //, OneMorphemeComplexity))
1283 QMessageBox::information(NULL
, "Debug", "Can't find this Morpheme", "OK");
1287 OneMorphemeComplexity
= MorphemeAndItsComplexity
[TheMorpheme
];
1289 Newmorpheme
= TheMorpheme
.left(TheMorpheme
.length() - backletteri
);
1291 if ( Newmorpheme
.length() ==0)
1293 Newmorpheme
= QString("NULL");
1296 // Populate the possible Column
1297 PossibleNewColumn
.Append(Newmorpheme
);
1299 if (!MorphemeAndItsComplexity
.contains(Newmorpheme
))//, AlternativeMorphemeComplexity))
1301 NewmorphemeComplexity
= -base2log(1.0/TotalWords
); // Big Penalty
1305 AlternativeMorphemeComplexity
= MorphemeAndItsComplexity
[Newmorpheme
];
1306 NewmorphemeComplexity
= AlternativeMorphemeComplexity
;
1309 Diff
+= NewmorphemeComplexity
- OneMorphemeComplexity
;
1315 // Compute the possible Column Stickness
1316 if ( ShouldUseStickNess
)
1318 if ( PossibleNewColumn
.Size() > 1)
1321 for ( Tempi
= 1; Tempi
< PossibleNewColumn
.Size(); Tempi
++)
1323 TempMorphemei
= PossibleNewColumn
.GetPiece(Tempi
).Display();
1325 for ( Tempj
= Tempi
+ 1; Tempj
<= PossibleNewColumn
.Size(); Tempj
++)
1327 TempMorphemej
= PossibleNewColumn
.GetPiece(Tempj
).Display();
1329 if ( TempMorphemei
> TempMorphemej
)
1331 if ( GlobalStickNess
.contains(TempMorphemei
))
1333 oneCollection
= GlobalStickNess
[TempMorphemei
];
1335 if ( oneCollection
->contains(TempMorphemej
))
1337 oneFloat
= (*oneCollection
)[TempMorphemej
];
1338 LeftColumnNewStickness
+= oneFloat
;
1342 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1349 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1354 if ( GlobalStickNess
.contains(TempMorphemej
))
1356 oneCollection
= GlobalStickNess
[TempMorphemej
];
1357 if ( oneCollection
->contains(TempMorphemei
))
1359 oneFloat
= (*oneCollection
)[TempMorphemei
];
1360 LeftColumnNewStickness
+= oneFloat
;
1364 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1371 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1383 LeftColumnNewStickness
= 0.0;
1388 PossibleNewColumn
.ClearParse();
1389 for ( row
=1; row
<= RightColumn
->Size(); row
++)
1391 TheMorpheme
= RightColumn
->GetPiece(row
).Display();
1393 if (!MorphemeAndItsComplexity
.contains(TheMorpheme
))
1395 QMessageBox::information (NULL
, "Debug", "Can't find this Morpheme", "OK");
1399 OneMorphemeComplexity
= MorphemeAndItsComplexity
[TheMorpheme
];
1401 if ( TheMorpheme
== QString("NULL"))
1403 Newmorpheme
= CommonTail
;
1407 Newmorpheme
= CommonTail
+ TheMorpheme
;
1410 PossibleNewColumn
.Append(Newmorpheme
);
1412 if (!MorphemeAndItsComplexity
.contains(Newmorpheme
)) //, ))
1414 NewmorphemeComplexity
= -base2log(1.0/TotalWords
); // Big Penalty
1418 AlternativeMorphemeComplexity
= MorphemeAndItsComplexity
[Newmorpheme
];
1419 NewmorphemeComplexity
= AlternativeMorphemeComplexity
;
1422 Diff
+= NewmorphemeComplexity
- OneMorphemeComplexity
;
1427 // Compute the possible Column Stickness
1428 if ( ShouldUseStickNess
)
1430 if ( PossibleNewColumn
.Size() > 1)
1433 for ( Tempi
= 1; Tempi
< PossibleNewColumn
.Size(); Tempi
++)
1435 TempMorphemei
= PossibleNewColumn
.GetPiece(Tempi
).Display();
1437 for ( Tempj
= Tempi
+ 1; Tempj
<= PossibleNewColumn
.Size(); Tempj
++)
1439 TempMorphemej
= PossibleNewColumn
.GetPiece(Tempj
).Display();
1441 if ( TempMorphemei
> TempMorphemej
)
1443 if ( GlobalStickNess
.contains(TempMorphemei
))
1445 oneCollection
= GlobalStickNess
[TempMorphemei
];
1446 if ( oneCollection
->contains(TempMorphemej
))
1448 oneFloat
= (*oneCollection
)[TempMorphemej
];
1449 RightColumnNewStickness
+= oneFloat
;
1453 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1460 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1465 if ( GlobalStickNess
.contains(TempMorphemej
))
1467 oneCollection
= GlobalStickNess
[TempMorphemej
];
1468 if ( oneCollection
->contains(TempMorphemei
))
1470 oneFloat
= (*oneCollection
)[TempMorphemei
];
1471 RightColumnNewStickness
+= oneFloat
;
1475 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1482 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1494 RightColumnNewStickness
= 0.0;
1501 // Check whether Diff > 0, if so records these changes
1503 if ( ShouldUseStickNess
)
1506 StickNessDiff
= ( RightColumnNewStickness
+ LeftColumnNewStickness
) - (RightColumnOldStickness
+ LeftColumnOldStickness
) ;
1508 Diff
+= StickNessDiff
;
1517 if ( Diff
< BestDiff
)
1520 BestStickNessDiff
= StickNessDiff
;
1521 MoveWhichColumn
= i
;
1523 MoveHowmanyLetters
= backletteri
;
1524 // Always remember the left one column
1525 m_ModifiedColumn
= i
;
1533 MoveWhichColumn
= i
;
1535 MoveHowmanyLetters
= backletteri
;
1536 // Always remember the left one column
1537 m_ModifiedColumn
= i
;
1547 // Check RightColumn
1552 while ( (headletteri
<= MaximumNumberOfMovingLetters
))
1555 LeftColumnNewStickness
= 0;
1556 RightColumnNewStickness
= 0;
1558 PossibleNewColumn
.ClearParse();
1560 // Need experiment on how to deal with "NULL" since "NULL" is too common
1561 if ( (static_cast <int> ((RightColumn
->Size())) == 1) && (RightColumn
->GetPiece(1).Display().length()<=headletteri
))
1567 // Check whether all words have headletteri headletter in common
1568 for ( row
=1; row
<= RightColumn
->Size(); row
++)
1570 TheMorpheme
= RightColumn
->GetPiece(row
).Display();
1572 if ( TheMorpheme
== QString("NULL"))
1578 if ( static_cast <int> ( TheMorpheme
.length() ) < headletteri
)
1584 MyHead
= TheMorpheme
.left(headletteri
);
1588 CommonHead
= MyHead
;
1592 if ( MyHead
!= CommonHead
)
1604 // Now Calculate the Alternative Complexity
1607 for ( row
=1; row
<= RightColumn
->Size(); row
++)
1609 TheMorpheme
= RightColumn
->GetPiece(row
).Display();
1611 if (!MorphemeAndItsComplexity
.contains(TheMorpheme
))
1613 QMessageBox::information (NULL
, "Debug", "Can't find this Morpheme", "OK");
1618 OneMorphemeComplexity
= MorphemeAndItsComplexity
[TheMorpheme
];
1619 Newmorpheme
= TheMorpheme
.right(TheMorpheme
.length() - headletteri
);
1621 if ( Newmorpheme
.length() ==0)
1623 Newmorpheme
= QString("NULL");
1626 PossibleNewColumn
.Append(Newmorpheme
);
1628 if (!MorphemeAndItsComplexity
.contains(Newmorpheme
))
1630 NewmorphemeComplexity
= -base2log(1.0/TotalWords
); // Big Penalty
1634 AlternativeMorphemeComplexity
= MorphemeAndItsComplexity
[Newmorpheme
];
1635 NewmorphemeComplexity
= AlternativeMorphemeComplexity
;
1638 Diff
+= NewmorphemeComplexity
- OneMorphemeComplexity
;
1642 // Compute the possible Column Stickness
1643 if ( ShouldUseStickNess
)
1645 if ( PossibleNewColumn
.Size() > 1)
1648 for ( Tempi
= 1; Tempi
< PossibleNewColumn
.Size(); Tempi
++)
1650 TempMorphemei
= PossibleNewColumn
.GetPiece(Tempi
).Display();
1652 for ( Tempj
= Tempi
+ 1; Tempj
<= PossibleNewColumn
.Size(); Tempj
++)
1654 TempMorphemej
= PossibleNewColumn
.GetPiece(Tempj
).Display();
1656 if ( TempMorphemei
> TempMorphemej
)
1658 if ( GlobalStickNess
.contains(TempMorphemei
))
1660 oneCollection
= GlobalStickNess
[TempMorphemei
];
1661 if ( oneCollection
->contains(TempMorphemej
))
1663 oneFloat
= (*oneCollection
)[TempMorphemej
];
1664 RightColumnNewStickness
+= oneFloat
;
1668 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1675 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1680 if ( GlobalStickNess
.contains(TempMorphemej
))
1682 oneCollection
= GlobalStickNess
[TempMorphemej
];
1683 if ( oneCollection
->contains(TempMorphemei
))
1685 oneFloat
= (*oneCollection
)[TempMorphemei
];
1686 RightColumnNewStickness
+= oneFloat
;
1690 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1697 RightColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1709 RightColumnNewStickness
= 0.0;
1713 PossibleNewColumn
.ClearParse();
1714 for ( row
=1; row
<= LeftColumn
->Size(); row
++)
1716 TheMorpheme
= LeftColumn
->GetPiece(row
).Display();
1720 TheMorpheme
= TheMorpheme
.right(TheMorpheme
.length() -1);
1721 if ( TheMorpheme
.length() ==0)
1723 TheMorpheme
= QString("NULL");
1728 if (!MorphemeAndItsComplexity
.contains(TheMorpheme
))
1730 QMessageBox::information (NULL
, "Debug", "Can't find this Morpheme", "OK");
1735 OneMorphemeComplexity
= MorphemeAndItsComplexity
[TheMorpheme
];
1737 if ( TheMorpheme
== QString("NULL"))
1739 Newmorpheme
= CommonHead
;
1743 Newmorpheme
= TheMorpheme
+ CommonHead
;
1746 PossibleNewColumn
.Append(Newmorpheme
);
1748 if (!MorphemeAndItsComplexity
.contains(Newmorpheme
))
1750 NewmorphemeComplexity
= -base2log(1.0/TotalWords
); // Big Penalty
1754 AlternativeMorphemeComplexity
= MorphemeAndItsComplexity
[Newmorpheme
];
1755 NewmorphemeComplexity
= AlternativeMorphemeComplexity
;
1758 Diff
+= NewmorphemeComplexity
- OneMorphemeComplexity
;
1762 // Compute the possible Column Stickness
1763 if ( ShouldUseStickNess
)
1765 if ( PossibleNewColumn
.Size() > 1)
1768 for ( Tempi
= 1; Tempi
< PossibleNewColumn
.Size(); Tempi
++)
1770 TempMorphemei
= PossibleNewColumn
.GetPiece(Tempi
).Display();
1772 for ( Tempj
= Tempi
+ 1; Tempj
<= PossibleNewColumn
.Size(); Tempj
++)
1774 TempMorphemej
= PossibleNewColumn
.GetPiece(Tempj
).Display();
1776 if ( TempMorphemei
> TempMorphemej
)
1778 if ( GlobalStickNess
.contains(TempMorphemei
))
1780 oneCollection
= GlobalStickNess
[TempMorphemei
];
1781 if ( oneCollection
->contains(TempMorphemej
))
1783 oneFloat
= (*oneCollection
)[TempMorphemej
];
1784 LeftColumnNewStickness
+= oneFloat
;
1788 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1795 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1800 if ( GlobalStickNess
.contains(TempMorphemej
))
1802 oneCollection
= GlobalStickNess
[TempMorphemej
];
1803 if ( oneCollection
->contains(TempMorphemei
))
1805 oneFloat
= (*oneCollection
)[TempMorphemei
];
1806 LeftColumnNewStickness
+= oneFloat
;
1810 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1817 LeftColumnNewStickness
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
1829 LeftColumnNewStickness
= 0.0;
1834 // Check whether Diff > 0, if so records these changes
1836 if ( ShouldUseStickNess
)
1839 StickNessDiff
= ( RightColumnNewStickness
+ LeftColumnNewStickness
) - (RightColumnOldStickness
+ LeftColumnOldStickness
) ;
1841 Diff
+= StickNessDiff
;
1849 if ( Diff
< BestDiff
)
1852 BestStickNessDiff
= StickNessDiff
;
1853 MoveWhichColumn
= i
+1;
1855 MoveHowmanyLetters
= headletteri
;
1857 // Always remember the left one column
1858 m_ModifiedColumn
= i
;
1866 MoveWhichColumn
= i
+1;
1868 MoveHowmanyLetters
= headletteri
;
1870 // Always remember the left one column
1871 m_ModifiedColumn
= i
;
1882 // Now, we can know whether we adjust this template
1885 // int MaximumOutputMorphemeInOneColumn = 8;
1886 QString DisplayOfOneColumn
;
1891 // Print Original Templates
1892 if (PrintChangedTemplates
)
1895 QFile
file( "AdjustedTemplates.txt" );
1897 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
1899 QMessageBox::information(NULL
, "Error", "Can't Open the file!", "OK");
1903 Q3TextStream
outf( &file
);
1905 //outf.open ("AdjustedTemplates.txt", ofstream::out | ofstream::app);
1907 outf
<< "***********"<<Loopi
<<"************" <<endl
;
1908 outf
<< " Original Template:" << endl
;
1911 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
1913 OneColumn
= m_Columns
[i
];
1915 DisplayOfOneColumn
= QString("{ ");
1916 for (int j
= 1; j
<= OneColumn
->Size();j
++)
1918 DisplayOfOneColumn
+= OneColumn
->GetPiece(j
).Display();
1919 if ( j
!= OneColumn
->Size())
1921 DisplayOfOneColumn
+= QString(" , ");
1925 DisplayOfOneColumn
+= QString(" }");
1926 outf
<< DisplayOfOneColumn
;
1927 if ( i
!= m_NumberOfColumns
-1)
1940 // Modify this template based on former bestdiff record
1942 QString MovedSubString
;
1943 CStringSurrogate TempCSS
;
1945 if ( MoveL2RorR2L
== 0)
1947 // when letters are moved from left to right
1948 LeftColumn
= m_Columns
[MoveWhichColumn
];
1949 RightColumn
= m_Columns
[MoveWhichColumn
+1];
1950 MovedSubString
= LeftColumn
->GetPiece(1).Display().right(MoveHowmanyLetters
);
1952 // Modify left column
1953 TempParse
.ClearParse();
1954 for ( row
=1; row
<= LeftColumn
->Size(); row
++)
1956 TheMorpheme
= LeftColumn
->GetPiece(row
).Display();
1959 if ( MoveWhichColumn
==0)
1961 TheMorpheme
= TheMorpheme
.right(TheMorpheme
.length() -1);
1963 if ( TheMorpheme
.length() ==0)
1965 QMessageBox::information (NULL
, "Debug", "Impossible Here!", "OK");
1966 TheMorpheme
= QString("NULL");
1972 Newmorpheme
= TheMorpheme
.left(TheMorpheme
.length() - MoveHowmanyLetters
);
1974 if ( Newmorpheme
.length() ==0)
1976 Newmorpheme
= QString("NULL");
1980 TempCSS
= CStringSurrogate(Newmorpheme
);
1981 if ( !TempParse
.Contains(TempCSS
))
1983 TempParse
.Append(Newmorpheme
);
1988 if ( TempParse
.Size() ==0)
1990 QMessageBox::information(NULL
, "Debug", "TempParse Size should not be 0", "OK");
1994 // If this is the first column, we put "#" back
1995 if ( MoveWhichColumn
==0)
2001 for ( Swapi
= 1; Swapi
<= TempParse
.Size(); Swapi
++)
2003 Swappiece
= TempParse
.GetPiece(Swapi
).Display();
2005 if ( Swappiece
== QString("NULL"))
2007 Swappiece
= QString("#");
2011 Swappiece
= QString("#") + Swappiece
;
2014 SwapParse
.Append(Swappiece
);
2017 TempParse
= SwapParse
;
2020 (*LeftColumn
) = TempParse
;
2023 // Modify right column
2024 TempParse
.ClearParse();
2025 for ( row
=1; row
<= RightColumn
->Size(); row
++)
2027 TheMorpheme
= RightColumn
->GetPiece(row
).Display();
2029 if ( TheMorpheme
== QString("NULL"))
2031 Newmorpheme
= MovedSubString
;
2035 Newmorpheme
= MovedSubString
+ TheMorpheme
;
2038 TempCSS
= CStringSurrogate(Newmorpheme
);
2039 if ( !TempParse
.Contains(TempCSS
))
2041 TempParse
.Append(Newmorpheme
);
2047 if ( TempParse
.Size() ==0)
2049 QMessageBox::information(NULL
, "Debug", "TempParse Size should not be 0", "OK");
2053 (*RightColumn
) = TempParse
;
2058 // when letters are moved from right to left
2059 LeftColumn
= m_Columns
[MoveWhichColumn
-1];
2060 RightColumn
= m_Columns
[MoveWhichColumn
];
2061 MovedSubString
= RightColumn
->GetPiece(1).Display().left(MoveHowmanyLetters
);
2063 // Modify right column
2064 TempParse
.ClearParse();
2065 for ( row
=1; row
<= RightColumn
->Size(); row
++)
2067 TheMorpheme
= RightColumn
->GetPiece(row
).Display();
2068 Newmorpheme
= TheMorpheme
.right(TheMorpheme
.length() - MoveHowmanyLetters
);
2069 if ( Newmorpheme
.length() ==0)
2071 Newmorpheme
= QString("NULL");
2074 TempCSS
= CStringSurrogate(Newmorpheme
);
2075 if ( !TempParse
.Contains(TempCSS
))
2077 TempParse
.Append(Newmorpheme
);
2082 if ( TempParse
.Size() ==0)
2084 QMessageBox::information(NULL
, "Debug", "TempParse Size should not be 0", "OK");
2088 (*RightColumn
) = TempParse
;
2091 // Modify left column
2092 TempParse
.ClearParse();
2093 for ( row
=1; row
<= LeftColumn
->Size(); row
++)
2095 TheMorpheme
= LeftColumn
->GetPiece(row
).Display();
2097 if ( MoveWhichColumn
== 1)
2099 TheMorpheme
= TheMorpheme
.right(TheMorpheme
.length() -1);
2100 if ( TheMorpheme
.length() ==0)
2102 TheMorpheme
= QString("NULL");
2107 if ( TheMorpheme
== QString("NULL"))
2109 Newmorpheme
= MovedSubString
;
2113 Newmorpheme
= TheMorpheme
+ MovedSubString
;
2116 TempCSS
= CStringSurrogate(Newmorpheme
);
2117 if ( !TempParse
.Contains(TempCSS
))
2119 TempParse
.Append(Newmorpheme
);
2124 if ( TempParse
.Size() ==0)
2126 QMessageBox::information(NULL
, "Debug", "TempParse Size should not be 0", "OK");
2130 // If this is the first column, we put "#" back
2131 if ( MoveWhichColumn
== 1)
2137 for ( Swapi
= 1; Swapi
<= TempParse
.Size(); Swapi
++)
2139 Swappiece
= TempParse
.GetPiece(Swapi
).Display();
2141 if ( Swappiece
== QString("NULL"))
2143 Swappiece
= QString("#");
2147 Swappiece
= QString("#") + Swappiece
;
2150 SwapParse
.Append(Swappiece
);
2153 TempParse
= SwapParse
;
2156 (*LeftColumn
) = TempParse
;
2161 // Check through this template, delete those column, which only has "NULL" or "#"
2163 bool DeleteFirstColumn
;
2166 DeleteFirstColumn
= false;
2168 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
2170 OneColumn
= m_Columns
[i
];
2172 if (( OneColumn
->Size() ==1 )&&(OneColumn
->GetPiece(1).Display() == QString("#")))
2174 DeleteFirstColumn
= true;
2178 if ( ( OneColumn
->Size() ==1 )&&(OneColumn
->GetPiece(1).Display() == QString("NULL")))
2183 if ( DeleteFirstColumn
&& (Newi
== 0))
2189 for ( Swapi
= 1; Swapi
<= OneColumn
->Size(); Swapi
++)
2191 Swappiece
= OneColumn
->GetPiece(Swapi
).Display();
2193 if ( Swappiece
== QString("NULL"))
2195 Swappiece
= QString("#");
2199 Swappiece
= QString("#") + Swappiece
;
2202 SwapParse
.Append(Swappiece
);
2205 (*OneColumn
) = SwapParse
;
2209 m_Columns
[Newi
] = OneColumn
;
2213 m_NumberOfColumns
= Newi
;
2216 // Print Result Templates
2217 if (PrintChangedTemplates
)
2219 QFile
file( "AdjustedTemplates.txt" );
2221 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
2223 QMessageBox::information(NULL
, "Error", "Can't Open the file!", "OK");
2227 Q3TextStream
outf( &file
);
2229 //outf.open ("AdjustedTemplates.txt", ofstream::out | ofstream::app);
2231 outf
<< " Adjusted Template:" << endl
;
2234 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
2236 OneColumn
= m_Columns
[i
];
2238 DisplayOfOneColumn
= QString("{ ");
2239 for (int j
= 1; j
<= OneColumn
->Size(); j
++)
2241 DisplayOfOneColumn
+= OneColumn
->GetPiece(j
).Display();
2242 if ( j
!= OneColumn
->Size())
2244 DisplayOfOneColumn
+= QString(" , ");
2248 DisplayOfOneColumn
+= QString(" }");
2249 outf
<< DisplayOfOneColumn
;
2250 if ( i
!= m_NumberOfColumns
-1)
2258 outf
<< "The Diff in Pointer is " << BestDiff
<< ", The StickNess Diff is " << BestStickNessDiff
<<".";
2272 QFile
file( "AdjustedTemplates.txt" );
2274 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
2276 QMessageBox::information(NULL
, "Error", "Can't Open the file!", "OK");
2280 Q3TextStream
outf( &file
);
2282 outf
<< "No Change for this template !" <<endl
;
2292 QString
omit_first_letter(const QString
& morpheme
)
2294 Q_ASSERT(morpheme
!= TheStringNULL
);
2295 Q_ASSERT(morpheme
.size() >= 1);
2297 if (morpheme
.size() == 1)
2298 return TheStringNULL
;
2300 return morpheme
.mid(1);
// Checks the stems of this template against AbsorbedWords and appends a
// report to "Absorb1.txt".
//
// Parameters:
//   AbsorbedWords - word lookup; only contains() is called on it here.
//   Conservative  - when it compares equal to 0 (false), the number of affixes
//                   a stem has to match is capped by
//                   MiniMumPrefixOrSuffixNeedToBeAbsorbed.
//   MiniMumPrefixOrSuffixNeedToBeAbsorbed - the cap used in that case.
//   Loopi         - written into the header line of the report.
//
// Only 2- and 3-column templates are expected (asserted below).
2304 void CTemplate::AdjustMeAfterAbsorb1(QMap
<QString
, int>& AbsorbedWords
,
2305 bool Conservative
, int MiniMumPrefixOrSuffixNeedToBeAbsorbed
, int Loopi
)
2307 const bool OpenPrint
= true;
// Find the column with the most pieces; it is treated as the stem column.
2310 int StemColumn
= -1;
2311 for (int i
= 0; i
< m_NumberOfColumns
; ++i
) {
2312 CParse
& column
= *m_Columns
[i
];
2314 if (column
.Size() > StemSize
) {
2315 StemSize
= column
.Size();
2320 Q_ASSERT(StemSize
>= 0);
2321 Q_ASSERT(StemColumn
>= 0 && StemColumn
< m_NumberOfColumns
);
2322 Q_ASSERT(m_NumberOfColumns
== 2 || m_NumberOfColumns
== 3);
2324 // We don't consider the case where Stem is in middle
2325 if (m_NumberOfColumns
== 3 && StemColumn
== 1)
2328 Q_ASSERT(StemColumn
== 0 || StemColumn
== 1 || StemColumn
== 2);
2330 if (StemColumn
== 1)
2331 // Already returned if number of columns == 3.
2332 Q_ASSERT(m_NumberOfColumns
== 2);
2334 if (StemColumn
== 2)
2335 // it only can be 3-column
2336 Q_ASSERT(m_NumberOfColumns
== 3);
// Collect the affixes that combine with the stems. The map is used as a set:
2338 // values are all 1. XXX. use std::set.
2339 QMap
<QString
, int> affix_set
;
// Three columns: the affixes are the combinations of columns 1 and 2.
2341 if (m_NumberOfColumns
== 3) {
2342 Q_ASSERT(StemColumn
== 0);
2343 GetWordsFromTwoColums(affix_set
, 1);
// Two columns: the affixes are the pieces of the non-stem column.
2345 Q_ASSERT(m_NumberOfColumns
== 2);
2347 CParse
& column
= *m_Columns
[1 - StemColumn
];
2348 for (int row
= 1; row
<= column
.Size(); ++row
) {
2349 QString affix
= column
.GetPiece(row
).Display();
// When the stems are in column 1 the affixes are in column 0, whose pieces
// have their first character dropped here.
2350 if (StemColumn
== 1)
2351 affix
= omit_first_letter(affix
);
2353 affix_set
[affix
] = 1;
2357 // With these affixes, check the stem column
2358 CParse
& column
= *m_Columns
[StemColumn
];
2360 QMap
<QString
, int> AbsorbedStems
;
2361 int AbsorbedStemSize
= 0;
2362 for (int row
= 1; row
<= column
.Size(); ++row
) {
2363 QString stem
= column
.GetPiece(row
).Display();
2364 if (StemColumn
== 0)
2365 stem
= omit_first_letter(stem
);
// Count the affixes whose combination with this stem is in AbsorbedWords.
2367 int FitHowManyPrefixOrSuffix
= 0;
2368 for (QMap
<QString
, int>::const_iterator iter
=
2369 affix_set
.constBegin();
2370 iter
!= affix_set
.constEnd();
2372 QString affix
= iter
.key();
2373 Q_ASSERT(iter
.value() == 1);
// Build the candidate word; TheStringNULL parts contribute nothing.
2376 if (stem
!= TheStringNULL
)
2377 MakeUpWord
.append(stem
);
2379 if (affix
!= TheStringNULL
) {
// Stem in column 0: the affix follows the stem; otherwise it precedes it.
2380 if (StemColumn
== 0) {
2381 MakeUpWord
.append(affix
);
2383 Q_ASSERT(StemColumn
==
2384 m_NumberOfColumns
- 1);
2385 MakeUpWord
.prepend(affix
);
2389 if (AbsorbedWords
.contains(MakeUpWord
))
2390 ++FitHowManyPrefixOrSuffix
;
// Number of affixes that must fit: all of them, or at most the given minimum
// when Conservative is false.
2393 int CheckLimit
= affix_set
.size();
2394 if (Conservative
== 0)
2395 CheckLimit
= std::min(CheckLimit
,
2396 MiniMumPrefixOrSuffixNeedToBeAbsorbed
);
// Stems that fit too few affixes are collected in TempParse.
// NOTE(review): when StemColumn == 0 the stem appended here has already lost
// its first character (see omit_first_letter above), and TempParse is later
// swapped into the stem column - confirm that this is intended.
2398 if (FitHowManyPrefixOrSuffix
< CheckLimit
) {
2399 TempParse
.Append(CStringSurrogate(stem
));
2402 AbsorbedStems
[stem
] = 1;
2406 if (AbsorbedStemSize
== StemSize
)
// Only some stems were absorbed: the stem column takes the contents of
// TempParse, and TempParse ends up holding the previous stem column.
2408 else if (AbsorbedStemSize
> 0)
2409 std::swap(*(m_Columns
[StemColumn
]), TempParse
);
// Report: appended to "Absorb1.txt".
2416 QFile
file("Absorb1.txt");
2417 if (!file
.open(QIODevice::WriteOnly
| QIODevice::Append
)) {
2418 QMessageBox::information(0, "Error",
2419 "Can't Open the file!", "OK");
2422 QTextStream
outf(&file
);
2424 outf
<< "*************" << Loopi
<< "**************" << endl
;
2425 if (AbsorbedStemSize
== StemSize
) {
2426 outf
<< "\tDelete This Template:" << endl
;
2428 Q_ASSERT(AbsorbedStemSize
> 0);
2429 // Took some stems away from one template
2430 outf
<< "\tAbsorb Some Stems From This Template:" <<
// Print every column; for the stem column the pieces come from TempParse.
2435 for (int i
= 0; i
< m_NumberOfColumns
; ++i
) {
2436 CParse
& column
= i
!= StemColumn
2438 : TempParse
; // old *m_Columns[i]
2441 for (int j
= 1; j
<= column
.Size(); ++j
) {
2442 outf
<< column
.GetPiece(j
).Display();
2443 if (j
== column
.Size())
2449 if (i
== m_NumberOfColumns
- 1)
// When only part of the stems were absorbed, list them.
2455 if (AbsorbedStemSize
!= StemSize
) {
2456 Q_ASSERT(AbsorbedStemSize
> 0);
2457 outf
<< "Absorbed Stems: ";
2460 if (!AbsorbedStems
.isEmpty()) {
2461 QMap
<QString
, int>::const_iterator iter
=
2462 AbsorbedStems
.constBegin();
2464 outf
<< iter
.key(); // first stem
2465 Q_ASSERT(iter
.value() == 1);
// Remaining stems, each preceded by a separator.
2467 for (++iter
; iter
!= AbsorbedStems
.constEnd();
2469 outf
<< " , " << iter
.key();
2470 Q_ASSERT(iter
.value() == 1);
2473 outf
<< " }" << endl
;
// Fills AllPrefixes with strings built by joining a piece of column
// StartColumn with a piece of column StartColumn + 1; every entry is inserted
// with the value 1. AllPrefixes is cleared before it is filled.
//
// Parameters:
//   AllPrefixes - output map; its previous contents are discarded.
//   StartColumn - index of the first of the two adjacent columns.
2478 void CTemplate::GetWordsFromTwoColums(StringToInt
& AllPrefixes
, int StartColumn
)
2484 QString TheWord
, OneWord
, NewWord
, DumpWord
;
2485 StringToInt::iterator StringToIntIt
;
2487 StringToInt TempWords
;
// Both StartColumn and StartColumn + 1 must exist.
2490 if ( GetNumberOfColumns() < StartColumn
+2)
2492 QMessageBox::information(NULL
, "Error", "Impossible Here So far. Please Let Yu Hu Debug it!", "OK") ;
2497 AllPrefixes
.clear();
// First pass: collect the pieces of column StartColumn into TempWords.
2499 CurrentSize
= (int) m_Columns
[StartColumn
]->Size();
2500 for ( row
= 0; row
<CurrentSize
; row
++)
2502 TheWord
= m_Columns
[StartColumn
]->GetPiece(row
+1).Display();
// Pieces of column 0 have their first character dropped; a piece that
// becomes empty is replaced by the string "NULL".
2504 if ( StartColumn
== 0)
2506 TheWord
= TheWord
.right((TheWord
.length()-1));
2507 if ( TheWord
.length() ==0)
2509 TheWord
= QString("NULL");
2513 TempWords
.insert(TheWord
,1);
// Second pass: combine every collected piece with every piece of the next column.
2517 Column
= StartColumn
+ 1;
2519 CurrentSize
= (int) m_Columns
[Column
]->Size();
2521 for ( StringToIntIt
= TempWords
.begin(); StringToIntIt
!= TempWords
.end(); StringToIntIt
++)
2523 TheWord
= StringToIntIt
.key();
2525 // It only works for first col;
2526 if (TheWord
== QString("NULL"))
2535 for ( row
= 0; row
<CurrentSize
; row
++)
2537 OneWord
= m_Columns
[Column
]->GetPiece(row
+1).Display();
2539 if ( OneWord
== QString("NULL"))
2541 // possible We get "NULL" + "NULL"
2548 NewWord
= TheWord
+ OneWord
;
// Each combination is expected to be new; a duplicate is reported as an error.
2557 if (! AllPrefixes
.contains(NewWord
))
2559 AllPrefixes
.insert(NewWord
,1);
2563 QMessageBox::information(NULL
, "Error" ,"Impossible Here. Please Let Yu Hu Debug it!", "OK") ;
// Looks, column by column, for a morpheme that is a proper prefix or a proper
// suffix of another morpheme in the same column. When one is found, that
// column is replaced by two new columns (NewColumnOne, NewColumnTwo) and the
// scan is repeated. If the template changed at all, the original template, the
// new template and the newly created words are appended to
// "PrefixOrSuffixMorphemeAdjust.txt".
//
// Parameters:
//   Loopi                   - written into the header line of the report.
//   MaximumSizeOfStemColumn - compared against the size of each column.
//   GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2
//       - passed through, in this order, to GetRobustNessWithParadigmaticGraph.
//
// NOTE(review): the names here are swapped relative to the callee's parameter
// names: TotalGlobalStickNess2 (a map) is passed as its GlobalStickNess and
// GlobalStickNess2 (a float) as its TotalGlobalStickNess. The types line up;
// only the naming is confusing.
2576 void CTemplate::FindMorphemePrefixOrSuffixWithParadigmaticGraph(int Loopi
, int MaximumSizeOfStemColumn
, StringToFloat
& GlobalNodeStickNess2
, StringToStringToFloat
& TotalGlobalStickNess2
, float GlobalStickNess2
, float TotalWord2
)
2580 int i
, itemi
, itemj
;
2581 QString Morphemei
, Morphemej
;
2582 bool IsMePrefixOfOthers
, IsMeSuffixOfOthers
;
2583 QString ThePrefixOrSuffix
;
2584 bool FoundAndModified
;
2585 bool HasEverChanged
;
2586 CTemplate
* BackUpTemplate
;
2587 CStringSurrogate TempCSS
, TempCSS1
;
2588 CStringSurrogate NULLCSS
;
// Surrogate for the placeholder string "NULL"; appended to the new columns below.
2591 NULLCSS
= CStringSurrogate(QString("NULL"));
// Copy of the template as it is now; used for the report and deleted at the end.
2595 BackUpTemplate
= new CTemplate(*this);
2598 HasEverChanged
= false;
2599 FoundAndModified
= true;
// Rescan the columns for as long as the previous pass modified the template.
2601 while( FoundAndModified
)
2604 FoundAndModified
= false;
2605 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
2607 oneColumn
= m_Columns
[i
];
2609 if ( oneColumn
->Size() > MaximumSizeOfStemColumn
)
2614 if ( oneColumn
->Size() == 1)
// Try every morpheme of the column as the candidate prefix/suffix.
2620 for ( itemi
= 1; itemi
<= oneColumn
->Size(); itemi
++)
2623 Morphemei
= oneColumn
->GetPiece(itemi
).Display();
// Drop the first character; an empty remainder becomes "NULL".
// NOTE(review): the condition guarding this step is not visible here -
// presumably it applies to the first column only; confirm.
2627 Morphemei
= Morphemei
.right(Morphemei
.length() -1);
2628 if ( Morphemei
.length() ==0)
2630 Morphemei
= QString("NULL");
2632 // NULL could not be any prefix or suffix of other morphemes
2637 if ( Morphemei
== QString("NULL"))
2640 // NULL could not be any prefix or suffix of other morphemes
2643 if ( Morphemei
.length() <= 1)
2646 // too short morpheme is suspicious for prefix and suffix of others
2649 ThePrefixOrSuffix
= Morphemei
; // Morphemei could be others' prefix or suffix
2651 // Check through the other morpheme to see whether this morphemei could be prefix
2652 IsMePrefixOfOthers
= false;
2653 for ( itemj
= 1; itemj
<= oneColumn
->Size(); itemj
++)
2655 Morphemej
= oneColumn
->GetPiece(itemj
).Display();
2659 Morphemej
= Morphemej
.right(Morphemej
.length() -1);
2660 if ( Morphemej
.length() ==0)
2662 Morphemej
= QString("NULL");
// Only strictly longer morphemes that start with Morphemei count.
2669 if ( Morphemej
.length() <= Morphemei
.length())
2674 if ( Morphemej
.left(Morphemei
.length()) != Morphemei
)
2679 IsMePrefixOfOthers
= true;
2683 // If found morphemei is prefix of at least one other morpheme
2684 if ( IsMePrefixOfOthers
)
2686 CParse OriginalColumn
, NewColumnOne
, NewColumnTwo
;
2687 CParse
* ReplaceFirstColumn
;
2689 QString oneOriginalMoprheme
, oneNewMorpheme
;
2690 float OlderRobustNess
, NewRobustNess
;
2691 float RobustNessDiff
;
2693 CParse
** TempColumns
;
2694 CParse
* ReplaceOneColumn
;
2698 // Try to get the NewColumnOne and NewColumnTwo
2699 for ( Originali
= 1; Originali
<= oneColumn
->Size(); Originali
++)
2701 oneOriginalMoprheme
= oneColumn
->GetPiece(Originali
).Display();
2705 oneOriginalMoprheme
= oneOriginalMoprheme
.right(oneOriginalMoprheme
.length() -1);
2707 if ( oneOriginalMoprheme
.length() ==0)
2709 oneOriginalMoprheme
= QString("NULL");
2714 TempCSS
= CStringSurrogate(oneOriginalMoprheme
);
2715 OriginalColumn
.Append(TempCSS
);
// Morphemes that do not start with the prefix go unchanged into NewColumnOne.
2717 if ( oneOriginalMoprheme
== QString("NULL"))
2720 NewColumnOne
.Append(TempCSS
);
2724 if ( oneOriginalMoprheme
.length() <= ThePrefixOrSuffix
.length())
2726 NewColumnOne
.Append(TempCSS
);
2730 if ( oneOriginalMoprheme
.left(ThePrefixOrSuffix
.length()) != ThePrefixOrSuffix
)
2732 NewColumnOne
.Append(TempCSS
);
// Morphemes that start with the prefix contribute their remainder to NewColumnTwo.
2736 if (oneOriginalMoprheme
.left(ThePrefixOrSuffix
.length()) == ThePrefixOrSuffix
)
2738 oneNewMorpheme
= oneOriginalMoprheme
.right(oneOriginalMoprheme
.length() - ThePrefixOrSuffix
.length());
2740 TempCSS1
= CStringSurrogate(oneNewMorpheme
);
2742 NewColumnTwo
.Append(TempCSS1
);
2747 NewColumnTwo
.Append(NULLCSS
);
2749 // Get Original Robustness
2750 OlderRobustNess
= GetRobustNessWithParadigmaticGraph(OriginalColumn
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2752 // Calculate the alternative robustness to do
2753 NewRobustNess
= GetRobustNessWithParadigmaticGraph(NewColumnOne
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2754 NewRobustNess
+= GetRobustNessWithParadigmaticGraph(NewColumnTwo
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2756 // If Diff satisfy the condition, we make the modification, then break;
2757 RobustNessDiff
= NewRobustNess
- OlderRobustNess
;
// The robustness test is switched off: the condition is hard-wired to 1, so
// RobustNessDiff is computed but the split is always applied.
2759 if (1) // RobustNessDiff < 0)
2762 // Change the template by replacing the two columns for the original one
// NOTE(review): no release of the previous m_Columns array is visible before
// it is replaced below - check for a leak.
2763 TempColumns
= new CParse
*[ m_NumberOfColumns
+ 1];
2765 for ( Replacei
= m_NumberOfColumns
-1; Replacei
>=0; Replacei
--)
2767 ReplaceOneColumn
= m_Columns
[Replacei
];
2771 TempColumns
[Replacei
+1] = ReplaceOneColumn
;
2776 TempColumns
[Replacei
+1] = new CParse(NewColumnTwo
);
2777 TempColumns
[Replacei
] = new CParse(NewColumnOne
);
2778 delete m_Columns
[Replacei
];
2783 TempColumns
[Replacei
] = ReplaceOneColumn
;
2789 m_Columns
= TempColumns
;
2790 m_NumberOfColumns
++;
2794 // If necessary, Put back the "#" back for the first column
2798 ReplaceFirstColumn
= m_Columns
[0];
2799 for ( Replacei
= 1; Replacei
<= ReplaceFirstColumn
->Size(); Replacei
++)
2801 Swappiece
= ReplaceFirstColumn
->GetPiece(Replacei
).Display();
2803 if ( Swappiece
== QString("NULL"))
2805 Swappiece
= QString("#");
2809 Swappiece
= QString("#") + Swappiece
;
2812 TempCSS
= CStringSurrogate(Swappiece
);
2813 SwapParse
.Append(TempCSS
);
2816 (*ReplaceFirstColumn
)= SwapParse
;
2819 FoundAndModified
= true;
2820 HasEverChanged
= true;
2827 // Check through the other morpheme to see whether this morphemei could be suffix
2828 IsMeSuffixOfOthers
= false;
2829 for ( itemj
= 1; itemj
<= oneColumn
->Size(); itemj
++)
2831 Morphemej
= oneColumn
->GetPiece(itemj
).Display();
2835 Morphemej
= Morphemej
.right(Morphemej
.length() -1);
2836 if ( Morphemej
.length() ==0)
2838 Morphemej
= QString("NULL");
// Only strictly longer morphemes that end with Morphemei count.
2845 if ( Morphemej
.length() <= Morphemei
.length())
2850 if ( Morphemej
.right(Morphemei
.length()) != Morphemei
)
2855 IsMeSuffixOfOthers
= true;
2859 // If found morphemei is suffix of at least one other morpheme
2860 if ( IsMeSuffixOfOthers
)
2862 CParse OriginalColumn
, NewColumnOne
, NewColumnTwo
;
2863 CParse
* ReplaceFirstColumn
;
2865 QString oneOriginalMoprheme
, oneNewMorpheme
;
2866 float OlderRobustNess
, NewRobustNess
;
2867 float RobustNessDiff
;
2869 CParse
** TempColumns
;
2870 CParse
* ReplaceOneColumn
;
2874 // Try to get the NewColumnOne and NewColumnTwo
2875 for ( Originali
= 1; Originali
<= oneColumn
->Size(); Originali
++)
2877 oneOriginalMoprheme
= oneColumn
->GetPiece(Originali
).Display();
2881 oneOriginalMoprheme
= oneOriginalMoprheme
.right(oneOriginalMoprheme
.length() -1);
2883 if ( oneOriginalMoprheme
.length() ==0)
2885 oneOriginalMoprheme
= QString("NULL");
2889 TempCSS
= CStringSurrogate(oneOriginalMoprheme
);
2890 OriginalColumn
.Append(TempCSS
);
// Mirror image of the prefix case: morphemes that do not end with the suffix
// go unchanged into NewColumnTwo.
2892 if ( oneOriginalMoprheme
== QString("NULL"))
2894 NewColumnTwo
.Append(TempCSS
);
2898 if ( oneOriginalMoprheme
.length() <= ThePrefixOrSuffix
.length())
2900 NewColumnTwo
.Append(TempCSS
);
2904 if ( oneOriginalMoprheme
.right(ThePrefixOrSuffix
.length()) != ThePrefixOrSuffix
)
2906 NewColumnTwo
.Append(TempCSS
);
// Morphemes that end with the suffix contribute their leading part to NewColumnOne.
2910 if (oneOriginalMoprheme
.right(ThePrefixOrSuffix
.length()) == ThePrefixOrSuffix
)
2912 oneNewMorpheme
= oneOriginalMoprheme
.left(oneOriginalMoprheme
.length() - ThePrefixOrSuffix
.length());
2914 TempCSS1
= CStringSurrogate(oneNewMorpheme
);
2915 NewColumnOne
.Append(TempCSS1
);
2919 NewColumnOne
.Append(NULLCSS
);
2921 // Get Original Robustness
2922 OlderRobustNess
= GetRobustNessWithParadigmaticGraph(OriginalColumn
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2924 // Calculate the alternative robustness
2925 NewRobustNess
= GetRobustNessWithParadigmaticGraph(NewColumnOne
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2926 NewRobustNess
+= GetRobustNessWithParadigmaticGraph(NewColumnTwo
, GlobalNodeStickNess2
, TotalGlobalStickNess2
, GlobalStickNess2
, TotalWord2
);
2928 // If Diff satisfy the condition, we make the modification, then break;
2929 RobustNessDiff
= NewRobustNess
- OlderRobustNess
;
// As in the prefix case, the robustness test is switched off (condition is 1).
2931 if (1) //RobustNessDiff < 0)
2934 // Change the template by replacing the two columns for the original one
2935 TempColumns
= new CParse
*[ m_NumberOfColumns
+ 1];
2937 for ( Replacei
= m_NumberOfColumns
-1; Replacei
>=0; Replacei
--)
2939 ReplaceOneColumn
= m_Columns
[Replacei
];
2943 TempColumns
[Replacei
+1] = ReplaceOneColumn
;
2948 TempColumns
[Replacei
+1] = new CParse(NewColumnTwo
);
2949 TempColumns
[Replacei
] = new CParse(NewColumnOne
);
2950 delete m_Columns
[Replacei
];
2955 TempColumns
[Replacei
] = ReplaceOneColumn
;
2961 m_Columns
= TempColumns
;
2962 m_NumberOfColumns
++;
2966 // if necessary, Put back the "#" back for the first column
2971 ReplaceFirstColumn
= m_Columns
[0];
2972 for ( Replacei
= 1; Replacei
<= ReplaceFirstColumn
->Size(); Replacei
++)
2974 Swappiece
= ReplaceFirstColumn
->GetPiece(Replacei
).Display();
2976 if ( Swappiece
== QString("NULL"))
2978 Swappiece
= QString("#");
2982 Swappiece
= QString("#") + Swappiece
;
2985 TempCSS
= CStringSurrogate(Swappiece
);
2986 SwapParse
.Append(TempCSS
);
2989 (*ReplaceFirstColumn
)= SwapParse
;
2992 FoundAndModified
= true;
2993 HasEverChanged
= true;
// A modification invalidates the column being scanned; start over.
3002 if ( FoundAndModified
) break;
3007 }// According to FoundAndModified, while loop;
3010 // If this template is modified, output to logf
3011 if ( HasEverChanged
)
3013 StringToParse OriginalWords
, NewWords
;
3014 StringToParse::iterator StringToParseIt
;
3015 StringToInt NewCreatedWords
;
3016 StringToInt::iterator StringToIntIt
;
3017 QString TempWordString
;
3019 QString DisplayOfOneColumn
;
3022 QFile
file( "PrefixOrSuffixMorphemeAdjust.txt" );
3024 if ( !file
.open( QIODevice::WriteOnly
| QIODevice::Append
) )
3026 QMessageBox::information(NULL
, "Error", "Can't Open the file!", "OK");
3030 Q3TextStream
outf( &file
);
// Words described by the template before the change.
3035 BackUpTemplate
->GetWordsAndParses(OriginalWords
);
3038 for ( StringToParseIt
= OriginalWords
.begin(); StringToParseIt
!= OriginalWords
.end(); StringToParseIt
++)
3040 dummyParse
= StringToParseIt
.data();
// Words described by the template after the change.
3044 this ->GetWordsAndParses(NewWords
);
3046 for ( StringToParseIt
= NewWords
.begin(); StringToParseIt
!= NewWords
.end(); StringToParseIt
++)
3048 dummyParse
= StringToParseIt
.data();
3049 TempWordString
= StringToParseIt
.key();
// Words that exist only after the change are collected for the report.
// NOTE(review): NULL is used here as the int value of a StringToInt entry;
// a plain 0 or 1 would state the intent more clearly.
3053 if ( !OriginalWords
.contains(TempWordString
))
3055 NewCreatedWords
.insert(TempWordString
, NULL
);
3062 outf
<< "***********"<<Loopi
<<"************" <<endl
;
3063 outf
<< " Original Template:" << endl
;
3067 for ( i
= 0; i
< BackUpTemplate
->m_NumberOfColumns
; i
++)
3069 oneColumn
= BackUpTemplate
->m_Columns
[i
];
3071 DisplayOfOneColumn
= QString("{ ");
3072 for (int j
= 1; j
<= oneColumn
->Size();j
++)
3074 DisplayOfOneColumn
+= oneColumn
->GetPiece(j
).Display();
3075 if ( j
!= oneColumn
->Size())
3077 DisplayOfOneColumn
+= QString(" , ");
3081 DisplayOfOneColumn
+= QString(" }");
3082 outf
<< DisplayOfOneColumn
;
// NOTE(review): this loop runs over BackUpTemplate's columns, but the
// "last column" test below uses this template's m_NumberOfColumns, which has
// grown since the backup was taken. BackUpTemplate->m_NumberOfColumns - 1
// looks like what was intended; as written the test never matches the
// backup's last column.
3083 if ( i
!= m_NumberOfColumns
-1)
3093 outf
<< " New Template:" << endl
;
3096 for ( i
= 0; i
< m_NumberOfColumns
; i
++)
3098 oneColumn
= m_Columns
[i
];
3100 DisplayOfOneColumn
= QString("{ ");
3101 for (int j
= 1; j
<= oneColumn
->Size();j
++)
3103 DisplayOfOneColumn
+= oneColumn
->GetPiece(j
).Display();
3104 if ( j
!= oneColumn
->Size())
3106 DisplayOfOneColumn
+= QString(" , ");
3110 DisplayOfOneColumn
+= QString(" }");
3111 outf
<< DisplayOfOneColumn
;
3112 if ( i
!= m_NumberOfColumns
-1)
3121 outf
<< " New Created Words:" << endl
;
3124 for ( StringToIntIt
= NewCreatedWords
.begin(); StringToIntIt
!= NewCreatedWords
.end(); StringToIntIt
++)
3126 TempWordString
= StringToIntIt
.key();
3127 outf
<< TempWordString
<< " , ";
3130 outf
<< " }"<< endl
;
// Release the snapshot taken at the top of the function.
3140 delete BackUpTemplate
;
3146 float CTemplate::GetRobustNessWithParadigmaticGraph(CParse
& oneColumn
, StringToFloat
& MorphemeAndItsComplexity
, StringToStringToFloat
& GlobalStickNess
, float TotalGlobalStickNess
, float TotalWords
)
3151 QString oneMorpheme
, anotherMorpheme
;
3152 QString HostMorpheme
, SlaveMorpheme
;
3153 float NodeRobustNess
;
3154 float EdgeRobustNess
;
3156 StringToFloat
* oneCollection
;
3159 NodeRobustNess
=0.0;
3160 EdgeRobustNess
= 0.0;
3163 for ( i
=1; i
<= oneColumn
.Size(); i
++)
3165 oneMorpheme
= oneColumn
.GetPiece(i
).Display();
3167 if (MorphemeAndItsComplexity
.contains(oneMorpheme
))
3169 oneFloat
= MorphemeAndItsComplexity
[oneMorpheme
];
3170 NodeRobustNess
+= oneFloat
;
3174 NodeRobustNess
+= (-base2log(1.0/TotalWords
)); // Big Penalty
3178 for ( j
=i
+1; j
<= oneColumn
.Size(); j
++)
3180 anotherMorpheme
= oneColumn
.GetPiece(j
).Display();
3182 if ( oneMorpheme
> anotherMorpheme
)
3184 if ( GlobalStickNess
.contains(oneMorpheme
))
3186 oneCollection
= GlobalStickNess
[oneMorpheme
];
3188 if ( oneCollection
->contains(anotherMorpheme
))
3190 oneFloat
= (*oneCollection
)[anotherMorpheme
];
3191 EdgeRobustNess
+= oneFloat
;
3195 EdgeRobustNess
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
3202 EdgeRobustNess
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
3207 if ( GlobalStickNess
.contains(anotherMorpheme
))
3209 oneCollection
= GlobalStickNess
[anotherMorpheme
];
3211 if ( oneCollection
->contains(oneMorpheme
))
3213 oneFloat
= (*oneCollection
)[oneMorpheme
];
3214 EdgeRobustNess
+= oneFloat
;
3218 EdgeRobustNess
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
3225 EdgeRobustNess
+= -base2log(1.0/TotalGlobalStickNess
); // Big Penalty
3234 return (NodeRobustNess
+ EdgeRobustNess
);
3245 // Answer the Q: does this template describe prefixes, or suffixes?
// Only two-column templates are classified; any other column count yields
// TYPE_Unknown. The decision compares the GetLength() values of the two
// columns against `threshold` and requires at least MinimumNumberOfStems
// pieces in the larger column.
3246 eAffixationType CTemplate::DetermineAffixationSide()
3248 float threshold = 2;
3249 int MinimumNumberOfStems = 3;
3250 int ColumnNumber = 0;
// NOTE(review): MinimumStemLength is not referenced by any statement visible
// in this function - confirm whether it is still needed.
3251 int MinimumStemLength = 4;
3253 if ( m_NumberOfColumns != 2 ) return TYPE_Unknown;
3255 //#############################################################/
3256 // initial templates have only 1 column with more than 1 entry; deal with them first
3257 int NumberOfColumnsWithMoreThan1Entry = 0;
3258 for (int i = 0; i < m_NumberOfColumns; i++)
3259 { if ( m_Columns[i]->Size() > 1 )
3260 NumberOfColumnsWithMoreThan1Entry ++;
3263 //###########################################################//
3264 if ( NumberOfColumnsWithMoreThan1Entry == 1 )
// NOTE(review): with exactly two columns (enforced above) the valid indices
// are 0 and 1, but 2 - ColumnNumber yields 2 or 1. 1 - ColumnNumber may have
// been intended - confirm against how OtherColumn is used.
3266 int OtherColumn = 2 - ColumnNumber;
3268 //###########################################################//
3273 // We consider regular templates, with more than one column containing more than 1 entry
3274 float N1 = (float) m_Columns[0]->GetLength();
3275 float N2 = (float) m_Columns[1]->GetLength();
// NOTE(review): no guard against N1 or N2 being 0 is visible before these
// divisions.
3278 if ( N1/N2 > threshold && m_Columns[0]->Size() >= MinimumNumberOfStems )
3282 if ( N2/N1 > threshold && m_Columns[1]->Size() >= MinimumNumberOfStems)
3286 return TYPE_Unknown;
3289 //#########################################################//
3291 return TYPE_Unknown;
// Adds the contents of this two-column template to the given collections.
//
// Parameters:
//   ThisType   - TYPE_Prefix or TYPE_Suffix; selects which column is treated
//                as affixes and which as stems.
//   Stems      - receives the stem pieces (via operator<<).
//   Affixes    - receives the affix pieces (via operator<<).
//   Signatures - receives the affix signature (via operator<<).
3294 void CTemplate::AddToCollections ( eAffixationType ThisType, CStemCollection* Stems, CAffixCollection* Affixes, CSignatureCollection* Signatures)
3301 CStringSurrogate ssPiece;
3302 CString WordBoundary("#"),
3304 // Right now, there is a spurious '#' at the beginning of words, so we have to
3305 // remove this from each prefix and prefix signature. The # should be removed,
3306 // and this code simplified, so that prefixes are just like suffixes.
// Prefix case: column 0 holds the affixes and column 1 the stems.
3308 if (ThisType == TYPE_Prefix)
3310 for (int i = 1; i <= m_Columns[0]->Size(); i++)
3312 Sig1 = m_Columns[0]->GetAt_SS(i);
// NOTE(review): this inner loop declares its own `i`, shadowing the index of
// the enclosing loop.
3313 for (int i=1; i <= Sig1.Size(); i++)
3315 ssPiece = Sig1.GetAt_SS(i);
// A bare "#" piece is recorded as Null in the signature.
3316 if ( ssPiece == WordBoundary )
3318 Sig2.AppendInAlphabeticalOrder (Null );
// Any other piece starting with '#' is recorded without that character.
3321 else if ( ssPiece[0] == '#' )
3323 ssPiece = ssPiece.Mid(1);
3324 Sig2.AppendInAlphabeticalOrder ( ssPiece );
3326 *Affixes << ssPiece;
3328 qSig = *Signatures << &Sig2;
3329 qSig->SetAffixLocation ( WORD_INITIAL ); //this is probably no longer necessary; todo
// NOTE(review): `i` is used below without a declaration on this line; it
// presumably relies on a declaration that is not visible here - confirm.
3331 for (i = 1; i <= m_Columns[1]->Size(); i++)
3333 pStem = *Stems << m_Columns[1]->GetAt_SS(i);
3334 qSig->GetStemPtrList()->AddTail(pStem);
// NOTE(review): the suffix-signature setter is called with the prefix
// signature here - confirm that this is intended.
3335 pStem->SetSuffixSignature (qSig);
// Suffix case: column 1 holds the affixes and column 0 the stems.
3340 else if ( ThisType == TYPE_Suffix)
3342 pSig = *Signatures << m_Columns[1];
3344 for (int i = 1; i <= m_Columns[1]->Size(); i++)
3346 *Affixes << m_Columns[1]->GetAt_SS(i);
3348 for (i = 1; i <= m_Columns[0]->Size(); i++)
3350 pStem = *Stems << m_Columns[0]->GetAt_SS(i);
3351 pSig->GetStemPtrList()->AddTail(pStem);
3352 pStem->SetSuffixSignature (pSig);
3368 //////////////////////////////////////////////////////////////////
3369 ////// CTemplateListViewItem Implementation Section
// Creates an item directly under a Q3ListView.
//   parent - list view that receives the item (forwarded to Q3ListViewItem)
//   templ  - template this item displays; presumably stored in m_Template,
//            which key() and text() dereference — TODO confirm
//   label  - item label (forwarded to Q3ListViewItem)
// m_MaxMumShownMorphemes is initialised to 4.
3371 CTemplateListViewItem::CTemplateListViewItem(Q3ListView
*parent
,
3372 CTemplate
* templ
, QString label
)
3373 : Q3ListViewItem(parent
, label
),
3375 m_MaxMumShownMorphemes(4) { }
// Creates an item nested under another Q3ListViewItem.
//   parent - parent item (forwarded to Q3ListViewItem)
//   templ  - template this item displays; presumably stored in m_Template,
//            which key() and text() dereference — TODO confirm
//   label  - item label (forwarded to Q3ListViewItem)
// m_MaxMumShownMorphemes is initialised to 4.
3377 CTemplateListViewItem::CTemplateListViewItem(Q3ListViewItem
* parent
,
3378 CTemplate
* templ
, QString label
)
3379 : Q3ListViewItem(parent
, label
),
3381 m_MaxMumShownMorphemes(4) { }
// Produces the sort key for one list-view column.
//   column    - list-view column whose key is requested
//   ascending - sort direction; forwarded to Q3ListViewItem::key()
// Numeric fields (template number, number of columns, sort complexity and the
// entry count of template columns 0..3) are formatted with QString::arg using
// a field width of 10, presumably so that string comparison follows numeric
// order — TODO confirm. Other columns defer to Q3ListViewItem.
3383 QString
CTemplateListViewItem::key( int column
, bool ascending
) const
3387 QString QSstring
= "";
3388 int NumberOfMorphemes
;
// Scalar template fields, padded to width 10.
3394 return QString("%1").arg(m_Template
->m_TemplateNumber
,10);
3396 return QString("%1").arg(m_Template
->m_NumberOfColumns
,10);
3398 return QString("%1").arg(m_Template
->m_NewSortComplexity
, 10);
// Entry count of template column 0; guarded by a null check on the column pointer.
3401 if (m_Template
-> m_Columns
[0] )
3403 NumberOfMorphemes
= m_Template
->m_Columns
[0]->Size() ;
3407 NumberOfMorphemes
= 0;
3409 return QString("%1").arg(NumberOfMorphemes
,10);
// Entry count of template column 1; same null-check guard.
// NOTE(review): columns 0 and 1 are guarded by the pointer value, columns 2 and 3
// below by m_NumberOfColumns — confirm m_Columns always has at least two slots.
3413 if (m_Template
-> m_Columns
[1] )
3415 NumberOfMorphemes
= m_Template
->m_Columns
[1]->Size() ;
3419 NumberOfMorphemes
= 0;
3421 return QString("%1").arg(NumberOfMorphemes
,10);
// Entry count of template column 2; only read when the template has >= 3 columns.
3425 if (m_Template
->m_NumberOfColumns
>= 3 )
3427 NumberOfMorphemes
= m_Template
->m_Columns
[2]->Size() ;
3431 NumberOfMorphemes
= 0;
3433 return QString("%1").arg(NumberOfMorphemes
,10);
// Entry count of template column 3; only read when the template has >= 4 columns.
3437 if (m_Template
->m_NumberOfColumns
>= 4 )
3439 NumberOfMorphemes
= m_Template
->m_Columns
[3]->Size() ;
3443 NumberOfMorphemes
= 0;
3445 return QString("%1").arg(NumberOfMorphemes
,10);
// Remaining cases fall back to the base-class text or key.
3448 return Q3ListViewItem::text( column
);
3451 else return Q3ListViewItem::key( column
, ascending
);
// Produces the display text for one list-view column.
//   column - list-view column whose text is requested
// Scalar template fields (template number, number of columns, sort complexity)
// are formatted with QString::arg. For template columns 0..3 a ", "-separated
// list of at most m_MaxMumShownMorphemes pieces is assembled in QSstring.
// Other columns defer to Q3ListViewItem::text().
// NOTE(review): the four per-column stanzas below differ only in the column
// index; they are candidates for a shared helper.
3456 QString
CTemplateListViewItem::text( int column
) const
3461 QString QSstring
= "";
3463 int NumberOfMorphemes
;
3464 int NumberOfDisplayedMorphemes
;
3466 // int PrintedMorphemes=0;
3467 QString QSOneMorpheme
, QSReversedOneMorpheme
;
3468 CStringSurrogate SSOneMorpheme
;
3469 // const char* CCDebugString1, *CCDebugString2;
// Scalar template fields, formatted without padding.
3475 return QString("%1").arg( m_Template
->m_TemplateNumber
);
3477 return QString("%1").arg( m_Template
->m_NumberOfColumns
);
3479 return QString("%1").arg( m_Template
->m_NewSortComplexity
);
// --- Template column 0 ---
// Guard: the template has no column 0.
3483 if ( m_Template
->m_NumberOfColumns
< 1)
3488 NumberOfMorphemes
= m_Template
->m_Columns
[0] ->Size() ;
// Show at most m_MaxMumShownMorphemes pieces, fewer if the column is shorter.
3490 NumberOfDisplayedMorphemes
= m_MaxMumShownMorphemes
;
3492 if(NumberOfMorphemes
<= m_MaxMumShownMorphemes
)
3494 NumberOfDisplayedMorphemes
= NumberOfMorphemes
;
// Pieces are addressed 1-based through GetPiece().
3498 for( i
= 1; i
<= NumberOfDisplayedMorphemes
; i
++ )
3500 QSstring
.append( m_Template
->m_Columns
[0] ->GetPiece(i
).Display() + ", ");
// Drop the trailing ", " left by the loop.
3502 QSstring
= QSstring
.left( QSstring
.length() - 2 );
// --- Template column 1 ---
// Guard: the template has no column 1.
3512 if ( m_Template
->m_NumberOfColumns
< 2)
3517 NumberOfMorphemes
= m_Template
->m_Columns
[1] ->Size() ;
3519 NumberOfDisplayedMorphemes
= m_MaxMumShownMorphemes
;
3521 if(NumberOfMorphemes
<= m_MaxMumShownMorphemes
)
3523 NumberOfDisplayedMorphemes
= NumberOfMorphemes
;
3527 for( i
= 1; i
<= NumberOfDisplayedMorphemes
; i
++ )
3529 QSstring
.append( m_Template
->m_Columns
[1] ->GetPiece(i
).Display() + ", ");
// Drop the trailing ", " left by the loop.
3531 QSstring
= QSstring
.left( QSstring
.length() - 2 );
// --- Template column 2 ---
// Guard: the template has no column 2.
3541 if ( m_Template
->m_NumberOfColumns
< 3)
3546 NumberOfMorphemes
= m_Template
->m_Columns
[2] ->Size() ;
3548 NumberOfDisplayedMorphemes
= m_MaxMumShownMorphemes
;
3550 if(NumberOfMorphemes
<= m_MaxMumShownMorphemes
)
3552 NumberOfDisplayedMorphemes
= NumberOfMorphemes
;
3556 for( i
= 1; i
<= NumberOfDisplayedMorphemes
; i
++ )
3558 QSstring
.append( m_Template
->m_Columns
[2] ->GetPiece(i
).Display() + ", ");
// Drop the trailing ", " left by the loop.
3560 QSstring
= QSstring
.left( QSstring
.length() - 2 );
// --- Template column 3 ---
// Guard: the template has no column 3.
3570 if ( m_Template
->m_NumberOfColumns
< 4)
3575 NumberOfMorphemes
= m_Template
->m_Columns
[3] ->Size() ;
3577 NumberOfDisplayedMorphemes
= m_MaxMumShownMorphemes
;
3579 if(NumberOfMorphemes
<= m_MaxMumShownMorphemes
)
3581 NumberOfDisplayedMorphemes
= NumberOfMorphemes
;
3585 for( i
= 1; i
<= NumberOfDisplayedMorphemes
; i
++ )
3587 QSstring
.append( m_Template
->m_Columns
[3] ->GetPiece(i
).Display() + ", ");
// Drop the trailing ", " left by the loop.
3589 QSstring
= QSstring
.left( QSstring
.length() - 2 );
// Remaining cases fall back to the base-class text.
3599 return Q3ListViewItem::text( column
);
3602 else return Q3ListViewItem::text( column
);