1 // Implementation of CAlignment methods
2 // Copyright © 2009 The University of Chicago
4 #include "StringEditGrid.h"
5 #include "StringSurrogate.h"
8 // construction/destruction.
// Builds an alignment from two plain strings.
//
// Each string is stored as a newly allocated CParse over '#' + the string,
// and the recorded lengths are the string sizes plus one, i.e. they count
// the leading '#'. Both match arrays are allocated with those lengths and
// every slot is then filled with -5.
//
// NOTE(review): -5 looks like a "not yet computed" marker, distinct from the
// -1 that other methods in this file test for -- confirm.
// NOTE(review): the array sizes read m_Length1/m_Length2 inside the
// initializer list; this assumes the lengths are declared before the match
// arrays in the class (members initialize in declaration order) -- confirm
// against the header.
// NOTE(review): some lines of this definition (remaining initializers and
// braces) are not visible in this excerpt.
10 CAlignment::CAlignment(const QString String1
, const QString String2
)
11 : m_Str1(new CParse(CStringSurrogate(QString('#') + String1
))),
12 m_Str2(new CParse(CStringSurrogate(QString('#') + String2
))),
13 m_Length1(String1
.size() + 1),
14 m_Length2(String2
.size() + 1),
15 m_Match1(new int[m_Length1
]), // filled below
16 m_Match2(new int[m_Length2
]), // filled below
// Initialize every slot of both match arrays to -5.
22 std::fill(&m_Match1
[0], &m_Match1
[m_Length1
], -5);
23 std::fill(&m_Match2
[0], &m_Match2
[m_Length2
], -5);
// Builds an alignment from two existing parses.
//
// The lengths are each parse's GetKeyLength() plus one. The stored strings
// are created in the body: each starts as a CParse holding '#', has the
// argument parse appended, and is then passed through
// SimplifyParseStructure(). Both match arrays are filled with -5.
//
// NOTE(review): Parse1 and Parse2 are dereferenced without a null check in
// the visible lines -- callers presumably must pass valid pointers; confirm.
// NOTE(review): -5 looks like a "not yet computed" marker -- confirm.
// NOTE(review): some lines of this definition (remaining initializers and
// braces) are not visible in this excerpt.
26 CAlignment::CAlignment(CParse
* Parse1
, CParse
* Parse2
)
27 : m_Str1(), // initialized below
28 m_Str2(), // initialized below
29 m_Length1(Parse1
->GetKeyLength() + 1),
30 m_Length2(Parse2
->GetKeyLength() + 1),
31 m_Match1(new int[m_Length1
]), // filled below
32 m_Match2(new int[m_Length2
]), // filled below
// First string: '#' followed by a copy of Parse1's contents.
39 m_Str1
= new CParse(QChar('#'));
40 m_Str1
->Append(*Parse1
);
41 m_Str1
->SimplifyParseStructure();
// Second string: '#' followed by a copy of Parse2's contents.
44 m_Str2
= new CParse(QChar('#'));
45 m_Str2
->Append(*Parse2
);
46 m_Str2
->SimplifyParseStructure();
// Initialize every slot of both match arrays to -5.
48 std::fill(&m_Match1
[0], &m_Match1
[m_Length1
], -5);
49 std::fill(&m_Match2
[0], &m_Match2
[m_Length2
], -5);
// Builds a copy of the alignment pointed to by `other`.
//
// The two CParse strings are copy-constructed into new objects, fresh match
// arrays are allocated with other's lengths, and the score, slip, span and
// identity members are copied. The body then copies other's match arrays
// over the range [0, length).
//
// NOTE(review): `other` is dereferenced without a null check in the visible
// lines -- confirm callers never pass a null pointer.
// NOTE(review): the destination argument of each std::copy call is not
// visible in this excerpt; presumably it is this object's corresponding
// match array -- confirm.
52 CAlignment::CAlignment(const CAlignment
* other
)
53 : m_Str1(new CParse(*other
->m_Str1
)),
54 m_Str2(new CParse(*other
->m_Str2
)),
55 m_Length1(other
->m_Length1
),
56 m_Length2(other
->m_Length2
),
57 m_Match1(new int[m_Length1
]), // filled below
58 m_Match2(new int[m_Length2
]), // filled below
59 m_Score(other
->m_Score
),
60 m_Slips(other
->m_Slips
),
61 m_Spans(other
->m_Spans
),
62 m_Identities(other
->m_Identities
)
// Copy other's first match array (destination not visible here).
64 std::copy(&other
->m_Match1
[0], &other
->m_Match1
[m_Length1
],
// Copy other's second match array (destination not visible here).
66 std::copy(&other
->m_Match2
[0], &other
->m_Match2
[m_Length2
],
// Copy constructor.
//
// The two CParse strings are copy-constructed into new objects, fresh match
// arrays are allocated with other's lengths, and the score, slip, span and
// identity members are copied. The body then copies other's match arrays
// over the range [0, length).
//
// NOTE(review): the destination argument of each std::copy call is not
// visible in this excerpt; presumably it is this object's corresponding
// match array -- confirm.
70 CAlignment::CAlignment(const CAlignment
& other
)
71 : m_Str1(new CParse(*other
.m_Str1
)),
72 m_Str2(new CParse(*other
.m_Str2
)),
73 m_Length1(other
.m_Length1
),
74 m_Length2(other
.m_Length2
),
75 m_Match1(new int[m_Length1
]), // filled below
76 m_Match2(new int[m_Length2
]), // filled below
77 m_Score(other
.m_Score
),
78 m_Slips(other
.m_Slips
),
79 m_Spans(other
.m_Spans
),
80 m_Identities(other
.m_Identities
)
// Copy other's first match array (destination not visible here).
82 std::copy(&other
.m_Match1
[0], &other
.m_Match1
[m_Length1
],
// Copy other's second match array (destination not visible here).
84 std::copy(&other
.m_Match2
[0], &other
.m_Match2
[m_Length2
],
// Destructor. Its body is not visible in this excerpt.
// NOTE(review): the constructors in this file allocate m_Str1 and m_Str2
// with new and m_Match1 and m_Match2 with new[]; presumably they are
// released here with delete / delete[] respectively -- confirm.
88 CAlignment::~CAlignment()
96 CAlignment
& CAlignment::operator=(const CAlignment
& other
)
98 *m_Str1
= *other
.m_Str1
;
99 *m_Str2
= *other
.m_Str2
;
100 m_Length1
= other
.m_Length1
;
101 m_Length2
= other
.m_Length2
;
102 std::copy(&other
.m_Match1
[0], &other
.m_Match1
[m_Length1
],
104 std::copy(&other
.m_Match2
[0], &other
.m_Match2
[m_Length2
],
106 m_Score
= other
.m_Score
;
107 m_Slips
= other
.m_Slips
;
108 m_Spans
= other
.m_Spans
;
109 m_Identities
= other
.m_Identities
;
// Tests whether character n of the first string is aligned to an identical
// character of the second string.
//
// n is used as an index into both m_Match1 and m_Str1. A match entry of -1
// yields FALSE immediately; otherwise the character at n in the first string
// is compared with the character of the second string at position
// m_Match1[n]. The statements that produce the final result are not visible
// in this excerpt.
//
// NOTE(review): only -1 is screened out in the visible lines; any other
// negative entry (for instance the -5 the constructors fill in) would be
// passed to GetChar -- confirm the array is always populated before use.
// NOTE(review): no bounds check on n is visible.
113 bool CAlignment::String1CharMatches(int n
)// true if the char is aligned and identical to matchee
116 if ( m_Match1
[n
] == -1 ) return FALSE
;
117 if ( m_Str1
->GetChar(n
) == m_Str2
->GetChar( m_Match1
[ n
] ) )
// Tests whether character n of the second string is aligned to an identical
// character of the first string.
//
// n is used as an index into both m_Match2 and m_Str2. A match entry of -1
// yields FALSE immediately; otherwise the character at n in the second
// string is compared with the character of the first string at position
// m_Match2[n]. The statements that produce the final result are not visible
// in this excerpt.
//
// NOTE(review): only -1 is screened out in the visible lines; any other
// negative entry (for instance the -5 the constructors fill in) would be
// passed to GetChar -- confirm the array is always populated before use.
// NOTE(review): no bounds check on n is visible.
126 bool CAlignment::String2CharMatches(int n
)// true if the char is aligned and identical to matchee
129 if ( m_Match2
[n
] == -1 ) return FALSE
;
130 if ( m_Str2
->GetChar(n
) == m_Str1
->GetChar( m_Match2
[ n
] ) )
// Tests whether position n of the first string and position m of the second
// string are aligned with each other and hold the same character.
//
// The visible part of the condition requires m_Match1[n] == m and equality
// of the two characters. Part of the condition and the return statements are
// not visible in this excerpt.
//
// NOTE(review): no bounds check on n or m is visible; callers in this file
// sometimes guard the indices against m_Length1/m_Length2 themselves.
138 bool CAlignment::PerfectMatch (int n
, int m
)// true if these two chars are identical and aligned
141 if ( m_Match1
[n
] == m
&&
143 m_Str1
->GetChar(n
) == m_Str2
->GetChar(m
)
// Renders the alignment as text rows and returns them in a CParse.
//
// Three character rows are filled column by column: Top carries characters
// of the first string, Bottom characters of the second string, and Middle a
// marker for the column: '|' where the two characters are aligned and
// identical, '#' where they are linked but differ, and ' ' otherwise.
// The walk starts at position 1 in both strings and runs until both
// positions reach the respective string length. The visible case labels
// are "Both match" (0) and "Linked but not a match" (3); the other cases'
// labels are not visible in this excerpt.
// After the walk each row is NUL-terminated, and the visible appends put
// Middle, Bottom, "Slips: " with m_Slips and "Spans: " with m_Spans into the
// result.
//
// NOTE(review): the three rows are fixed 50-char buffers while `index` grows
// with every emitted column; no bound on `index` is visible in this excerpt,
// so long alignments may write past the buffers -- verify against the full
// source.
// NOTE(review): many lines of this function (switch header, state updates,
// braces, the append of Top, the return) are not visible in this excerpt.
154 CParse
CAlignment::CalculateDisplay()
159 UINT index = 0; // on the page
160 int locTop = 1, locBottom = 1; // because 0's are for the word boundary symbols #
162 char Top [50], Bottom[50], Middle[50];
164 CParse TopPiece, BottomPiece;
// Continue until both strings have been consumed.
174 while ( locTop < m_Length1 || locBottom < m_Length2 )
178 case 0: // Both match
// Emit a run of identical, aligned characters with '|' between them.
180 while ( PerfectMatch (locTop, locBottom) &&
184 Top [ index ] = m_Str1->GetChar( locTop++ );
185 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
186 Middle [ index ] = '|';
190 Bottom [ index ] = ' ';
191 Middle [ index ] = ' ';
195 if ( locTop == m_Length1
196 && locBottom == m_Length2 )
198 //----------------------------------------------//
201 if ( locTop == m_Length1 )
206 if ( locBottom == m_Length2 )
212 if (m_Match1 [ locTop ] == locBottom
214 m_Match2 [ locBottom ] == locTop
219 else if ( m_Match1 [ locTop ] == -1 )
// Emit a run of first-string characters that have no partner (-1).
233 while ( m_Match1 [ locTop ] == -1 )
235 TopPiece.Append( m_Str1->GetChar( locTop ) );
237 Top [ index ] = m_Str1->GetChar( locTop++ );
238 Bottom [ index ] = ' ';
239 Middle [ index ] = ' ';
243 Bottom [ index ] = ' ';
244 Middle [ index] = ' ';
248 TopPiece.Append (' ');
249 //-----------------------------------------------//
250 if ( PerfectMatch (locTop, locBottom) )
254 else if (m_Match1 [ locTop ] == locBottom
256 m_Match2 [ locBottom ] == locTop
// Emit a run of second-string characters that have no partner (-1).
269 while ( m_Match2 [ locBottom ] == -1 )
272 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
273 Middle [ index ] = ' ';
277 Bottom [ index ] = ' ';
278 Middle [ index] = ' ';
281 //-----------------------------------------------//
282 if ( PerfectMatch (locTop, locBottom) )
287 else if (m_Match1 [ locTop ] == locBottom
289 m_Match2 [ locBottom ] == locTop
297 case 3: // Linked but not a match
// Emit a run of mutually linked but different characters with '#' between.
299 while ( m_Match1 [ locTop ] == locBottom
301 m_Match2 [ locBottom ] == locTop
303 m_Str1->GetChar(locTop ) != m_Str2->GetChar( locBottom )
306 Top [ index] = m_Str1->GetChar( locTop++ );
307 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
308 Middle [ index] = '#';
313 Bottom [ index ] = ' ';
314 Middle [ index] = ' ';
318 //----------------------------------------------//
319 if (m_Match1 [ locTop ] == locBottom
321 m_Match2 [ locBottom ] == locTop
326 else if ( m_Match1 [ locTop ] == -1 )
// Terminate the three rows so they can be appended as C strings.
351 Top [ index ] = '\0';
352 Middle [ index ] = '\0';
353 Bottom [index ] = '\0';
358 Return.Append(Middle);
360 Return.Append(Bottom);
362 Return.Append ("Slips: " );
363 Return.Append( m_Slips);
364 Return.Append ("Spans: " );
365 Return.Append( m_Spans);
// Returns the pieces of the two strings that disagree with each other.
//
// If m_Slips is not exactly 1 the result is returned as it stands at that
// point (its initialization is not visible in this excerpt). Otherwise every
// piece of the first string that has no counterpart in the second string
// (Str2MatchForStr1Piece() <= 0), or whose counterpart differs, is appended
// to the result; if nothing was appended, the result becomes a parse built
// from the text "NULL". The same scan is then run over the pieces of the
// second string, and if the result still holds exactly one piece afterwards,
// "NULL" is appended as the second piece.
373 CParse
CAlignment::FindSubstitution()
374 // this assumes that there is only one part of the alignment
375 // where the two words disagree; and it returns the two
376 // pieces that disagree with each other.
379 int Str1Match
= 0, Str2Match
= 0;
// Anything other than exactly one slip is outside this method's assumption.
381 if ( m_Slips
!= 1 ) { return Return
; }
// Collect pieces of the first string that are unmatched or differ.
384 for (piece
= 1; piece
<= m_Str1
->Size(); piece
++)
386 Str2Match
= Str2MatchForStr1Piece (piece
);
387 if ( Str2Match
<= 0 || m_Str1
->GetPiece(piece
) != m_Str2
->GetPiece(Str2Match
) )
389 Return
.Append( m_Str1
->GetPiece( piece
) ) ;
// Nothing differed on the first string's side: record "NULL" instead.
393 if ( Return
.Size() == 0)
395 Return
= CParse(CStringSurrogate(QString("NULL").unicode(), 0, 4));
397 //---------------------------------------------------------------//
// Collect pieces of the second string that are unmatched or differ.
398 for (piece
= 1; piece
<= m_Str2
->Size(); piece
++)
400 Str1Match
= Str1MatchForStr2Piece (piece
);
401 if ( Str1Match
<= 0 || m_Str2
->GetPiece(piece
) != m_Str1
->GetPiece(Str1Match
) )
403 Return
.Append( m_Str2
->GetPiece( piece
) ) ;
// Nothing differed on the second string's side: record "NULL" instead.
407 if ( Return
.Size() == 1)
409 Return
.Append( CStringSurrogate(QString("NULL").unicode(), 0, 4));
// Builds a parse that starts as a copy of the first stored string and has
// the second stored string appended to it.
// NOTE(review): the remaining lines of this function (including the return)
// are not visible in this excerpt.
416 CParse
CAlignment::SpellOut()
419 CParse
Return ( *m_Str1
);
420 Return
.Append ( *m_Str2
);
// Summarizes where the two strings agree and where they differ.
//
// If m_Spans is greater than 3 the result is returned as it stands at that
// point (its initialization is not visible in this excerpt). Otherwise the
// walk alternates between two states, starting in the matching state when
// positions (1,1) are a perfect match:
//  - matching: advance over perfectly matching positions, then append the
//    shared stretch of the first string (or "NULL" when that stretch is
//    empty);
//  - not matching: skip the characters of each string that do not match,
//    then append a single '_'.
// NOTE(review): the declarations of loc1/loc2, the state switching, the use
// of Piece2 and the return are not visible in this excerpt.
427 CParse
CAlignment::FindContext()
429 /* This returns a parse consisting of 2 or 3 pieces; each piece is either something shared by
430 both strings of the alignment, or else it's an underscore, representing the difference(s)
431 between the strings. */
437 int StartLoc1
= 0, StartLoc2
= 0;
438 int State
; // 1=matching, 2= not matching
// More than three spans cannot be expressed as 2 or 3 pieces.
440 if (m_Spans
> 3 ) { return Return
; }
443 if ( PerfectMatch (1,1) )
445 State
= 1; // matching
449 State
= 2; // not matching
// Continue until both strings have been consumed.
452 while ( loc1
< m_Length1
|| loc2
< m_Length2
)
// Matching state: advance while the two positions are a perfect match.
456 while ( loc1
< m_Length1
&& loc2
< m_Length2
&& PerfectMatch (loc1
, loc2
) )
// The stretches covered since StartLoc1 / StartLoc2 in each string.
462 CStringSurrogate
Piece1 ( m_Str1
->GetKeyPointer(), StartLoc1
, loc1
- StartLoc1
);
463 CStringSurrogate
Piece2 ( m_Str2
->GetKeyPointer(), StartLoc2
, loc2
- StartLoc2
);
465 if ( Piece1
.GetLength () == 0 ) { Return
.Append (CStringSurrogate (QString("NULL").unicode(), 0, 4 ) ); }
466 else { Return
.Append ( Piece1
); }
467 // yuhuask Could be ?
// Not-matching state: skip non-matching characters of the first string...
475 while ( loc1
< m_Length1
&& ! String1CharMatches (loc1
) )
// ...and of the second string.
480 while ( loc2
< m_Length2
&& !String2CharMatches (loc2
) )
485 // yuhuask Here, loc2 == match1[Loc1] or Loc2=m_Length2&&Loc1=m_Length1
// The differing region is represented by a single underscore piece.
487 Return
.Append(QChar('_'));
503 double CAlignment::FindStringEditDistance()
505 CStringEditGrid
Grid (this);
506 return Grid
.FindBestAlignment ( *this );
511 /// piece number of the piece that corresponds to piece n, or 0 if none
/// Maps piece n of the first string to a piece of the second string.
/// The first character position of piece n is looked up in m_Match1; a
/// negative entry yields 0. Otherwise the pieces of the second string are
/// scanned for one whose first character sits at the matched position.
/// NOTE(review): besides 0, a return of -1 is visible below (when Str2Piece
/// is negative), which the one-line summary above does not mention.
/// NOTE(review): the result of the range check on n, the declaration of
/// Str2Piece and the final return are not visible in this excerpt.
512 int CAlignment::Str2MatchForStr1Piece(int n
)
// Piece numbers are 1-based; reject anything outside 1..Size().
514 if (n
< 1 || n
> m_Str1
->Size())
517 int Str1Spot
= m_Str1
->GetPositionOfFirstCharOfThisPiece( n
);
518 int Str2Spot
= m_Match1
[ Str1Spot
];
// A negative entry means the piece's first character has no counterpart.
520 if ( Str2Spot
< 0 ) { return 0; }
// Look for the second-string piece that starts at the matched position.
523 for (int i
= 1; i
<= m_Str2
->Size(); i
++)
525 if ( m_Str2
->GetPositionOfFirstCharOfThisPiece (i
) == Str2Spot
)
531 if ( Str2Piece
< 0 ) { return -1;}
539 /// piece number of the piece that corresponds to piece n, or 0 if none
/// Maps piece n of the second string to a piece of the first string.
/// The first character position of piece n is looked up in m_Match2; a
/// negative entry yields 0. Otherwise the pieces of the first string are
/// scanned for one whose first character sits at the matched position.
/// NOTE(review): besides 0, a return of -1 is visible below (when Str1Piece
/// is negative), which the one-line summary above does not mention.
/// NOTE(review): the result of the range check on n, the declaration of
/// Str1Piece and the final return are not visible in this excerpt.
540 int CAlignment::Str1MatchForStr2Piece(int n
)
// Piece numbers are 1-based; reject anything outside 1..Size().
542 if (n
< 1 || n
> m_Str2
->Size())
545 int Str2Spot
= m_Str2
->GetPositionOfFirstCharOfThisPiece( n
);
546 int Str1Spot
= m_Match2
[ Str2Spot
];
// A negative entry means the piece's first character has no counterpart.
548 if ( Str1Spot
< 0 ) { return 0; }
// Look for the first-string piece that starts at the matched position.
551 for (int i
= 1; i
<= m_Str1
->Size(); i
++)
553 if ( m_Str1
->GetPositionOfFirstCharOfThisPiece (i
) == Str1Spot
)
559 if ( Str1Piece
< 0 ) { return -1;}