1 // Implementation of CCompound, CCompoundListViewItem methods
2 // Copyright © 2009 The University of Chicago
10 #include "CompoundCollection.h"
11 #include "LinkerCollection.h"
12 #include "StringFunc.h"
// Constructs a compound row whose parent is a Q3ListView.
// The base Q3ListViewItem is initialised with (parent, compound); the visible
// body then copies the constructor arguments into the matching m_ members:
// compound pointer, most frequent piece, its count, the piece-count text,
// prefixness, suffixness and parse count.
// NOTE(review): several parameter lines, the braces and possibly other member
// assignments are not visible in this listing -- confirm against the header.
15 CCompoundListViewItem::CCompoundListViewItem( Q3ListView
*parent
,
20 QString mostFreqPiece
,
26 StringToString
* filter
)
27 : Q3ListViewItem( parent
, compound
)
29 m_compound
= pCompound
;
32 m_mostFreqPiece
= mostFreqPiece
;
33 m_MFPCount
= MFPCount
;
34 m_pieceCounts
= pieceCounts
;
35 m_prefixness
= prefixness
;
36 m_suffixness
= suffixness
;
37 m_parseCount
= parseCount
;
// Constructs a compound row nested under another Q3ListViewItem.
// Apart from the parent type, the visible initialisation matches the
// Q3ListView overload: the base class receives (parent, compound) and the
// arguments are copied into the m_ members.
// NOTE(review): several parameter lines, the braces and possibly other member
// assignments are not visible in this listing -- confirm against the header.
42 CCompoundListViewItem::CCompoundListViewItem( Q3ListViewItem
*parent
,
47 QString mostFreqPiece
,
53 StringToString
* filter
)
54 : Q3ListViewItem( parent
, compound
)
56 m_compound
= pCompound
;
59 m_mostFreqPiece
= mostFreqPiece
;
60 m_MFPCount
= MFPCount
;
61 m_pieceCounts
= pieceCounts
;
62 m_prefixness
= prefixness
;
63 m_suffixness
= suffixness
;
64 m_parseCount
= parseCount
;
// Returns the sort key for column `col`.
// The numeric columns are rendered with a field width of 10; the parse count
// is used as-is, while the most-frequent-piece count, prefixness and
// suffixness are first multiplied by 1000 and truncated to int.
// Presumably the fixed width makes string comparison follow numeric order --
// confirm. Any other column defers to Q3ListViewItem::key.
// NOTE(review): the switch/case labels that map columns to these returns are
// not visible in this listing.
69 QString
CCompoundListViewItem::key(int col
, bool asc
) const
73 return QString("%1").arg(m_parseCount
, 10);
75 return QString("%1").arg(static_cast<int>(
76 1000 * m_MFPCount
), 10);
78 return QString("%1").arg(static_cast<int>(
79 1000 * m_prefixness
), 10);
81 return QString("%1").arg(static_cast<int>(
82 1000 * m_suffixness
), 10);
84 return Q3ListViewItem::key(col
, asc
);
// Returns the display text for column `col`.
// Visible cases: the parse count as a plain number, the most frequent piece
// passed through Filter() with m_filter, and prefixness / suffixness in
// fixed notation with 4 decimals. Any other column defers to
// Q3ListViewItem::text.
// NOTE(review): the switch/case labels that map columns to these returns are
// not visible in this listing.
88 QString
CCompoundListViewItem::text(int col
) const
92 return QString::number(m_parseCount
);
96 return Filter(m_filter
, m_mostFreqPiece
);
100 return QString::number(m_prefixness
, 'f', 4);
104 return QString::number(m_suffixness
, 'f', 4);
106 return Q3ListViewItem::text(col
);
// Constructs an empty compound belonging to mini-lexicon `mini`.
// Forwards `mini` to the CLParse base and heap-allocates the component and
// linker containers; the destructor deletes both.
// NOTE(review): remaining initialisation (e.g. of m_Parses) is not visible in
// this listing.
110 CCompound::CCompound( CMiniLexicon
* mini
) : CLParse( mini
)
112 m_MyComponents
= new Components();
113 m_MyLinkers
= new Linkers();
// Constructs a compound from string surrogate `SS` in mini-lexicon `mini`.
// Forwards both arguments to the CLParse base and heap-allocates the
// component and linker containers; the destructor deletes both.
// NOTE(review): remaining initialisation (e.g. of m_Parses) is not visible in
// this listing.
121 CCompound::CCompound ( const CStringSurrogate
& SS
, CMiniLexicon
* mini
) : CLParse (SS
, mini
)
123 m_MyComponents
= new Components();
124 m_MyLinkers
= new Linkers();
131 CCompound::~CCompound()
133 if( m_MyComponents
) delete m_MyComponents
;
134 if( m_MyLinkers
) delete m_MyLinkers
;
135 if( m_Parses
) delete m_Parses
;
// Adds this compound to list view `List`.
//
// Parameters:
//   List      - list view receiving the new row(s)
//   filter    - passed to the Display() calls and to each created item
//   separator - passed to parse.Display() when rendering a parse
//
// Visible behaviour:
//   * When m_Parses holds more than one parse, a parent row is created on
//     `List` and opened, then one child row is created per parse. The child
//     for the parse whose number equals GetBestParse() is prefixed with "*".
//     While iterating, the parent's MFS count, score, prefixness and
//     suffixness are updated whenever a parse beats the running maximum.
//   * Otherwise a single row is created on `List` from the first parse.
//   * A last fallback creates a row from Display( separator, filter ) alone.
// For each piece of a parse at least MINIMUM_STEM_LENGTH long, the piece is
// looked up in the lexicon's stems, then words; pieces with no stem set are
// looked up among the linkers.
// NOTE(review): braces, else-branches and several declarations/updates are
// not visible in this listing, so the exact branch conditions above should be
// confirmed against the full source.
140 void CCompound::CompoundListDisplay( Q3ListView
* List
, StringToString
* filter
, QChar separator
)
150 highest_prefixness
= 0.0,
151 highest_suffixness
= 0.0;
156 QStringList pieceCounts
;
157 QString mostFreqStem
;
165 int MINIMUM_STEM_LENGTH
= m_pLexicon
->GetIntParameter( "Main\\MinimumStemLength", 3 );
167 CCompoundListViewItem
* parent
, * item
;
// Several parses: build a parent row plus one child row per parse.
174 if( m_Parses
->count() > 1 )
176 parent
= new CCompoundListViewItem( List
, Display( filter
), this, -1, 0.0, QString::null
, 0.0, QString::null
, 0.0, 0.0, m_Parses
->count(), filter
);
177 parent
->setOpen( TRUE
);
180 for( pEdge
= m_Parses
->first(); pEdge
; pEdge
= m_Parses
->next() )
181 //for (int z = 0; z < m_Parses->size(); z++)
182 { // pEdge = m_Parses->at(z);
183 pEdge
->GetParse( &parse
);
// Pieces are addressed with 1-based indices.
191 for( i
= 1; i
<= parse
.Size(); i
++ )
196 if( parse
.GetPiece(i
).Display().length() >= MINIMUM_STEM_LENGTH
)
// Stems take precedence; words are only consulted when no stem set exists.
198 pStemSet
= m_pLexicon
->GetAllStems()->find( parse
.GetPiece(i
).Display() );
199 if( !pStemSet
) pStemSet
= m_pLexicon
->GetAllWords()->find( parse
.GetPiece(i
).Display() );
202 if( !pStemSet
) pLinker
= *m_pLexicon
->GetLinkers() ^= parse
.GetPiece(i
);
204 if( pStemSet
|| pLinker
)
208 usage
= 2.0; // This gives us the identity (when 1 is subtracted) for linking elements, we don't want to count them
209 pieceCounts
.append( QString("%1").arg( pLinker
->GetCompoundCount(), 0, 'f', 1 ) );
213 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
214 for (int z
= 0; z
< pStemSet
->size(); z
++)
215 { pStem
= pStemSet
->at(z
);
216 usage
= pStem
->GetCompoundCount();
217 pieceCounts
.append( QString("%1").arg( usage
, 0, 'f', 1 ) );
219 if( usage
== 0.0 ) continue;
221 if( usage
> MFPCount
)
223 mostFreqStem
= pStem
->Display();
237 score
*= usage
- 1.0;
// Prefixness comes from the first piece, suffixness from the last piece;
// both read the affixness of the first stem in the set.
239 if( pStemSet
&& i
== 1 ) prefixness
= pStemSet
->at(0)->GetAffixness();
240 if( pStemSet
&& i
== parse
.Size() ) suffixness
= pStemSet
->at(0)->GetAffixness();
// Propagate per-parse maxima to the parent row.
243 if( MFPCount
> mostMFPCount
) parent
->SetMFSCount( MFPCount
);
244 if( score
> bestScore
) parent
->SetScore( score
);
245 if( prefixness
> highest_prefixness
) parent
->SetPrefixness( prefixness
);
246 if( suffixness
> highest_suffixness
) parent
->SetSuffixness( suffixness
);
// The best parse is flagged with a leading "*".
248 if( parseNumber
== GetBestParse() )
250 item
= new CCompoundListViewItem( parent
, "*" + parse
.Display( separator
, filter
),
251 this, index
++, score
, mostFreqStem
, MFPCount
,
252 pieceCounts
.join(", "),
253 prefixness
, suffixness
, 1, filter
);
257 item
= new CCompoundListViewItem( parent
, parse
.Display( separator
, filter
),
258 this, index
++, score
, mostFreqStem
, MFPCount
,
259 pieceCounts
.join(", "),
260 prefixness
, suffixness
, 1, filter
);
// Flat case: the same per-piece computation applied to the first parse only.
267 m_Parses
->first()->GetParse( &parse
);
270 for( i
= 1; i
<= parse
.Size(); i
++ )
276 if( parse
.GetPiece(i
).Display().length() >= MINIMUM_STEM_LENGTH
)
278 pStemSet
= m_pLexicon
->GetAllStems()->find( parse
.GetPiece(i
).Display() );
279 if( !pStemSet
) pStemSet
= m_pLexicon
->GetAllWords()->find( parse
.GetPiece(i
).Display() );
282 if( !pStemSet
) pLinker
= *m_pLexicon
->GetLinkers() ^= parse
.GetPiece(i
);
284 if( pStemSet
|| pLinker
)
288 usage
= 2.0; // This gives us the identity (when 1 is subtracted) for linking elements, we don't want to count them
289 pieceCounts
.append( QString("%1").arg( pLinker
->GetCompoundCount(), 0, 'f', 1 ) );
293 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
294 for (int z
= 0; z
< pStemSet
->size(); z
++)
295 { pStem
= pStemSet
->at(z
);
296 usage
= pStem
->GetCompoundCount();
297 pieceCounts
.append( QString("%1").arg( usage
, 0, 'f', 1 ) );
299 if( usage
== 0.0 ) continue;
301 if( usage
> MFPCount
)
303 mostFreqStem
= pStem
->Display();
317 score
*= usage
- 1.0;
319 if( pStemSet
&& i
== 1 ) prefixness
= pStemSet
->at(0)->GetAffixness();
320 if( pStemSet
&& i
== parse
.Size() ) suffixness
= pStemSet
->at(0)->GetAffixness();
323 m_Parses
->first()->GetParse( &parse
);
324 item
= new CCompoundListViewItem( List
, parse
.Display( separator
, filter
),
325 this, index
++, score
, mostFreqStem
, MFPCount
,
326 pieceCounts
.join(", "),
327 prefixness
, suffixness
, m_Parses
->count(), filter
);
// Fallback row built from the compound's own display string.
332 item
= new CCompoundListViewItem( List
, Display( separator
, filter
), this );
// Detaches this compound from all of its components and linkers.
// The component map is cleared outright. Each linker recorded in m_MyLinkers
// has this compound's corpus count subtracted from its own corpus count
// before the linker map is cleared. Linkers themselves are not deleted.
// NOTE(review): the statement that assigns pLinker from the iterator is not
// visible in this listing.
337 void CCompound::DetachAllPieces()
341 // Not necessary unless we have some tracking of compounds going on in
342 // the stems, which we don't ... yet. (TODO : this + corpus count?)
343 m_MyComponents
->clear();
345 Linkers::Iterator it
;
346 for( it
= m_MyLinkers
->begin(); it
!= m_MyLinkers
->end(); ++it
)
350 pLinker
->IncrementCorpusCount( -1 * GetCorpusCount() );
352 // The linker is not deleted because it may be part of another
353 // parse of the same compound
355 m_MyLinkers
->clear();
// Selects parse number `i` as the best parse of this compound.
// Visible behaviour: a non-negative `i` that is >= the number of parses is
// treated specially (the handling itself is not visible -- presumably an
// early exit; confirm). The chosen edge's parse is then collapsed into this
// object using '.' as the separator, and each piece (1-based) is attached
// either as a stem set in m_MyComponents or as a linker in m_MyLinkers.
// NOTE(review): several statements are truncated in this listing, including
// the handling of negative `i`, the detach step and the arguments of the
// final IncrementCorpusCount call.
359 void CCompound::SetBestParse(int i
)
361 if (i
>= 0 && static_cast<unsigned int>(i
) >= m_Parses
->count())
368 const int MINIMUM_STEM_LENGTH
= m_pLexicon
->GetIntParameter(
369 "Main\\MinimumStemLength", 3);
371 CEdge
* pEdge
= m_Parses
->at(i
);
373 pEdge
->GetParse(&oneParse
);
374 this->Collapse(CStringSurrogate(oneParse
.Display('.')), '.');
376 // Detach components from stems and linkers
379 // Attach components to stems and linkers
380 for (int j
= 1; j
<= Size(); ++j
) {
381 CStringSurrogate piece_surrogate
= GetPiece(j
);
382 QString piece
= piece_surrogate
.Display();
384 QList
<CStem
*>* pStemSet
= 0;
// Only pieces of at least MINIMUM_STEM_LENGTH are looked up as stems/words.
386 if (oneParse
.GetPiece(j
).Display().size() >=
387 MINIMUM_STEM_LENGTH
) {
388 pStemSet
= m_pLexicon
->GetAllStems()->find(piece
);
390 pStemSet
= m_pLexicon
->GetAllWords()->find(
395 m_MyComponents
->insert(j
, pStemSet
);
396 // XXX. corpus count?
398 // it is a linker element
399 if (CLinker
* pLinker
= *m_pLexicon
->GetLinkers() ^=
401 m_MyLinkers
->insert(j
, pLinker
);
402 pLinker
->IncrementCorpusCount(
// Replaces this compound's parse list.
//
// Parameters:
//   parses          - NOTE(review): presumably becomes the new m_Parses; the
//                     assignment is not visible in this listing -- confirm
//   pComponentCount - not referenced in the visible lines
//   pLinkerCount    - accumulator incremented in the attach pass
//
// Visible behaviour, in order:
//   1. Detach pass over the current m_Parses: for every piece found in the
//      shared component map, each stem's compound count is decremented; when
//      it drops to <= 0 the piece is removed from the map and the count is
//      reset to 0. Pieces treated as linkers have this compound removed and
//      their counts decremented; a linker whose compound count is <= 0 is
//      removed from the lexicon's linker collection.
//   2. The old m_Parses is deleted.
//   3. Attach pass over m_Parses: each piece is resolved through the shared
//      component map, then stems, then words; resolved stem sets are recorded
//      in m_MyComponents and their stems' compound counts incremented. Linker
//      pieces are inserted (operator <<) into the linker collection, recorded
//      in m_MyLinkers, and their counts incremented.
// NOTE(review): step 3 iterates m_Parses right after it was deleted in the
// visible code, so a reassignment must exist on a line not shown -- confirm.
410 void CCompound::SetParses( Q3PtrList
<CEdge
>* parses
, double* pComponentCount
, double* pLinkerCount
)
420 // unused variable 'componentCoun'
421 // double componentCount = 0.0;
423 ComponentMap
* allComponents
= m_pLexicon
->GetCompounds()->GetComponentMap();
425 int MINIMUM_STEM_LENGTH
= m_pLexicon
->GetIntParameter( "Main\\MinimumStemLength", 3 );
427 // Detach components, stems, and linkers
430 for( pEdge
= m_Parses
->first(); pEdge
; pEdge
= m_Parses
->next() )
431 //for (int z = 0; z < m_Parses->size(); z++)
432 { // pEdge = m_Parses->at(z);
433 pEdge
->GetParse( &oneParse
);
434 compound
= oneParse
.Display('.');
435 ssCompound
= compound
;
436 oneParse
.Collapse( ssCompound
, '.' );
438 for( int j
= 1; j
<= oneParse
.Size(); j
++ )
441 if( allComponents
->find( oneParse
.GetPiece(j
).Display() ) != allComponents
->end() )
443 pStemSet
= allComponents
->find( oneParse
.GetPiece(j
).Display() ).data();
448 // TODO: remove stem to compound links (these don't exist yet)
449 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
450 for (int z
= 0; z
< pStemSet
->size(); z
++)
451 { pStem
= pStemSet
->at(z
);
452 pStem
->IncrementCompoundCount(
453 -double(GetCorpusCount()) /
// Clamp at zero and drop the piece from the shared map once unused.
455 if( pStem
->GetCompoundCount() <= 0.0 )
457 allComponents
->remove( oneParse
.GetPiece(j
).Display() );
458 pStem
->SetCompoundCount( 0.0 );
465 // It is a linker element
466 pLinker
= *m_pLexicon
->GetLinkers() ^= oneParse
.GetPiece(j
);
470 pLinker
->RemoveCompound( this );
471 pLinker
->IncrementCompoundCount(
474 pLinker
->IncrementCorpusCount(
481 if( pLinker
->GetCompoundCount() <= 0.0 )
483 m_pLexicon
->GetLinkers()->RemoveMember( pLinker
);
492 if( m_Parses
) delete m_Parses
;
495 for( pEdge
= m_Parses
->first(); pEdge
; pEdge
= m_Parses
->next() )
496 //for (int z = 0; z < m_Parses->size(); z++)
497 { //pEdge = m_Parses->at(z);
498 pEdge
->GetParse( &oneParse
);
500 compound
= oneParse
.Display('.');
501 ssCompound
= compound
;
502 oneParse
.Collapse( ssCompound
, '.' );
504 // Attach components, stems, and linkers
505 for( int j
= 1; j
<= oneParse
.Size(); j
++ )
// Resolution order: shared component map, then stems, then words.
509 if( allComponents
->find( oneParse
.GetPiece(j
).Display() ) != allComponents
->end() )
511 pStemSet
= allComponents
->find( oneParse
.GetPiece(j
).Display() ).data();
513 else if( oneParse
.GetPiece(j
).Display().length() >= MINIMUM_STEM_LENGTH
)
515 pStemSet
= m_pLexicon
->GetAllStems()->find( oneParse
.GetPiece(j
).Display() );
517 if( !pStemSet
) pStemSet
= m_pLexicon
->GetAllWords()->find( oneParse
.GetPiece(j
).Display() );
519 if( pStemSet
) allComponents
->insert( oneParse
.GetPiece(j
).Display(), pStemSet
);
524 m_MyComponents
->insert( j
, pStemSet
);
525 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
526 for (int z
= 0; z
< pStemSet
->size(); z
++)
528 pStem
= pStemSet
->at(z
);
529 pStem
->IncrementCompoundCount(
530 double(GetCorpusCount()) /
535 double(1.0) / m_Parses
->count();
539 // It is a linker element
540 pLinker
= *m_pLexicon
->GetLinkers() << oneParse
.GetPiece(j
);
544 m_MyLinkers
->insert( j
, pLinker
);
545 pLinker
->AddCompound( this );
546 pLinker
->IncrementCompoundCount(
549 pLinker
->IncrementCorpusCount(
552 double(GetCorpusCount()) /
554 *pLinkerCount
+= double(1.0) /
// Appends parse `pEdge` to this compound and recalculates piece counts.
// Visible behaviour: the edge is appended to m_Parses, then every parse in
// the list is walked. Pieces at least MINIMUM_STEM_LENGTH long are looked up
// as stems, then words; resolved stem sets are recorded in m_MyComponents.
// Stem compound counts receive a negative adjustment followed by a positive
// one, both derived from GetCorpusCount() and the number of parses --
// presumably removing the old per-parse share and adding the new one;
// confirm. Linker pieces are inserted (operator <<) into the lexicon's linker
// collection, recorded in m_MyLinkers, and their compound and corpus counts
// adjusted the same way.
// NOTE(review): the divisor expressions and branch conditions are truncated
// in this listing.
563 void CCompound::AddParse( CEdge
* pEdge
)
575 int MINIMUM_STEM_LENGTH
= m_pLexicon
->GetIntParameter( "Main\\MinimumStemLength", 3 );
577 m_Parses
->append( pEdge
);
579 for( qEdge
= m_Parses
->first(); qEdge
; qEdge
= m_Parses
->next() )
580 //for (int z =0; z < m_Parses->size(); z++)
581 { //qEdge = m_Parses->at(z);
582 qEdge
->GetParse( &oneParse
);
583 compound
= oneParse
.Display('.');
584 ssCompound
= compound
;
585 oneParse
.Collapse( ssCompound
, '.' );
587 // Attach components, stems, and linkers
588 // Recalculate counts
589 for( int j
= 1; j
<= oneParse
.Size(); j
++ )
593 if( oneParse
.GetPiece(j
).Display().length() >= MINIMUM_STEM_LENGTH
)
595 pStemSet
= m_pLexicon
->GetAllStems()->find( oneParse
.GetPiece(j
).Display() );
597 if( !pStemSet
) pStemSet
= m_pLexicon
->GetAllWords()->find( oneParse
.GetPiece(j
).Display() );
604 m_MyComponents
->insert( j
, pStemSet
);
605 // TODO: add stem to compound links
609 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
610 for (int z
= 0; z
< pStemSet
->size(); z
++)
611 { pStem
= pStemSet
->at(z
);
612 pStem
->IncrementCompoundCount(
613 -double(GetCorpusCount()) /
614 (double(m_Parses
->count()) -
619 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
620 for (int z
= 0; z
< pStemSet
->size(); z
++)
621 { pStem
= pStemSet
->at(z
);
622 pStem
->IncrementCompoundCount(
623 double(GetCorpusCount()) /
629 // It is a linker element
630 pLinker
= *m_pLexicon
->GetLinkers() << oneParse
.GetPiece(j
);
636 m_MyLinkers
->insert( j
, pLinker
);
637 pLinker
->AddCompound( this );
641 pLinker
->IncrementCompoundCount(
643 (double(m_Parses
->count()) -
645 pLinker
->IncrementCorpusCount(
648 -double(GetCorpusCount()) /
649 (double(m_Parses
->count()) -
653 pLinker
->IncrementCompoundCount(
658 pLinker
->IncrementCorpusCount(
661 double(GetCorpusCount()) /
// Removes parse `pEdge` from this compound and recalculates piece counts.
// Returns FALSE when `pEdge` is null or is not found in m_Parses.
// Visible behaviour: every parse in the list is walked. Pieces at least
// MINIMUM_STEM_LENGTH long are looked up as stems, then words, and stem
// compound counts receive a positive and a negative adjustment derived from
// GetCorpusCount() and the number of parses. Linker pieces have this compound
// removed and their counts adjusted; a linker whose compound count is <= 0 is
// removed from the lexicon's linker collection. Finally an edge is removed
// from m_Parses, and if the removed position was the best parse,
// SetBestParse(-1) is called.
// NOTE(review): the divisor expressions, branch conditions and the final
// return are truncated in this listing.
670 bool CCompound::RemoveParse( CEdge
* pEdge
)
680 if( !pEdge
) return FALSE
;
682 int MINIMUM_STEM_LENGTH
= m_pLexicon
->GetIntParameter( "Main\\MinimumStemLength", 3 );
// `pos` is remembered so the best-parse index can be reset after removal.
684 int pos
= m_Parses
->find( pEdge
);
685 if( pos
< 0 ) return FALSE
;
687 for( qEdge
= m_Parses
->first(); qEdge
; qEdge
= m_Parses
->next() )
688 //for (int z = 0; z < m_Parses->size(); z++)
690 // qEdge = m_Parses->at(z);
691 qEdge
->GetParse( &oneParse
);
692 compound
= oneParse
.Display('.');
693 ssCompound
= compound
;
694 oneParse
.Collapse( ssCompound
, '.' );
696 // Attach components, stems, and linkers
697 // Recalculate counts
698 for( int j
= 1; j
<= oneParse
.Size(); j
++ )
702 if( oneParse
.GetPiece(j
).Display().length() >= MINIMUM_STEM_LENGTH
)
704 pStemSet
= m_pLexicon
->GetAllStems()->find( oneParse
.GetPiece(j
).Display() );
706 if( !pStemSet
) pStemSet
= m_pLexicon
->GetAllWords()->find( oneParse
.GetPiece(j
).Display() );
713 // TODO: remove stem to compound links
717 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
718 for (int y
= 0; y
< pStemSet
->size(); y
++)
719 { pStem
= pStemSet
->at(y
);
720 pStem
->IncrementCompoundCount(
721 double(GetCorpusCount()) /
722 (double(m_Parses
->count()) -
727 //for( pStem = pStemSet->first(); pStem; pStem = pStemSet->next() )
728 for (int w
= 0; w
< pStemSet
->size(); w
++)
729 { pStem
= pStemSet
->at(w
);
730 pStem
->IncrementCompoundCount(
731 -double(GetCorpusCount()) /
737 // It is a linker element
738 pLinker
= *m_pLexicon
->GetLinkers() << oneParse
.GetPiece(j
);
744 pLinker
->RemoveCompound( this );
748 pLinker
->IncrementCompoundCount(
750 (double(m_Parses
->count()) -
752 pLinker
->IncrementCorpusCount(
754 double(GetCorpusCount()) /
755 (double(m_Parses
->count()) -
759 pLinker
->IncrementCompoundCount(
760 -double(1.0) / m_Parses
->count());
761 pLinker
->IncrementCorpusCount(
764 -double(GetCorpusCount()) /
767 if( pLinker
->GetCompoundCount() <= 0.0 )
769 m_pLexicon
->GetLinkers()->RemoveMember( pLinker
);
// NOTE(review): if the loop above ran to completion, qEdge is null at this
// point, so this call would not remove the requested edge; pEdge looks like
// the intended argument -- confirm (the original author flagged this too).
776 m_Parses
->remove( qEdge
) ; //@@@@ check that this is right -- JG
778 if( pos
== m_BestParse
) SetBestParse(-1);
784 StemSet
* CCompound::GetComponent( int i
) const
786 if( m_MyComponents
->find(i
) == m_MyComponents
->end() ) return NULL
;
787 return m_MyComponents
->find(i
).data();
791 CLinker
* CCompound::GetLinker( int i
) const
793 if( m_MyLinkers
->find(i
) == m_MyLinkers
->end() ) return NULL
;
794 return m_MyLinkers
->find(i
).data();
// Returns the affixness of the first stem of the component at index 0, when
// such a component exists.
// NOTE(review): SetBestParse, SetParses and AddParse in this file insert
// components with keys starting at 1, so index 0 may never be populated --
// confirm whether GetComponent(1) was intended.
// NOTE(review): the value returned when no component is found is not visible
// in this listing.
798 double CCompound::GetPrefixness()
800 StemSet
* compound
= GetComponent(0);
801 if( compound
) return compound
->first()->GetAffixness();
// Returns the affixness of the first stem of the component at index
// m_MyComponents->count() - 1, when such a component exists.
// NOTE(review): SetBestParse, SetParses and AddParse in this file insert
// components with keys starting at 1, so count() - 1 may not address the last
// piece -- confirm the intended index.
// NOTE(review): the value returned when no component is found is not visible
// in this listing.
806 double CCompound::GetSuffixness()
808 StemSet
* compound
= GetComponent( m_MyComponents
->count() - 1 );
809 if( compound
) return compound
->first()->GetAffixness();
814 QString
CCompound::DisplayParse( int i
, StringToString
* filter
)
816 CEdge
* pEdge
= m_Parses
->at(i
);
817 if( !pEdge
) return "";
819 return pEdge
->DisplayParse( filter
);