1 // Implementation of CAffix methods
2 // Copyright © 2009 The University of Chicago
5 #include "MiniLexicon.h"
10 // construction/destruction.
12 /// skeleton of an affix object owned by mini.
13 /// one should probably initialize the underlying CLParse object with
14 /// CLParse methods afterwards.
// Constructor taking the mini-lexicon that owns this affix.
// The visible initializers allocate an empty QList<CStem*> for
// m_StemPtrList (released in ~CAffix), set m_CompressedLength,
// m_LengthOfPointerToMe, m_PhonologicalInformationContent and
// m_UnigramLogProb to 0.0, and default-construct m_Deletees and m_Morphees.
// NOTE(review): the start of the initializer list, any initializers after
// m_UnigramLogProb and the body braces are not visible in this listing;
// confirm against the full file before editing.
15 CAffix::CAffix(CMiniLexicon
* mini
)
19 m_StemPtrList(new QList
<CStem
*>),
20 m_CompressedLength(0.0),
21 m_Deletees(), // initialized below
22 m_Morphees(), // initialized below
23 m_LengthOfPointerToMe(0.0),
24 m_PhonologicalInformationContent(0.0),
25 m_UnigramLogProb(0.0),
// Both parse members are put through Alphabetize() once constructed.
28 m_Deletees
.Alphabetize();
29 m_Morphees
.Alphabetize();
32 /// affix object owned by mini, with underlying string str
33 /// Copies in the string str, so it’s okay if str becomes invalid later.
// Constructor taking the affix's underlying string str.
// The visible initializers match the other constructor: a fresh
// QList<CStem*> for m_StemPtrList, 0.0 for m_CompressedLength,
// m_LengthOfPointerToMe, m_PhonologicalInformationContent and
// m_UnigramLogProb, and default-constructed m_Deletees / m_Morphees.
// NOTE(review): the remaining parameters, the first initializers and the
// body braces are not visible in this listing; confirm against the full
// file before editing.
34 CAffix::CAffix(const CStringSurrogate
& str
,
39 m_StemPtrList(new QList
<CStem
*>),
40 m_CompressedLength(0.0),
41 m_Deletees(), // initialized below
42 m_Morphees(), // initialized below
43 m_LengthOfPointerToMe(0.0),
44 m_PhonologicalInformationContent(0.0),
45 m_UnigramLogProb(0.0),
// Both parse members are put through Alphabetize() once constructed.
48 m_Deletees
.Alphabetize();
49 m_Morphees
.Alphabetize();
52 CAffix::~CAffix() { delete m_StemPtrList
; }
54 //-------------------------------------------
55 // Public mutator/accessor methods
56 //-------------------------------------------
59 // Increment the count of how many times this
62 // n - the amount to increment
65 Increment the number of stems associated with this CAffix.
67 void CAffix::IncrementUseCount (int n
)
70 Q_ASSERT ( m_UseCount
> 0);
71 Q_ASSERT ( m_UseCount
< 1000000 );
75 // Append a new stem to the string without
76 // adding it to the list of stem pointers
79 // Stem - the new stem
82 Append a new CStringSurrogate stem representation to the list of stems. <kbd>Stem</kbd>
83 is a surrogate of the string to be added.
85 void CAffix::AppendToStemString(const CStringSurrogate
& Stem
)
88 if( !m_StemString
.Contains( Stem
) ) m_StemString
.Append(Stem
);
92 // Add a stem to the list of stems and append
96 // pStem - the stem to be added
99 Add a new CStem pointer to the list of stems. <kbd>pStem</kbd> is a pointer
100 to the stem that will be added.
102 void CAffix::AddStem(CStem
* pStem
)
104 if( ! m_StemPtrList
->contains(pStem
) )
106 m_StemPtrList
->append(pStem
);
107 AppendToStemString( pStem
->GetKey() );
112 // Remove a stem from the list of stem pointers
115 // pStemToRemove - pointer to the stem that
116 // will be removed from the list
119 Remove a CStem pointer from the list of stems. <kbd>pStemToRemove</kbd> is
120 a pointer to the stem that will be removed.
122 void CAffix::RemoveFromStemPtrList(CStem
* pStemToRemove
)
124 m_StemPtrList
->removeAll(pStemToRemove
);
128 // Remove a stem from the string without attempting
129 // to remove it from the pointer list
132 // ssStem - the stem to be removed
135 Remove a CStringSurrogate stem representation from the list of stems. <kbd>ssStem</kbd>
136 is the surrogate string to be removed.
138 void CAffix::RemoveStemString(const CStringSurrogate
& ssStem
)
140 m_StemString
.Remove( ssStem
);
145 Add a CStringSurrogate deletee. Deletees are substrings that are deleted
146 from an allomorph. <kbd>ssDeletee</kbd> is a surrogate representation
149 void CAffix::AddDeletee( CStringSurrogate ssDeletee
)
151 if( !m_Deletees
.Contains( ssDeletee
) )
153 m_Deletees
.Append( ssDeletee
);
158 Add a CStringSurrogate morphee pair. Morphees are substrings that are
159 different in an allomorph. <kbd>y</kbd> is the substring that is changed.
160 <kbd>i</kbd> is the substring <i>y</i> is changed into.
// Records a morphee pair (y is changed into i).
// The pair is stored as one entry: the display text of y, a single
// backslash character, then the display text of i. The entry is appended
// to m_Morphees only when m_Morphees does not already contain it.
// NOTE(review): the declaration of New (and therefore its type) is not
// visible in this listing; confirm against the full file.
162 void CAffix::AddMorphee ( CStringSurrogate y
, CStringSurrogate i
)
165 New
= y
.Display() + "\\" + i
.Display();
// Skip entries that are already present.
166 if( !m_Morphees
.Contains( New
) )
168 m_Morphees
.Append (New
);
// Builds a display string for this affix in Outstring.
// When ExpressDeletees is true, the display text of m_Deletees and then of
// m_Morphees is appended, each only if its key length is greater than 0.
// The display text of GetKey() is appended after those.
// NOTE(review): the declaration of Outstring, several lines inside the two
// conditionals (possibly delimiters), the closing braces and the return
// statement are not visible in this listing; confirm against the full file.
171 QString
CAffix::ExpressAffix( bool ExpressDeletees
)
179 if ( ExpressDeletees
&& m_Deletees
.GetKeyLength() > 0 )
182 Outstring
+= m_Deletees
.Display();
// The morphees are also emitted only when ExpressDeletees is set.
185 if ( ExpressDeletees
&& m_Morphees
.GetKeyLength() > 0 )
// The morphee text is wrapped in a CSS and displayed from there.
187 QString strMorphees
= m_Morphees
.Display();
188 CSS
ssMorphees( strMorphees
);
190 Outstring
+= ssMorphees
.Display();
194 Outstring
+= GetKey().Display() ;
// Returns m_LengthOfPointerToMe, computing it first when the stored value
// is <= 0. The computed value is
//   base2log( m_pMyMini->GetCorpusCount() / GetCorpusCount() )
// and is only assigned when both corpus counts are greater than 0.
// NOTE(review): both visible assignments use the same expression, so the
// CorpusBasedAffixCount flag does not change the result as written.
// NOTE(review): if both GetCorpusCount() calls return integers, the
// division truncates before base2log is applied; confirm the return types.
// NOTE(review): the braces and any else between the two assignments are
// not visible in this listing.
199 double CAffix::GetLengthOfPointerToMe() // problem here jan 1 2010
201 if (m_LengthOfPointerToMe
<= 0)
// Integer parameter read from the owning mini-lexicon, with 0 passed as the
// second argument.
203 bool CORPUS_BASED_AFFIX_COUNT
= m_pMyMini
->GetIntParameter( "SignatureDL\\CorpusBasedAffixCount", 0 );
// Both counts must be positive before the ratio is taken.
204 if ( m_pMyMini
->GetCorpusCount() > 0 && GetCorpusCount() > 0 ) {
205 if (CORPUS_BASED_AFFIX_COUNT
)
207 m_LengthOfPointerToMe
= base2log ( m_pMyMini
->GetCorpusCount () / GetCorpusCount() );
211 m_LengthOfPointerToMe
= base2log ( m_pMyMini
->GetCorpusCount () / GetCorpusCount() );
215 return m_LengthOfPointerToMe
;
// Returns m_PhonologicalInformationContent, computing it first when the
// stored value equals 0. The computed value is
//   base2log( m_pMyMini->GetCorpusCount() / GetCorpusCount() )
// NOTE(review): both visible assignments use the same expression, so the
// CorpusBasedAffixCount flag does not change the result as written.
// NOTE(review): unlike GetLengthOfPointerToMe(), no check that the corpus
// counts are greater than 0 is visible before the division.
// NOTE(review): a computed value of exactly 0 is indistinguishable from
// "not yet computed" and would be recomputed on every call.
// NOTE(review): the braces and any else between the two assignments are
// not visible in this listing.
219 double CAffix::GetPhonologicalInformationContent()
221 if (m_PhonologicalInformationContent
== 0)
// Integer parameter read from the owning mini-lexicon, with 0 passed as the
// second argument.
223 bool CORPUS_BASED_AFFIX_COUNT
= m_pMyMini
->GetIntParameter( "SignatureDL\\CorpusBasedAffixCount", 0 );
224 if (CORPUS_BASED_AFFIX_COUNT
)
226 m_PhonologicalInformationContent
= base2log ( m_pMyMini
->GetCorpusCount () / GetCorpusCount() );
230 m_PhonologicalInformationContent
= base2log ( m_pMyMini
->GetCorpusCount () / GetCorpusCount() );
233 return m_PhonologicalInformationContent
;
// Sets m_PhonologicalInformationContent.
// When m_BigramLogProb is greater than 0 that value is stored; the other
// visible assignment stores ComputeDL() called with the lexicon's number of
// character types.
// NOTE(review): presumably the second assignment is the else branch, but
// the else and the braces are not visible in this listing; confirm
// against the full file.
237 void CAffix::CalculatePhonologicalInformationContent ( CLexicon
* Lexicon
)
239 if (m_BigramLogProb
> 0)
241 m_PhonologicalInformationContent
= m_BigramLogProb
;
245 m_PhonologicalInformationContent
= ComputeDL( Lexicon
->GetNumberOfCharacterTypes() );