1 // Mini-lexicon’s collection of words
2 // Copyright © 2009 The University of Chicago
3 #ifndef WORDCOLLECTION_H
4 #define WORDCOLLECTION_H
6 // See the CMiniLexicon class in MiniLexicon.h for an overview of
7 // suffix/signature-based discovery of morphology.
9 #include "CollectionTemplate.h"
11 #include "StemListViewItem.h"
13 #include "AffixLocation.h"
// Forward declarations: each of these types is referred to below only
// through a pointer or a reference, so a declaration is sufficient here.

// Qt map template; OutputWords() takes a pointer to one.
template <typename Key, typename Value>
class QMap;

// Status reporting interface; HowManyAreAnalyzed() takes one by reference.
namespace linguistica {
namespace ui {
class status_user_agent;
}
}

// Passed by pointer to PredecessorFreq1() / SuccessorFreq1().
class CSignatureCollection;

// Pointed to by the m_Phones_* members and returned by the GetPhones*() calls.
class CPhoneCollection;
19 class CWordCollection
: public TCollection
<CStem
> {
21 enum eAffixLocation m_AffixLocation
;
22 enum CWordListViewItem::display_mode m_DisplayMode
;
23 CPhoneCollection
* m_Phones_Tier1
;
24 CPhoneCollection
* m_Phones_Tier2
;
25 CPhoneCollection
* m_Phones_Tier1_Skeleton
;
27 double m_PhonologicalContent_Unigram
;
28 double m_PhonologicalContent_Bigram
;
29 double m_Tier2_LocalMI_Score
;
30 double m_Tier2_DistantMI_Score
;
32 /// includes Tier1 bigram content
33 double m_LocalMI_TotalBoltzmannScore
;
34 double m_LocalMI_Plog
;
35 /// includes Tier1 bigram content
36 double m_DistantMI_TotalBoltzmannScore
;
37 double m_DistantMI_Plog
;
43 // construction/destruction.
45 explicit CWordCollection(CMiniLexicon
* lexicon
= 0);
50 CWordCollection(const CWordCollection
& x
);
51 CWordCollection
& operator=(const CWordCollection
& x
);
53 CStem
* operator<<(CStem
*);
54 CStem
* operator<<(CStem
&);
55 CStem
* operator<<(CStringSurrogate
&);
56 CStem
* operator<<(CParse
*);
57 CStem
* operator<<(QString
);
58 void AddPointer(CStem
*);
59 CStem
* AddToCollection(CParse
&);
60 CStem
* AddToCollection(CStringSurrogate
&);
64 bool Remove(CStem
*); ///< doesn't delete CStem*
65 bool RemoveMember(CStem
*); ///< deletes CStem*
66 bool RemoveMember(CStringSurrogate
&); ///< deletes CStem*
67 bool RemoveMember(CStringSurrogate
&, bool);
68 void DeleteMarkedMembers();
71 void AssignSignatureFromStemsAffixPointer(enum eAffixLocation
);
72 int HowManyAreAnalyzed(int& HowManyNotAnalyzed
,
73 linguistica::ui::status_user_agent
& status_display
);
75 void OutputWords(QString filename
, QMap
<QString
, QString
>* filter
);
77 void OutputWordsForTesting(QString
);
78 CPhoneCollection
* GetPhones();
79 CPhoneCollection
* GetPhones_Tier2();
80 CPhoneCollection
* GetPhones_Tier1_Skeleton();
82 void ReadWordFile(QString
);
83 void PredecessorFreq1(CStemCollection
*,
84 CPrefixCollection
*, CSignatureCollection
*,
85 enum eSuccessorFrequencyMode
, int);
86 void SuccessorFreq1(CStemCollection
*,
87 CSuffixCollection
*, CSignatureCollection
*,
88 enum eSuccessorFrequencyMode
, int);
89 void FindAllWordNeighbors(CLexicon
*);
91 void CountPhonesAndBiphones(enum eTier tier
);
93 void CreateCVTemplate();
95 void SplitPhonologyToTiers(enum CStem::ePhonologySplitType leave_slot
);
96 void CreatePhonologyFromOrthography();
97 void ComputeProbabilitiesOfWords();
98 void GetPhonologyTierInfoForGraphOfWords();
100 void ComputeBoltzmannProbabilities();
103 double ComputeZStar(); ///< Field method on tier 2
105 double GetPhonologicalContentTier1Bigrams()
106 { return m_PhonologicalContent_Bigram
; }
107 double GetPhonologicalContentUnigrams()
108 { return m_PhonologicalContent_Unigram
; }
109 double GetZ_Local() { return m_MyZ_Local
; }
110 double GetZ_Distant() { return m_MyZ_Distant
; }
112 double GetTier2_LocalMI_Score() { return m_Tier2_LocalMI_Score
; }
113 double GetLocalMI_Plog() { return m_LocalMI_Plog
; }
114 double GetTier2_DistantMI_Score() { return m_Tier2_DistantMI_Score
; }
115 double GetDistantMI_Plog() { return m_DistantMI_Plog
; }
118 #endif // WORDCOLLECTION_H