// Affixes and signatures in the signature/suffix-based model of morphology
2 // Copyright © 2009 The University of Chicago
3 #ifndef SIGNATURECOLLECTION_H
4 #define SIGNATURECOLLECTION_H
6 // See the CMiniLexicon class in MiniLexicon.h for an overview of
7 // suffix/signature-based discovery of morphology.
9 #include "CollectionTemplate.h"
10 #include "generaldefinitions.h"
11 #include "AffixLocation.h"
12 #include "Allomorphy.h"
// Forward declaration of Qt's two-parameter QMap template, so that the
// QMap<QString, QString>* parameters further down can be named without
// this header pulling in the full definition.
template<class Key, class Value> class QMap;
17 class CSignatureCollection
: public TCollection
<class CSignature
> {
18 class CSuffixCollection
* MySuffixes
;
19 class CPrefixCollection
* MyPrefixes
;
21 enum eAffixLocation m_SignatureType
;
22 double m_DLofPointersToMyMembers
;
23 double m_SumOfDLofPointersInternalToEachMember
;
25 // construction/destruction.
27 CSignatureCollection();
28 CSignatureCollection(enum eAffixLocation prefix_or_suffix
);
29 CSignatureCollection(CMiniLexicon
* mini
, CSuffixCollection
* suffixes
,
30 enum eAffixLocation prefix_or_suffix
);
31 CSignatureCollection(CMiniLexicon
* mini
, CPrefixCollection
* prefixes
,
32 enum eAffixLocation prefix_or_suffix
);
34 CSignatureCollection(CMiniLexicon
* mini
);
35 ~CSignatureCollection();
39 CSignatureCollection(const CSignatureCollection
& x
);
40 CSignatureCollection
& operator=(const CSignatureCollection
& x
);
49 CSignature
* operator<<(CSignature
* sig
);
50 CSignature
* operator<<(CParse
* affixes
);
51 void AddPointer(CSignature
* sig
);
52 CSignature
* AddToCollection(CParse
& affixes
);
53 CSignature
* AddToCollection(CStringSurrogate
& dot_delimited
);
57 CSignature
* operator^=(CParse
& affixes
);
58 CSignature
* operator^=(CParse
* affixes
);
59 /// str should be a .-delimited list of affixes
60 /// result is 0 if no such signature exists
61 CSignature
* operator^=(CStringSurrogate
& str
);
62 /// *this ^= CStringSurrogate(str)
63 CSignature
* operator^=(QString str
);
67 /// doesn't delete sig
68 bool Remove(CSignature
* sig
);
70 bool RemoveMember(CSignature
* sig
);
71 /// deletes *this ^= dot_delimited
72 bool RemoveMember(CStringSurrogate
& dot_delimited
);
73 bool RemoveMember(CStringSurrogate
& dot_delimited
, bool delete_it
);
74 /// remove each item from m_DeletionArray
75 void DeleteMarkedMembers();
77 // union of contained signatures.
79 void SetMyPrefixes(CPrefixCollection
* prefixes
);
80 CPrefixCollection
* GetMyPrefixes() { return MyPrefixes
; }
81 void SetMySuffixes(CSuffixCollection
* suffixes
);
82 CSuffixCollection
* GetMySuffixes() { return MySuffixes
; }
84 // affix location (prefix or suffix).
86 eAffixLocation
GetSignatureType() { return m_SignatureType
; }
88 // Compare pairs of sigs, and identify pairs where one is exactly a suffix of the other
90 void CompareSignaturePairsForTotalOverlap();
91 void RecutLongerSigToMatchTheShorter(CSignatureAlignment
* pSigAlignment
);
95 void FindAllomorphy();
96 void PutAffixesOfRegularSignaturesIntoNewSuffixes(
97 CSuffixCollection
* NewSuffixes
,
98 int MinimumNumberOfStems
);
99 void PutAffixesOfRegularSignaturesIntoNewPrefixes(
100 CPrefixCollection
* NewPrefixes
);
104 void BorrowedSigsDisplay(class Q3ListView
* widget
,
105 QMap
<QString
, QString
>* filter
);
106 void ListDisplay(class Q3ListView
* widget
,
107 QMap
<QString
, QString
>* filter
= 0);
109 // serialization and deserialization (file I/O).
111 void OutputXfst (QString
);
112 void OutputSignatures(QString filename
);
113 void ReadSignatureFile(QString filename
,
114 enum eAffixLocation prefix_or_suffix
);
115 void ReadSignatureFileBis(QString filename
);
117 /// scan all sigs, and if the stems of pSig do not begin
118 /// with DiffLetter, then Diff
119 void TryToRemove(const QString Suffix
, const QString DiffLetter
);
121 void CalculateFrequencies();
122 /// For each signature, determines if a signature
123 /// with more robustness is contained in it; if so, takes the
124 /// larger robustness as its own. This measures our certain of
125 /// the STEMS as such for the signature.
126 void CheckRobustness();
127 void FindDisplayOrdering();
129 /// sum for each signature the number of stems * number of affixes
130 int GetTotalNumberOfWords();
131 int TheseTwoSuffixesShareHowManyStems(
132 class CSuffix
* suf1
, class CSuffix
* suf2
);
133 double ComputeDLofInternalPointersOfEachMember(enum eMDL_STYLE style
);
134 double ComputeLengthOfPointersToEachOfMyMembers(enum eMDL_STYLE style
);
135 void GetIndividualCountsForEachStem();
138 #endif // SIGNATURECOLLECTION_H