// Mini-lexicon’s collection of stems
// Copyright © 2009 The University of Chicago
3 #ifndef STEMCOLLECTION_H
4 #define STEMCOLLECTION_H
// See the CMiniLexicon class in MiniLexicon.h for an overview of
// suffix/signature-based discovery of morphology.
11 #include "CollectionTemplate.h"
12 #include "AffixLocation.h"
13 #include "generaldefinitions.h"
// Forward declaration only: this header refers to QMap solely through
// pointers, so the full Qt definition is not needed here.
template<class Key, class T> class QMap;
16 /// List of stems for a stem/signature model of morphology
18 /// Maintains a list of stems used in analyzing a particular collection
19 /// of words (that is, all stems associated to a particular mini-lexicon).
20 /// Semantically, a stem collection is a set (like std::set) of pointers
21 /// to CStem objects which it owns. The contained stems are generally
22 /// deleted upon removal.
24 /// When a stem is inserted, it is added to the associated lexicon, and
25 /// when a stem is removed, it is removed from the associated lexicon.
27 /// This collection type also maintains a total use count for the stems it
28 /// manages, for use in description length calculations.
30 /// To support the successor-frequency algorithm, a stem collection
31 /// should support incremental lookup of stems (find all stems starting
32 /// with a given phoneme sequence, add a phoneme to that sequence to
33 /// narrow the search, etc).
34 class CStemCollection
: public TCollection
<class CStem
> {
36 enum eAffixLocation m_AffixLocation
;
37 double m_TotalUseCount
;
39 // construction/destruction.
42 CStemCollection(class CMiniLexicon
* mini
);
44 // copy construction, copy-assignment implicitly defined.
46 // Qt3-style collection view.
48 void ListDisplay(class Q3ListView
* parent
,
49 QMap
<class QString
, class QString
>* filter
= 0);
51 // input/output to file.
53 void OutputStems(class QString filename
,
54 QMap
<class QString
, class QString
>* filter
);
55 void ReadStemFile(class QString filename
,
56 enum eAffixLocation affix_loc
);
60 CStem
* operator<<(const CStem
* stem
);
61 CStem
* operator<<(class CStringSurrogate stem_text
);
62 CStem
* operator<<(const class CParse
* stem_text
);
63 CStem
* operator<<(class QString stem_text
);
64 void AddPointer(CStem
* pointee
);
65 CStem
* AddToCollection(const class CParse
& stem_text
);
66 CStem
* AddToCollection(const class CStringSurrogate
& stem_text
);
75 bool Remove(CStem
* stem
); // doesn't delete CStem*
76 bool RemoveMember(CStem
* stem
); // deletes CStem*
77 bool RemoveMember(const class CStringSurrogate
& stem_text
); // deletes CStem*
78 bool RemoveMember(const class CStringSurrogate
& stem_text
,
79 bool delete_stem
); // FSA
80 void DeleteMarkedMembers();
84 double GetTotalUseCount();
86 // description length.
88 double CalculateSumOfPointersToMyStems(enum eMDL_STYLE style
);
89 double CalculateTotalPhonologicalInformationContent(
90 class CLexicon
* MotherLexicon
);
93 #endif // STEMCOLLECTION_H