1 // Finite-state automaton representation of morphology
2 // Copyright © 2009 The University of Chicago
9 #include <QStandardItemModel>
10 #include "MiniLexicon.h"
22 typedef std::list
<FSAMorpheme
*> FSAMorphemeList
;
23 typedef std::list
<FSAstate
*> FSAStateList
;
24 typedef std::list
<FSAedge
*> FSAedgeList
;
25 typedef std::list
<FSAedgeList
*> FSApathList
;
32 FSAMorpheme(QString qs
,int cc
):str(qs
),m_corpusCount(cc
){};
34 double GetDL(int characterCount
) const;
35 const QString
& toStr() const {return str
;};
37 bool operator==(const FSAMorpheme
& other
) const;
41 friend class FSAstate
;
44 extern QString
join(const FSAMorphemeList
& v
, const QString
& delim
);
47 FSAStateList
* m_States
;
49 FSAStateList
* m_StartStates
;
50 std::list
<FSAedgeList
*> m_FSAPathList
;
53 CMiniLexicon
* m_lexicon
;
55 // the same morpheme may occur at multiple edges
56 QMap
<QString
, FSAMorpheme
*> m_morphemes
;
57 //QMultiMap<QString, FSAedge*> m_morphemeEdges;
58 int m_nextStartStateId
;
61 // construction/destruction.
63 explicit FSA(CMiniLexicon
* pMiniLex
);
70 FSA
& operator=(const FSA
& x
);
71 void AddEdge(FSAedge
* edge
);
76 void AddSignature(CSignature
* pSig
);
77 //void AddPrefixSignature(CSignature* pSig);
78 void AddState(FSAstate
* state
);
82 void RemoveEdge(FSAedge
* edge
);
84 /// description length
87 void AddPrefixes(CMiniLexicon
* pMiniLex
);
90 void FSAListDisplay(Q3ListView
* widget
,
91 QMap
<QString
, QString
>* filter
,
96 /// list paths to leaves in m_FSAPathList
101 FSAStateList
* GetStates() { return m_States
;}
103 // linguistic analysis.
105 /// sample function that does some manipulation of FSA
108 static int GetRobustness(FSAedgeList
* pPath
);
110 void OutputFSAXfst ( const QString
& filename
);
112 //returns pointer to new edge
113 FSAedge
* DoParallelSplit (FSAedge
* pEdge
, FSAMorphemeList
& morphsToMove
);
114 //returns pointer to "first" new edge, ie, edge from start state
115 FSAedge
* DoSeriesSplit(FSAedge
* pEdge
, unsigned int len
, bool suffix
=true);
117 //first,last are iterators to list of lists,
118 // union of lists must be equal to set of morphemes on pEdge
119 void DoMultParallelSplit( FSAedge
* pEdge
,
120 std::list
<FSAMorphemeList
>::iterator fisrt
,
121 std::list
<FSAMorphemeList
>::iterator last
);
123 friend class FSAedge
;
124 friend class FSAstate
;
128 FSAMorphemeList m_Morphemes
;
129 FSAstate
* m_FromState
;
133 // construction/destruction.
135 explicit FSAedge(FSA
* pFSA
, FSAstate
* start
= 0, FSAstate
* end
= 0);
136 // FSAedge(FSA* pFSA, FSAstate* start, FSAstate* end, class CParse* pLabels);
137 // suppress implied default constructor
141 // destructor implicitly defined.
145 FSAedge(const FSAedge
& x
);
146 FSAedge
& operator=(const FSAedge
& x
);
149 void AddMorpheme(const QString
& morpheme_text
, int count
);
150 void RemoveMorpheme(FSAMorphemeList::iterator iter
);
151 FSAMorphemeList
* GetMorphemes() { return &m_Morphemes
; }
153 // source and target states.
155 FSAstate
* GetFromState() const { return m_FromState
; }
156 FSAstate
* GetToState() const { return m_ToState
; }
161 FSAedgeList m_EdgesOut
;
162 FSAedgeList m_EdgesIn
;
164 QString m_stateLabel
;
166 // XXX. only used for breadth-first search
167 std::list
<FSAedgeList
*> m_PathList
; //list of paths to this node
168 unsigned int m_DiscoverCount
;
172 std::list
<FSAedgeList
*>* GetPathList(){ return &m_PathList
; }
173 void addPath(FSAedgeList
* pPath
) { m_PathList
.push_back(pPath
);}
174 void setMaxDepth(int d
){this->m_MaxDepth
=d
;}
175 int getMaxDepth(){return this->m_MaxDepth
;}
179 FSAedgeList
* GetEdgesOut() { return &m_EdgesOut
; }
180 void AddEdgeOut(FSAedge
* pEdge
);
182 FSAedgeList
* GetEdgesIn() { return &m_EdgesIn
; }
183 void AddEdgeIn(FSAedge
* pEdge
);
186 void OutputXfst(QTextStream
& outf
);
187 void SearchEdgeXfst (QTextStream
& outf
, std::set
<FSAstate
*>& discovered
);
188 QString
getStateName(){ return QString("S%1").arg(this->m_stateId
) ;}
191 friend class FSAListViewItem
;
196 class CSignature
* m_Sig1
; // the standard of comparison
197 class CSignature
* m_Sig2
; // the signature being reanalyzed
199 int m_Length
; // number of affixes in longer signature
203 /// Qt3-style row object for a table displaying the FSA
204 /// Each row corresponds to a complete edge path
205 class FSAListViewItem
: public Q3ListViewItem
{
207 /// points to path to some final state
208 FSAedgeList
* m_pPath
;
210 // construction/destruction.
212 FSAListViewItem(Q3ListView
* pView
, FSAedgeList
* path
)
213 : Q3ListViewItem(pView
), m_pImage(), m_pPath(path
) { }
214 FSAListViewItem(FSAListViewItem
* pParent
, FSAedgeList
* path
)
215 : Q3ListViewItem(pParent
), m_pImage(), m_pPath(path
) { }
216 // disable default-construction.
224 FSAListViewItem(const FSAListViewItem
& x
)
226 // drop cached pixmap to avoid deleting it twice
228 m_pPath(x
.m_pPath
) { }
229 FSAListViewItem
& operator=(const FSAListViewItem
& x
)
231 Q3ListViewItem::operator=(x
);
239 /// graphical display
240 /// repeated requests are fast since created image is cached
241 QPixmap
* GetQPixmap();
242 /// write information about path to “command line” pane
243 void DisplayEdgePath(Q3TextEdit
* m_commandLine
);
246 FSAstate
* GetLVStartState()
247 { return m_pPath
->front()->GetFromState(); }
249 /// helper for graphical display
256 inline void FSAListViewItem::build_pixmap()
258 // not using graphviz, empty image
259 m_pImage
= new QPixmap
;