CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / StateEmitHMM.h
blob a00dbb906ab97128b503501529db68a47d8041e2
1 // Hidden Markov model
2 // Copyright © 2009 The University of Chicago
3 #ifndef STATEEMITHMM_H
4 #define STATEEMITHMM_H
6 #include <QString>
7 #include <QMap>
8 #include "generaldefinitions.h"
9 #include "Typedefs.h"
// Forward declarations. In the visible part of this header these types are
// referenced only through pointers or as template arguments, so their full
// definitions are not needed here. (CPhoneCollection and CWordCollection are
// not referenced anywhere in the visible part of this header.)
11 class LinguisticaMainWindow;
12 class CParse;
13 class hmmForSortingItem;
14 class CPhoneCollection;
15 class CWordCollection;
16 class VideoFrame;
// Forward declaration of the sorted-list template used by hmmSortedList below.
// NOTE(review): presumably the Qt3Support Q3SortedList — confirm which header
// supplies the definition for translation units that use hmmSortedList.
18 template<class V> class Q3SortedList;
// Container aliases used by StateEmitHMM. All are QMap instantiations; the
// alias name spells out the key type and the mapped type.
// The maps that store pointers (hmmSortedList*, CParse*, void*, int*) do not
// express ownership; who frees the pointees cannot be told from this header.
20 typedef Q3SortedList<hmmForSortingItem> hmmSortedList;
21 typedef QMap<int, hmmSortedList*> IntTohmmSortedList;
22 typedef QMap<int, CParse*> IntToParse;
23 typedef QMap<int, void*> IntToVoid;
24 typedef QMap<int, int*> IntToIntArray;
25 typedef QMap<int, int> IntToInt;
26 typedef QMap<QString, int> QStringToInt;
// Forward declaration for the Video* member of StateEmitHMM.
28 class Video;
// StateEmitHMM: hidden Markov model (see the comment at the top of the file).
// The class exposes its configuration, training data and probability tables
// as public data members, followed by the training and reporting member
// functions. Only declarations are visible here; the definitions live
// elsewhere.
// NOTE(review): many members are raw pointers, and the class declares a
// destructor but no copy constructor or assignment operator. Who allocates
// and frees these arrays cannot be told from this header — confirm in the
// implementation before copying instances.
30 class StateEmitHMM
33 public:
// Back-pointers to the objects this model was created for. The elaborated
// type specifier "class X*" also serves as a declaration of X.
// Presumably each one is set by the matching constructor below — confirm.
34 class LinguisticaMainWindow* m_parent;
35 class CLexicon* m_Lexicon;
37 // Construction Parameters
38 int m_countOfStates;
39 int m_countOfSymbols;
40 int m_lengthOfObservation;
41 int m_maxLengthOfObservation;
42 int m_NumberOfIterations;
43 int m_countOfDataItems;
44 QString m_HMMLog;
45 QString m_HMMLogDirectory;
// eHmmDataType is not declared in this header; presumably it comes from one
// of the included project headers — confirm.
46 eHmmDataType m_dataType;
48 Video* m_Video;
51 // Vocabulary List
// IntToString / StringToInt are not declared in this header; presumably they
// come from "Typedefs.h" — confirm. The names suggest an index -> symbol
// table and its inverse.
52 IntToString m_symbolList;
53 StringToInt m_symbolIndex;
56 // Train Data
// Several maps keyed by int; presumably the key is the index of a training
// item in every one of them — verify against the implementation.
57 IntToParse m_trainingData;
58 IntToIntArray m_encodedTrainingData;
59 IntToVoid m_trainingDataSource;
60 QStringToInt m_trainingDataIndices;
61 IntToInt m_trainingDataFrequency;
62 IntToInt m_trainingDataSizes;
63 int* m_trainingDatum;
64 void* m_trainingDatumSource; // a pointer to the Object that provided the data
66 // prob parameters
// Presumably m_PI = initial-state probabilities, m_A = state-to-state
// transitions and m_B = state-to-symbol emissions (this matches the comments
// on the soft-count members below), with m_Alpha / m_Beta as the forward /
// backward tables. Array dimensions are not visible here — TODO confirm.
67 double* m_PI;
68 double** m_A;
69 double** m_B;
70 double** m_Alpha;
71 double** m_Beta;
72 double*** m_P;
74 // soft counts
75 double* m_PI_SoftCounts; // state i softcounts
76 double** m_A_SoftCounts; // state i --> state j softcounts
77 double** m_B_SoftCounts; // state i --> symbol j softcounts
78 double* m_Entropy; // Entropy of each state's emissions
80 // probability of data
81 double* m_WordProbabilities; // could be total Forward prob., or Viterbi; one for each data item
83 // For next use, output
84 IntTohmmSortedList m_symbolStateList;
86 // Results
// Returned by GetLogProbability() below.
87 double m_LogProbabilityOfData;
89 public:
// Two constructors, one per kind of owner (main window or lexicon).
// NOTE(review): both are single-argument and not explicit, so they allow
// implicit conversion from the pointer type.
90 StateEmitHMM(LinguisticaMainWindow*);
91 StateEmitHMM(CLexicon*);
93 virtual ~StateEmitHMM();
95 // init function
96 bool preprocessData(eHmmDataType, void*); // type =1 : take PhoneTier1; type =2 take PhoneTier2, type=3 take CParse
// The second parameter, loops, defaults to 1000. The meaning of the first
// (unnamed) int is not visible here — presumably the number of states; confirm.
97 void init(int, int loops = 1000 );
98 void initPiAndAB();
100 // forward procedure
// Presumably the int selects a training item and the double& receives the
// resulting probability — confirm in the implementation.
101 void forward(int, double&);
103 // backward procedure
104 void backward(int, double&);
106 // Expectation procedure
107 void Expectation(int);
109 // Total Maximization procedure
110 void Maximization();
112 // Train procedure
// Returns a double; what it represents is not visible here — presumably the
// final log probability of the data; confirm.
113 double trainParameters();
115 // clear function
116 void clear();
118 // compute a symbol's state list
119 void getStateListForASymbol();
121 // Log function
// The reporting functions below take their QString argument by value.
122 void logInfo(double totalLogProbability);
123 void OutputTransitions(int IterationNumber, QString FileName);
124 void OutputTransitionsToLogFile(int IterationNumber, QString FileName);
125 void OutputEmissions(int IterationNumber, QString LogFileName);
126 void OutputInitials(int IterationNumber, QString LogFileName );
128 // Totals
// Inline accessor for m_LogProbabilityOfData.
129 double GetLogProbability() { return m_LogProbabilityOfData; }
131 // Output to VideoFrame
132 void InsertValues ( VideoFrame* );
133 void Display( );
// hmmForSortingItem: a (state number, probability ratio) pair that can be
// stored in a sorted container (see the hmmSortedList typedef).
//
// Ordering is deliberately inverted: an item compares "less than" another
// when its m_probRatio is GREATER, so an ascending sort lists the items with
// the largest ratio first. Only m_probRatio takes part in the comparisons;
// m_stateNumber is ignored by both operators.
class hmmForSortingItem
{
public:
    int m_stateNumber;   // state this entry refers to
    double m_probRatio;  // sort key

public:
    // Default-constructed items start zeroed instead of holding
    // indeterminate values.
    hmmForSortingItem()
        : m_stateNumber(0), m_probRatio(0.0)
    {
    }

    ~hmmForSortingItem() {}

    // stateNumber: value stored in m_stateNumber.
    // probRatio:   value stored in m_probRatio (the sort key).
    hmmForSortingItem(int stateNumber, double probRatio)
        : m_stateNumber(stateNumber), m_probRatio(probRatio)
    {
    }

    // Returns true when this item should sort before `another`, i.e. when
    // its ratio is larger. Declared const so const objects and const
    // references can be compared.
    bool operator<(const hmmForSortingItem& another) const
    {
        return m_probRatio > another.m_probRatio;
    }

    // Returns true when both items carry exactly the same ratio.
    // This is an exact floating-point comparison, kept as in the original.
    bool operator==(const hmmForSortingItem& another) const
    {
        return m_probRatio == another.m_probRatio;
    }
};
169 #endif // STATEEMITHMM_H