1 // Implementation of CSparseVector methods
2 // Copyright © 2009 The University of Chicago
3 #include "SparseVector.h"
11 int CompareBiData (const void* , const void* );
13 CSparseVector::CSparseVector()
17 m_Values
= new float [m_NumberOfSlots
];
18 m_Dimension
= new int [m_NumberOfSlots
];
20 for (int i
= 0; i
< m_NumberOfSlots
; i
++)
29 CSparseVector::CSparseVector ( int n
)
31 int MinimumNumberOfSlots
= 10;
32 if (n
> MinimumNumberOfSlots
)
37 m_NumberOfSlots
= MinimumNumberOfSlots
;
41 m_Values
= new float [m_NumberOfSlots
];
42 m_Dimension
= new int [m_NumberOfSlots
];
44 for (int i
= 0; i
< m_NumberOfSlots
; i
++)
52 CSparseVector:: CSparseVector (CSparseVector
& rhs
)
55 m_NumberOfSlots
= rhs
.GetNumberOfSlots();
56 m_Length
= rhs
.GetLength();
59 while ( GetNextDimension ( m_Dimension
[pos
], m_Values
[pos
], pos
) )
61 // blank intentionally
64 m_NormalizedFlag
= rhs
.GetNormalizedFlag();
67 CSparseVector:: ~CSparseVector()
69 if (m_Values
) {delete [] m_Values
; }
70 if (m_Dimension
) {delete [] m_Dimension
;}
73 void CSparseVector::Clear()
77 for (int i = 0; i < m_NumberOfSlots; i++)
86 void CSparseVector::SetLength (int n)
94 m_Values = new float [n];
98 int CSparseVector:: GetLength() {return m_Length
;}
101 float CSparseVector::operator[] (int n) // uses internal index, not external --
103 if (n < 0 || n >= m_Length ) {return -1;}
104 else { return m_Values[n]; };
108 int CSparseVector::GetNumberOfSlots() {return m_NumberOfSlots
;}
110 int CSparseVector::GetNextDimension (int& DimOut
, float& Value
, int& Position
)
112 if ( Position
< 0 ) { return 0; }
113 if ( Position
>= m_Length
) { return 0; }
114 DimOut
= m_Dimension
[ Position
];
115 Value
= m_Values
[ Position
];
121 float CSparseVector::GetAt (int dim) // dim is the value to the outside world
123 int n = ContainsDim(dim);
125 //if (n == -1 ) { return -1; }
127 if ( n== -1) { return 0; }
128 else { return m_Values[n]; };
131 void CSparseVector::operator= (CSparseVector& rhs)
134 m_Length = rhs.GetLength();
136 if ( rhs.GetLength() > m_NumberOfSlots )
138 if (m_Dimension) delete m_Dimension;
139 if (m_Values) delete m_Values;
140 m_NumberOfSlots = rhs.GetLength();
141 m_Dimension = new int [ m_NumberOfSlots ];
142 m_Values = new float [ m_NumberOfSlots ];
144 for (int i = 0; i<m_Length; i++)
146 m_Dimension[i] = rhs.m_Dimension[i];
147 m_Values[i] = rhs.m_Values[i];
152 int CSparseVector::operator== (CSparseVector& L2)
154 if (m_Length != L2.GetLength() ) {return 0;}
155 for (int i = 0; i < m_Length; i++)
157 if (m_Dimension[i] != L2.m_Dimension[i]) { return 0; }
158 if (m_Values[i] != L2[i] ) { return 0; }
163 int CSparseVector::Delete (int Dim)
165 int InternalDim = -1;
170 for (int i = 0; i < m_Length; i++)
172 if ( m_Dimension[i] == Dim)
182 for ( i = InternalDim; i< m_Length -1; i++)
184 m_Dimension[i] = m_Dimension[i+1];
185 m_Values[i] = m_Values[i+1];
191 int CSparseVector::GetTopDimension()
193 return m_Dimension[m_Length -1];
196 void CSparseVector::SetAt (int dimen, float value) // insert New at the
205 // Find out if the dimension is already represented in this vector...
207 //--------------------------------------------------
208 index = ContainsDim (dimen);
209 if (index >= 0 ) // if it is already represented, fine...
211 m_Values[index] = value;
215 //--------------------------------------------------
216 // Otherwise, we must add a slot to represent it.
220 if (m_Length <= m_NumberOfSlots)
222 for (int i = 0; i < m_Length-1 && dimen > m_Dimension[i]; i++ )
224 // nothing here on purpose
227 for (i = m_Length - 1; i > RightSpot; i--)
229 m_Dimension[i] = m_Dimension[i-1];
230 m_Values[i] = m_Values [i-1];
232 m_Values [RightSpot] = value;
233 m_Dimension [RightSpot] = dimen;
239 m_NumberOfSlots *= 2;
240 float * Newm_Values = new float[ m_NumberOfSlots ];
241 int * Newm_Dimension = new int [ m_NumberOfSlots ];
243 for (int i = 0; i< m_Length - 1 && dimen > m_Dimension[i] ; i++)
245 Newm_Dimension[i] = m_Dimension[i];
246 Newm_Values[i] = m_Values[i];
249 Newm_Values[i] = value;
250 Newm_Dimension[i] = dimen;
252 for (i++; i< m_Length; i++)
254 Newm_Dimension[i] = m_Dimension[i-1];
255 Newm_Values[i] = m_Values [i-1];
260 delete[] m_Dimension;
263 m_Values = Newm_Values;
264 m_Dimension = Newm_Dimension;
267 m_NormalizedFlag = 0;
270 void CSparseVector::operator() (int n, float val)
276 void CSparseVector::IncrementAt(int n, float val )
278 int dim = ContainsDim (n);
281 m_Values[dim] += val;
283 } else // we assume that a value that was not present is taken to be zero.
291 int Overlap (CSparseVector& List1, CSparseVector& List2)
299 while ( i < List1.GetLength() && j < List2.GetLength() )
321 void CSparseVector::ClearOut()
323 for (int i = 0; i < m_NumberOfSlots; i++)
329 m_NormalizedFlag = 0;
333 int CSparseVector::ContainsDim (int n)
334 { // take advantage of log Size: do this smarter. Do
335 // this in log n time, not n/2!
337 if (m_Length == 0) {return -1;}
339 for (int i = 0; i < m_Length; i++)
341 if ( m_Dimension[i] == n)
352 ostream& CSparseVector::OutputToStream (CWordCollection& rWords, ostream& stream,int threshold)
355 BiData* SortArray = new BiData[ m_Length ];
357 for (int i = 0; i<m_Length; i++) {
359 SortArray[i].Integer = m_Dimension[i];
360 SortArray[i].m_Values = m_Values[i];
363 qsort ( (void*) SortArray, m_Length, sizeof (BiData ), CompareBiData );
365 for ( i = 0; i<m_Length ; i++)
367 if ( SortArray[i].m_Values < threshold ) {continue;}
368 if (SortArray[i].Integer > (int) rWords.GetCount() ||
369 SortArray[i].Integer < 0 )
372 << setw(6) << SortArray[i].Integer
373 << setw(16) << rWords[ SortArray[i].Integer ]-> GetKey()
374 << setw (7) << SortArray[i].m_Values
385 int CompareBiData (const void* BiData1, const void* BiData2)
388 float S1 = (float) int( ((BiData*) BiData1)->m_Values );
389 float S2 = (float) int( ((BiData*) BiData2)->m_Values );
390 if (S2 > S1) {return 1;}
391 if (S2 < S1) {return -1;}
396 void CSparseVector::MakeLogFreq (CWordCollection* Words)
399 for (int i = 0; i < m_Length; i++ )
401 WordNumber = m_Dimension[i];
402 Q_ASSERT ( Words->GetAt (WordNumber) ->GetCorpusCount() > 0 );
404 ( log ( m_Values[i] ) -
405 log ( Words->GetAt (WordNumber)->GetCorpusCount() ) );
407 m_NormalizedFlag = 0;
410 void CSparseVector::Normalize()
412 for (int i = 0; i < m_Length; i++)
414 Total += m_Values[i];
416 for (i=0; i < m_Length; i++)
418 m_Values[i] /= Total;
420 m_NormalizedFlag = 1;
423 int CSparseVector::GetNormalizedFlag() {return m_NormalizedFlag
;}
425 float CSparseVector::InnerProduct ( CSparseVector* Vector2)
431 for (int i = 0; i < m_Length; i++)
433 ThisDim = m_Dimension[i];
434 ThatValue = Vector2->GetAt(ThisDim);
437 Total += m_Values[i] * ThatValue;