1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
20 #include <boost/noncopyable.hpp>
29 explicit stack_printer(const char* msg
) :
32 fprintf(stdout
, "%s: --begin\n", msMsg
.c_str());
33 mfStartTime
= getTime();
38 double fEndTime
= getTime();
39 fprintf(stdout
, "%s: --end (duration: %g sec)\n", msMsg
.c_str(), (fEndTime
-mfStartTime
));
42 void printTime(int line
) const
44 double fEndTime
= getTime();
45 fprintf(stdout
, "%s: --(%d) (duration: %g sec)\n", msMsg
.c_str(), line
, (fEndTime
-mfStartTime
));
49 double getTime() const
52 gettimeofday(&tv
, NULL
);
53 return tv
.tv_sec
+ tv
.tv_usec
/ 1000000.0;
60 typedef std::vector
<int> values_type
;
61 typedef std::vector
<size_t> indices_type
;
64 size_t val_count
= 6000000;
65 double multiplier
= 300000.0;
66 bool dump_values
= false;
68 size_t val_count
= 20;
69 double multiplier
= 10.0;
70 bool dump_values
= true;
73 struct field
: boost::noncopyable
75 values_type items
; /// unique values
76 indices_type data
; /// original value series as indices into unique values.
77 indices_type order
; /// ascending order of the values as indices.
80 long compare(int left
, int right
)
89 bool has_item(const values_type
& items
, const indices_type
& order
, int val
, long& index
)
94 long high
= items
.size() - 1;
98 long this_index
= (low
+ high
) / 2;
99 comp_res
= compare(items
[order
[this_index
]], val
);
101 low
= this_index
+ 1;
104 high
= this_index
- 1;
116 bool check_items(const values_type
& items
)
121 // Items are supposed to be all unique values.
122 values_type
copied(items
);
123 sort(copied
.begin(), copied
.end());
124 copied
.erase(unique(copied
.begin(), copied
.end()), copied
.end());
125 return copied
.size() == items
.size();
128 bool check_order(const values_type
& items
, const indices_type
& order
)
130 // Ensure that the order is truly in ascending order.
131 if (items
.size() != order
.size())
137 indices_type::const_iterator it
= order
.begin();
138 values_type::value_type prev
= items
[*it
];
139 for (++it
; it
!= order
.end(); ++it
)
141 values_type::value_type val
= items
[*it
];
151 bool check_data(const values_type
& items
, const indices_type
& data
, const values_type
& original
)
153 if (items
.empty() || data
.empty() || original
.empty())
156 if (data
.size() != original
.size())
159 size_t n
= data
.size();
160 for (size_t i
= 0; i
< n
; ++i
)
162 if (items
[data
[i
]] != original
[i
])
168 bool dump_and_check(const field
& fld
, const values_type
& original
, bool dump_values
)
170 cout
<< "unique item count: " << fld
.items
.size() << endl
;
171 cout
<< "original data count: " << fld
.data
.size() << endl
;
175 cout
<< "--- items" << endl
;
176 copy(fld
.items
.begin(), fld
.items
.end(), ostream_iterator
<int>(cout
, "\n"));
177 cout
<< "--- sorted items" << endl
;
179 indices_type::const_iterator it
= fld
.order
.begin(), it_end
= fld
.order
.end();
180 for (; it
!= it_end
; ++it
)
182 cout
<< fld
.items
[*it
] << endl
;
187 if (!check_items(fld
.items
))
189 cout
<< "item check failed" << endl
;
193 if (!check_order(fld
.items
, fld
.order
))
195 cout
<< "order check failed" << endl
;
199 if (!check_data(fld
.items
, fld
.data
, original
))
201 cout
<< "data check failed" << endl
;
208 void run1(const values_type
& vals
, bool dump_values
)
212 stack_printer
__stack_printer__("::run1 (existing algorithm)");
213 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
214 for (; it
!= it_end
; ++it
)
217 if (!has_item(fld
.items
, fld
.order
, *it
, index
))
219 // This item doesn't exist in the dimension array yet.
220 fld
.items
.push_back(*it
);
222 fld
.order
.begin()+index
, fld
.items
.size()-1);
223 fld
.data
.push_back(fld
.items
.size()-1);
226 fld
.data
.push_back(fld
.order
[index
]);
230 bool res
= dump_and_check(fld
, vals
, dump_values
);
231 cout
<< "check: " << (res
? "success" : "failure") << endl
;
240 bucket(int _value
, size_t _order_index
, size_t _data_index
) :
241 value(_value
), order_index(_order_index
), data_index(_data_index
) {}
243 bucket(const bucket
& r
) :
244 value(r
.value
), order_index(r
.order_index
), data_index(r
.data_index
) {}
247 void print_buckets(const vector
<bucket
>& buckets
, const char* msg
)
249 cout
<< "--- buckets content (" << msg
<< ")" << endl
;
250 vector
<bucket
>::const_iterator it
= buckets
.begin(), it_end
= buckets
.end();
251 for (; it
!= it_end
; ++it
)
253 cout
<< "value: " << it
->value
<< " order index: " << it
->order_index
254 << " data index: " << it
->data_index
<< endl
;
256 cout
<< "---" << endl
;
259 struct less_by_value
: std::binary_function
<bucket
, bucket
, bool>
261 bool operator() (const bucket
& left
, const bucket
& right
) const
263 return left
.value
< right
.value
;
267 struct less_by_data_index
: std::binary_function
<bucket
, bucket
, bool>
269 bool operator() (const bucket
& left
, const bucket
& right
) const
271 return left
.data_index
< right
.data_index
;
275 struct equal_by_value
: std::binary_function
<bucket
, bucket
, bool>
277 bool operator() (const bucket
& left
, const bucket
& right
) const
279 return left
.value
== right
.value
;
283 class push_back_value
: std::unary_function
<bucket
, void>
287 push_back_value(values_type
& _items
) : items(_items
) {}
288 void operator() (const bucket
& v
)
290 items
.push_back(v
.value
);
294 class push_back_order_index
: std::unary_function
<bucket
, void>
296 indices_type
& data_indices
;
298 push_back_order_index(indices_type
& _items
) : data_indices(_items
) {}
299 void operator() (const bucket
& v
)
301 data_indices
.push_back(v
.order_index
);
305 void run2(const values_type
& vals
, bool dump_values
)
309 stack_printer
__stack_printer__("::run2 (alternative algorithm)");
310 vector
<bucket
> buckets
;
311 buckets
.reserve(vals
.size());
313 // Push back all original values.
314 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
315 for (size_t i
= 0; it
!= it_end
; ++it
, ++i
)
316 buckets
.push_back(bucket(*it
, 0, i
));
321 cout
<< "error: empty buckets" << endl
;
325 // print_buckets(buckets, "original");
327 // Sort by the value.
328 sort(buckets
.begin(), buckets
.end(), less_by_value());
330 // print_buckets(buckets, "sorted");
333 // Set order index such that unique values have identical index value.
334 size_t cur_index
= 0;
335 vector
<bucket
>::iterator it
= buckets
.begin(), it_end
= buckets
.end();
336 int prev
= it
->value
;
337 it
->order_index
= cur_index
;
338 for (++it
; it
!= it_end
; ++it
)
340 if (prev
!= it
->value
)
343 it
->order_index
= cur_index
;
348 // print_buckets(buckets, "sorted and indexed");
350 // Re-sort the bucket this time by the data index.
351 sort(buckets
.begin(), buckets
.end(), less_by_data_index());
352 // print_buckets(buckets, "re-sort by data index");
354 // Copy the order index series into the field object.
355 fld
.data
.reserve(buckets
.size());
356 for_each(buckets
.begin(), buckets
.end(), push_back_order_index(fld
.data
));
358 // Sort by the value again.
359 sort(buckets
.begin(), buckets
.end(), less_by_value());
362 vector
<bucket
>::iterator it_unique_end
=
363 unique(buckets
.begin(), buckets
.end(), equal_by_value());
365 // print_buckets(buckets, "uniqued");
367 // Copy the unique values into items.
368 vector
<bucket
>::iterator it_beg
= buckets
.begin();
369 size_t len
= distance(it_beg
, it_unique_end
);
370 fld
.items
.reserve(len
);
371 for_each(it_beg
, it_unique_end
, push_back_value(fld
.items
));
373 // The items are actually already sorted. So, just insert a sequence
374 // of integers from 0 and up.
375 fld
.order
.reserve(len
);
376 for (size_t i
= 0; i
< len
; ++i
)
377 fld
.order
.push_back(i
);
380 bool res
= dump_and_check(fld
, vals
, dump_values
);
381 cout
<< "check: " << (res
? "success" : "failure") << endl
;
389 vals
.reserve(val_count
);
392 cout
<< "--- original" << endl
;
394 for (size_t i
= 0; i
< val_count
; ++i
)
399 values_type::value_type v2
= v
;
403 cout
<< i
<< ": " << v2
<< endl
;
407 cout
<< "---" << endl
;
409 run1(vals
, dump_values
);
410 run2(vals
, dump_values
);
415 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */