1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
20 #include <boost/noncopyable.hpp>
29 explicit stack_printer(const char* msg
) :
32 fprintf(stdout
, "%s: --begin\n", msMsg
.c_str());
33 mfStartTime
= getTime();
38 double fEndTime
= getTime();
39 fprintf(stdout
, "%s: --end (duration: %g sec)\n", msMsg
.c_str(), (fEndTime
-mfStartTime
));
42 void printTime(int line
) const
44 double fEndTime
= getTime();
45 fprintf(stdout
, "%s: --(%d) (duration: %g sec)\n", msMsg
.c_str(), line
, (fEndTime
-mfStartTime
));
49 double getTime() const
52 gettimeofday(&tv
, NULL
);
53 return tv
.tv_sec
+ tv
.tv_usec
/ 1000000.0;
60 typedef std::vector
<int> values_type
;
61 typedef std::vector
<size_t> indices_type
;
64 size_t val_count
= 6000000;
65 double multiplier
= 300000.0;
66 bool dump_values
= false;
68 size_t val_count
= 20;
69 double multiplier
= 10.0;
70 bool dump_values
= true;
73 struct field
: boost::noncopyable
75 values_type items
; /// unique values
76 indices_type data
; /// original value series as indices into unique values.
77 indices_type order
; /// ascending order of the values as indices.
80 long compare(int left
, int right
)
89 bool has_item(const values_type
& items
, const indices_type
& order
, int val
, long& index
)
94 long high
= items
.size() - 1;
97 long this_index
= (low
+ high
) / 2;
98 const long comp_res
= compare(items
[order
[this_index
]], val
);
100 low
= this_index
+ 1;
103 high
= this_index
- 1;
115 bool check_items(const values_type
& items
)
120 // Items are supposed to be all unique values.
121 values_type
copied(items
);
122 sort(copied
.begin(), copied
.end());
123 copied
.erase(unique(copied
.begin(), copied
.end()), copied
.end());
124 return copied
.size() == items
.size();
127 bool check_order(const values_type
& items
, const indices_type
& order
)
129 // Ensure that the order is truly in ascending order.
130 if (items
.size() != order
.size())
136 indices_type::const_iterator it
= order
.begin();
137 values_type::value_type prev
= items
[*it
];
138 for (++it
; it
!= order
.end(); ++it
)
140 values_type::value_type val
= items
[*it
];
150 bool check_data(const values_type
& items
, const indices_type
& data
, const values_type
& original
)
152 if (items
.empty() || data
.empty() || original
.empty())
155 if (data
.size() != original
.size())
158 size_t n
= data
.size();
159 for (size_t i
= 0; i
< n
; ++i
)
161 if (items
[data
[i
]] != original
[i
])
167 bool dump_and_check(const field
& fld
, const values_type
& original
, bool dump_values
)
169 cout
<< "unique item count: " << fld
.items
.size() << endl
;
170 cout
<< "original data count: " << fld
.data
.size() << endl
;
174 cout
<< "--- items" << endl
;
175 copy(fld
.items
.begin(), fld
.items
.end(), ostream_iterator
<int>(cout
, "\n"));
176 cout
<< "--- sorted items" << endl
;
178 indices_type::const_iterator it
= fld
.order
.begin(), it_end
= fld
.order
.end();
179 for (; it
!= it_end
; ++it
)
181 cout
<< fld
.items
[*it
] << endl
;
186 if (!check_items(fld
.items
))
188 cout
<< "item check failed" << endl
;
192 if (!check_order(fld
.items
, fld
.order
))
194 cout
<< "order check failed" << endl
;
198 if (!check_data(fld
.items
, fld
.data
, original
))
200 cout
<< "data check failed" << endl
;
207 void run1(const values_type
& vals
, bool dump_values
)
211 stack_printer
__stack_printer__("::run1 (existing algorithm)");
212 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
213 for (; it
!= it_end
; ++it
)
216 if (!has_item(fld
.items
, fld
.order
, *it
, index
))
218 // This item doesn't exist in the dimension array yet.
219 fld
.items
.push_back(*it
);
221 fld
.order
.begin()+index
, fld
.items
.size()-1);
222 fld
.data
.push_back(fld
.items
.size()-1);
225 fld
.data
.push_back(fld
.order
[index
]);
229 bool res
= dump_and_check(fld
, vals
, dump_values
);
230 cout
<< "check: " << (res
? "success" : "failure") << endl
;
239 bucket(int _value
, size_t _order_index
, size_t _data_index
) :
240 value(_value
), order_index(_order_index
), data_index(_data_index
) {}
242 bucket(const bucket
& r
) :
243 value(r
.value
), order_index(r
.order_index
), data_index(r
.data_index
) {}
246 void print_buckets(const vector
<bucket
>& buckets
, const char* msg
)
248 cout
<< "--- buckets content (" << msg
<< ")" << endl
;
249 vector
<bucket
>::const_iterator it
= buckets
.begin(), it_end
= buckets
.end();
250 for (; it
!= it_end
; ++it
)
252 cout
<< "value: " << it
->value
<< " order index: " << it
->order_index
253 << " data index: " << it
->data_index
<< endl
;
255 cout
<< "---" << endl
;
258 struct less_by_value
: std::binary_function
<bucket
, bucket
, bool>
260 bool operator() (const bucket
& left
, const bucket
& right
) const
262 return left
.value
< right
.value
;
266 struct less_by_data_index
: std::binary_function
<bucket
, bucket
, bool>
268 bool operator() (const bucket
& left
, const bucket
& right
) const
270 return left
.data_index
< right
.data_index
;
274 struct equal_by_value
: std::binary_function
<bucket
, bucket
, bool>
276 bool operator() (const bucket
& left
, const bucket
& right
) const
278 return left
.value
== right
.value
;
282 class push_back_value
: std::unary_function
<bucket
, void>
286 push_back_value(values_type
& _items
) : items(_items
) {}
287 void operator() (const bucket
& v
)
289 items
.push_back(v
.value
);
293 class push_back_order_index
: std::unary_function
<bucket
, void>
295 indices_type
& data_indices
;
297 push_back_order_index(indices_type
& _items
) : data_indices(_items
) {}
298 void operator() (const bucket
& v
)
300 data_indices
.push_back(v
.order_index
);
304 void run2(const values_type
& vals
, bool dump_values
)
308 stack_printer
__stack_printer__("::run2 (alternative algorithm)");
309 vector
<bucket
> buckets
;
310 buckets
.reserve(vals
.size());
312 // Push back all original values.
313 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
314 for (size_t i
= 0; it
!= it_end
; ++it
, ++i
)
315 buckets
.push_back(bucket(*it
, 0, i
));
320 cout
<< "error: empty buckets" << endl
;
324 // print_buckets(buckets, "original");
326 // Sort by the value.
327 sort(buckets
.begin(), buckets
.end(), less_by_value());
329 // print_buckets(buckets, "sorted");
332 // Set order index such that unique values have identical index value.
333 size_t cur_index
= 0;
334 vector
<bucket
>::iterator it
= buckets
.begin(), it_end
= buckets
.end();
335 int prev
= it
->value
;
336 it
->order_index
= cur_index
;
337 for (++it
; it
!= it_end
; ++it
)
339 if (prev
!= it
->value
)
342 it
->order_index
= cur_index
;
347 // print_buckets(buckets, "sorted and indexed");
349 // Re-sort the bucket this time by the data index.
350 sort(buckets
.begin(), buckets
.end(), less_by_data_index());
351 // print_buckets(buckets, "re-sort by data index");
353 // Copy the order index series into the field object.
354 fld
.data
.reserve(buckets
.size());
355 for_each(buckets
.begin(), buckets
.end(), push_back_order_index(fld
.data
));
357 // Sort by the value again.
358 sort(buckets
.begin(), buckets
.end(), less_by_value());
361 vector
<bucket
>::iterator it_unique_end
=
362 unique(buckets
.begin(), buckets
.end(), equal_by_value());
364 // print_buckets(buckets, "uniqued");
366 // Copy the unique values into items.
367 vector
<bucket
>::iterator it_beg
= buckets
.begin();
368 size_t len
= distance(it_beg
, it_unique_end
);
369 fld
.items
.reserve(len
);
370 for_each(it_beg
, it_unique_end
, push_back_value(fld
.items
));
372 // The items are actually already sorted. So, just insert a sequence
373 // of integers from 0 and up.
374 fld
.order
.reserve(len
);
375 for (size_t i
= 0; i
< len
; ++i
)
376 fld
.order
.push_back(i
);
379 bool res
= dump_and_check(fld
, vals
, dump_values
);
380 cout
<< "check: " << (res
? "success" : "failure") << endl
;
388 vals
.reserve(val_count
);
391 cout
<< "--- original" << endl
;
393 for (size_t i
= 0; i
< val_count
; ++i
)
398 values_type::value_type v2
= v
;
402 cout
<< i
<< ": " << v2
<< endl
;
406 cout
<< "---" << endl
;
408 run1(vals
, dump_values
);
409 run2(vals
, dump_values
);
414 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */