1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * Version: MPL 1.1 / GPLv3+ / LGPLv3+
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License or as specified alternatively below. You may obtain a copy of
8 * the License at http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * Major Contributor(s):
16 * Copyright (C) 2012 Kohei Yoshida <kohei.yoshida@suse.com>
18 * All Rights Reserved.
20 * For minor contributions see the git repository.
22 * Alternatively, the contents of this file may be used under the terms of
23 * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
24 * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
25 * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
26 * instead of those above.
39 #include <boost/noncopyable.hpp>
48 explicit stack_printer(const char* msg
) :
51 fprintf(stdout
, "%s: --begin\n", msMsg
.c_str());
52 mfStartTime
= getTime();
57 double fEndTime
= getTime();
58 fprintf(stdout
, "%s: --end (duration: %g sec)\n", msMsg
.c_str(), (fEndTime
-mfStartTime
));
61 void printTime(int line
) const
63 double fEndTime
= getTime();
64 fprintf(stdout
, "%s: --(%d) (duration: %g sec)\n", msMsg
.c_str(), line
, (fEndTime
-mfStartTime
));
68 double getTime() const
71 gettimeofday(&tv
, NULL
);
72 return tv
.tv_sec
+ tv
.tv_usec
/ 1000000.0;
79 typedef std::vector
<int> values_type
;
80 typedef std::vector
<size_t> indices_type
;
83 size_t val_count
= 6000000;
84 double multiplier
= 300000.0;
85 bool dump_values
= false;
87 size_t val_count
= 20;
88 double multiplier
= 10.0;
89 bool dump_values
= true;
92 struct field
: boost::noncopyable
94 values_type items
; /// unique values
95 indices_type data
; /// original value series as indices into unique values.
96 indices_type order
; /// ascending order of the values as indices.
99 long compare(int left
, int right
)
108 bool has_item(const values_type
& items
, const indices_type
& order
, int val
, long& index
)
110 index
= items
.size();
113 long high
= items
.size() - 1;
117 long this_index
= (low
+ high
) / 2;
118 comp_res
= compare(items
[order
[this_index
]], val
);
120 low
= this_index
+ 1;
123 high
= this_index
- 1;
135 bool check_items(const values_type
& items
)
140 // Items are supposed to be all unique values.
141 values_type
copied(items
);
142 sort(copied
.begin(), copied
.end());
143 copied
.erase(unique(copied
.begin(), copied
.end()), copied
.end());
144 return copied
.size() == items
.size();
147 bool check_order(const values_type
& items
, const indices_type
& order
)
149 // Ensure that the order is truly in ascending order.
150 if (items
.size() != order
.size())
156 indices_type::const_iterator it
= order
.begin();
157 values_type::value_type prev
= items
[*it
];
158 for (++it
; it
!= order
.end(); ++it
)
160 values_type::value_type val
= items
[*it
];
170 bool check_data(const values_type
& items
, const indices_type
& data
, const values_type
& original
)
172 if (items
.empty() || data
.empty() || original
.empty())
175 if (data
.size() != original
.size())
178 size_t n
= data
.size();
179 for (size_t i
= 0; i
< n
; ++i
)
181 if (items
[data
[i
]] != original
[i
])
187 bool dump_and_check(const field
& fld
, const values_type
& original
, bool dump_values
)
189 cout
<< "unique item count: " << fld
.items
.size() << endl
;
190 cout
<< "original data count: " << fld
.data
.size() << endl
;
194 cout
<< "--- items" << endl
;
195 copy(fld
.items
.begin(), fld
.items
.end(), ostream_iterator
<int>(cout
, "\n"));
196 cout
<< "--- sorted items" << endl
;
198 indices_type::const_iterator it
= fld
.order
.begin(), it_end
= fld
.order
.end();
199 for (; it
!= it_end
; ++it
)
201 cout
<< fld
.items
[*it
] << endl
;
206 if (!check_items(fld
.items
))
208 cout
<< "item check failed" << endl
;
212 if (!check_order(fld
.items
, fld
.order
))
214 cout
<< "order check failed" << endl
;
218 if (!check_data(fld
.items
, fld
.data
, original
))
220 cout
<< "data check failed" << endl
;
227 void run1(const values_type
& vals
, bool dump_values
)
231 stack_printer
__stack_printer__("::run1 (existing algorithm)");
232 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
233 for (; it
!= it_end
; ++it
)
236 if (!has_item(fld
.items
, fld
.order
, *it
, index
))
238 // This item doesn't exist in the dimension array yet.
239 fld
.items
.push_back(*it
);
241 fld
.order
.begin()+index
, fld
.items
.size()-1);
242 fld
.data
.push_back(fld
.items
.size()-1);
245 fld
.data
.push_back(fld
.order
[index
]);
249 bool res
= dump_and_check(fld
, vals
, dump_values
);
250 cout
<< "check: " << (res
? "success" : "failure") << endl
;
259 bucket(int _value
, size_t _order_index
, size_t _data_index
) :
260 value(_value
), order_index(_order_index
), data_index(_data_index
) {}
262 bucket(const bucket
& r
) :
263 value(r
.value
), order_index(r
.order_index
), data_index(r
.data_index
) {}
266 void print_buckets(const vector
<bucket
>& buckets
, const char* msg
)
268 cout
<< "--- buckets content (" << msg
<< ")" << endl
;
269 vector
<bucket
>::const_iterator it
= buckets
.begin(), it_end
= buckets
.end();
270 for (; it
!= it_end
; ++it
)
272 cout
<< "value: " << it
->value
<< " order index: " << it
->order_index
273 << " data index: " << it
->data_index
<< endl
;
275 cout
<< "---" << endl
;
278 struct less_by_value
: std::binary_function
<bucket
, bucket
, bool>
280 bool operator() (const bucket
& left
, const bucket
& right
) const
282 return left
.value
< right
.value
;
286 struct less_by_data_index
: std::binary_function
<bucket
, bucket
, bool>
288 bool operator() (const bucket
& left
, const bucket
& right
) const
290 return left
.data_index
< right
.data_index
;
294 struct equal_by_value
: std::binary_function
<bucket
, bucket
, bool>
296 bool operator() (const bucket
& left
, const bucket
& right
) const
298 return left
.value
== right
.value
;
302 class push_back_value
: std::unary_function
<bucket
, void>
306 push_back_value(values_type
& _items
) : items(_items
) {}
307 void operator() (const bucket
& v
)
309 items
.push_back(v
.value
);
313 class push_back_order_index
: std::unary_function
<bucket
, void>
315 indices_type
& data_indices
;
317 push_back_order_index(indices_type
& _items
) : data_indices(_items
) {}
318 void operator() (const bucket
& v
)
320 data_indices
.push_back(v
.order_index
);
324 void run2(const values_type
& vals
, bool dump_values
)
328 stack_printer
__stack_printer__("::run2 (alternative algorithm)");
329 vector
<bucket
> buckets
;
330 buckets
.reserve(vals
.size());
332 // Push back all original values.
333 values_type::const_iterator it
= vals
.begin(), it_end
= vals
.end();
334 for (size_t i
= 0; it
!= it_end
; ++it
, ++i
)
335 buckets
.push_back(bucket(*it
, 0, i
));
340 cout
<< "error: empty buckets" << endl
;
344 // print_buckets(buckets, "original");
346 // Sort by the value.
347 sort(buckets
.begin(), buckets
.end(), less_by_value());
349 // print_buckets(buckets, "sorted");
352 // Set order index such that unique values have identical index value.
353 size_t cur_index
= 0;
354 vector
<bucket
>::iterator it
= buckets
.begin(), it_end
= buckets
.end();
355 int prev
= it
->value
;
356 it
->order_index
= cur_index
;
357 for (++it
; it
!= it_end
; ++it
)
359 if (prev
!= it
->value
)
362 it
->order_index
= cur_index
;
367 // print_buckets(buckets, "sorted and indexed");
369 // Re-sort the bucket this time by the data index.
370 sort(buckets
.begin(), buckets
.end(), less_by_data_index());
371 // print_buckets(buckets, "re-sort by data index");
373 // Copy the order index series into the field object.
374 fld
.data
.reserve(buckets
.size());
375 for_each(buckets
.begin(), buckets
.end(), push_back_order_index(fld
.data
));
377 // Sort by the value again.
378 sort(buckets
.begin(), buckets
.end(), less_by_value());
381 vector
<bucket
>::iterator it_unique_end
=
382 unique(buckets
.begin(), buckets
.end(), equal_by_value());
384 // print_buckets(buckets, "uniqued");
386 // Copy the unique values into items.
387 vector
<bucket
>::iterator it_beg
= buckets
.begin();
388 size_t len
= distance(it_beg
, it_unique_end
);
389 fld
.items
.reserve(len
);
390 for_each(it_beg
, it_unique_end
, push_back_value(fld
.items
));
392 // The items are actually already sorted. So, just insert a sequence
393 // of integers from 0 and up.
394 fld
.order
.reserve(len
);
395 for (size_t i
= 0; i
< len
; ++i
)
396 fld
.order
.push_back(i
);
399 bool res
= dump_and_check(fld
, vals
, dump_values
);
400 cout
<< "check: " << (res
? "success" : "failure") << endl
;
408 vals
.reserve(val_count
);
411 cout
<< "--- original" << endl
;
413 for (size_t i
= 0; i
< val_count
; ++i
)
418 values_type::value_type v2
= v
;
422 cout
<< i
<< ": " << v2
<< endl
;
426 cout
<< "---" << endl
;
428 run1(vals
, dump_values
);
429 run2(vals
, dump_values
);
434 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */