sc/workben/dpcache/perf-test.cpp

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  */
   9
  10 #include <cstdlib>
  11 #include <iostream>
  12 #include <stdio.h>
  13 #include <string>
  14 #include <sys/time.h>
  15 #include <vector>
  16 #include <iterator>
  17 #include <algorithm>
  18 #include <functional>
  19
  20 #include <boost/noncopyable.hpp>
  21
  22 using namespace std;
  23
  24 namespace {
  25
  26 class stack_printer
  27 {
  28 public:
  29     explicit stack_printer(const char* msg) :
  30         msMsg(msg)
  31     {
  32         fprintf(stdout, "%s: --begin\n", msMsg.c_str());
  33         mfStartTime = getTime();
  34     }
  35
  36     ~stack_printer()
  37     {
  38         double fEndTime = getTime();
  39         fprintf(stdout, "%s: --end (duration: %g sec)\n", msMsg.c_str(), (fEndTime-mfStartTime));
  40     }
  41
  42     void printTime(int line) const
  43     {
  44         double fEndTime = getTime();
  45         fprintf(stdout, "%s: --(%d) (duration: %g sec)\n", msMsg.c_str(), line, (fEndTime-mfStartTime));
  46     }
  47
  48 private:
  49     double getTime() const
  50     {
  51         timeval tv;
  52         gettimeofday(&tv, NULL);
  53         return tv.tv_sec + tv.tv_usec / 1000000.0;
  54     }
  55
  56     ::std::string msMsg;
  57     double mfStartTime;
  58 };
  59
  60 typedef std::vector<int> values_type;
  61 typedef std::vector<size_t> indices_type;
  62
  63 #if 1
  64 size_t val_count = 6000000;
  65 double multiplier = 300000.0;
  66 bool dump_values = false;
  67 #else
  68 size_t val_count = 20;
  69 double multiplier = 10.0;
  70 bool dump_values = true;
  71 #endif
  72
  73 struct field : boost::noncopyable
  74 {
  75     values_type items;   /// unique values
  76     indices_type data;   /// original value series as indices into unique values.
  77     indices_type order;  /// ascending order of the values as indices.
  78 };
  79
  80 long compare(int left, int right)
  81 {
  82     if (left == right)
  83         return 0;
  84     if (left < right)
  85         return -1;
  86     return 1;
  87 }
  88
  89 bool has_item(const values_type& items, const indices_type& order, int val, long& index)
  90 {
  91     index = items.size();
  92     bool found = false;
  93     long low = 0;
  94     long high = items.size() - 1;
  95     long comp_res;
  96     while (low <= high)
  97     {
  98         long this_index = (low + high) / 2;
  99         comp_res = compare(items[order[this_index]], val);
 100         if (comp_res < 0)
 101             low = this_index + 1;
 102         else
 103         {
 104             high = this_index - 1;
 105             if (comp_res == 0)
 106             {
 107                 found = true;
 108                 low = this_index;
 109             }
 110         }
 111     }
 112     index = low;
 113     return found;
 114 }
 115
 116 bool check_items(const values_type& items)
 117 {
 118     if (items.empty())
 119         return false;
 120
 121     // Items are supposed to be all unique values.
 122     values_type copied(items);
 123     sort(copied.begin(), copied.end());
 124     copied.erase(unique(copied.begin(), copied.end()), copied.end());
 125     return copied.size() == items.size();
 126 }
 127
 128 bool check_order(const values_type& items, const indices_type& order)
 129 {
 130     // Ensure that the order is truly in ascending order.
 131     if (items.size() != order.size())
 132         return false;
 133
 134     if (items.empty())
 135         return false;
 136
 137     indices_type::const_iterator it = order.begin();
 138     values_type::value_type prev = items[*it];
 139     for (++it; it != order.end(); ++it)
 140     {
 141         values_type::value_type val = items[*it];
 142         if (prev >= val)
 143             return false;
 144
 145         prev = val;
 146     }
 147
 148     return true;
 149 }
 150
 151 bool check_data(const values_type& items, const indices_type& data, const values_type& original)
 152 {
 153     if (items.empty() || data.empty() || original.empty())
 154         return false;
 155
 156     if (data.size() != original.size())
 157         return false;
 158
 159     size_t n = data.size();
 160     for (size_t i = 0; i < n; ++i)
 161     {
 162         if (items[data[i]] != original[i])
 163             return false;
 164     }
 165     return true;
 166 }
 167
 168 bool dump_and_check(const field& fld, const values_type& original, bool dump_values)
 169 {
 170     cout << "unique item count:   " << fld.items.size() << endl;
 171     cout << "original data count: " << fld.data.size() << endl;
 172
 173     if (dump_values)
 174     {
 175         cout << "--- items" << endl;
 176         copy(fld.items.begin(), fld.items.end(), ostream_iterator<int>(cout, "\n"));
 177         cout << "--- sorted items" << endl;
 178         {
 179             indices_type::const_iterator it = fld.order.begin(), it_end = fld.order.end();
 180             for (; it != it_end; ++it)
 181             {
 182                 cout << fld.items[*it] << endl;
 183             }
 184         }
 185     }
 186
 187     if (!check_items(fld.items))
 188     {
 189         cout << "item check failed" << endl;
 190         return false;
 191     }
 192
 193     if (!check_order(fld.items, fld.order))
 194     {
 195         cout << "order check failed" << endl;
 196         return false;
 197     }
 198
 199     if (!check_data(fld.items, fld.data, original))
 200     {
 201         cout << "data check failed" << endl;
 202         return false;
 203     }
 204
 205     return true;
 206 }
 207
 208 void run1(const values_type& vals, bool dump_values)
 209 {
 210     field fld;
 211     {
 212         stack_printer __stack_printer__("::run1 (existing algorithm)");
 213         values_type::const_iterator it = vals.begin(), it_end = vals.end();
 214         for (; it != it_end; ++it)
 215         {
 216             long index = 0;
 217             if (!has_item(fld.items, fld.order, *it, index))
 218             {
 219                 // This item doesn't exist in the dimension array yet.
 220                 fld.items.push_back(*it);
 221                 fld.order.insert(
 222                     fld.order.begin()+index, fld.items.size()-1);
 223                 fld.data.push_back(fld.items.size()-1);
 224             }
 225             else
 226                 fld.data.push_back(fld.order[index]);
 227         }
 228     }
 229
 230     bool res = dump_and_check(fld, vals, dump_values);
 231     cout << "check: " << (res ? "success" : "failure") << endl;
 232 }
 233
 234 struct bucket
 235 {
 236     int value;
 237     size_t order_index;
 238     size_t data_index;
 239
 240     bucket(int _value, size_t _order_index, size_t _data_index) :
 241         value(_value), order_index(_order_index), data_index(_data_index) {}
 242
 243     bucket(const bucket& r) :
 244         value(r.value), order_index(r.order_index), data_index(r.data_index) {}
 245 };
 246
 247 void print_buckets(const vector<bucket>& buckets, const char* msg)
 248 {
 249     cout << "--- buckets content (" << msg << ")" << endl;
 250     vector<bucket>::const_iterator it = buckets.begin(), it_end = buckets.end();
 251     for (; it != it_end; ++it)
 252     {
 253         cout << "value: " << it->value << "  order index: " << it->order_index
 254              << "  data index: " << it->data_index << endl;
 255     }
 256     cout << "---" << endl;
 257 }
 258
 259 struct less_by_value : std::binary_function<bucket, bucket, bool>
 260 {
 261     bool operator() (const bucket& left, const bucket& right) const
 262     {
 263         return left.value < right.value;
 264     }
 265 };
 266
 267 struct less_by_data_index : std::binary_function<bucket, bucket, bool>
 268 {
 269     bool operator() (const bucket& left, const bucket& right) const
 270     {
 271         return left.data_index < right.data_index;
 272     }
 273 };
 274
 275 struct equal_by_value : std::binary_function<bucket, bucket, bool>
 276 {
 277     bool operator() (const bucket& left, const bucket& right) const
 278     {
 279         return left.value == right.value;
 280     }
 281 };
 282
 283 class push_back_value : std::unary_function<bucket, void>
 284 {
 285     values_type& items;
 286 public:
 287     push_back_value(values_type& _items) : items(_items) {}
 288     void operator() (const bucket& v)
 289     {
 290         items.push_back(v.value);
 291     }
 292 };
 293
 294 class push_back_order_index : std::unary_function<bucket, void>
 295 {
 296     indices_type& data_indices;
 297 public:
 298     push_back_order_index(indices_type& _items) : data_indices(_items) {}
 299     void operator() (const bucket& v)
 300     {
 301         data_indices.push_back(v.order_index);
 302     }
 303 };
 304
 305 void run2(const values_type& vals, bool dump_values)
 306 {
 307     field fld;
 308     {
 309         stack_printer __stack_printer__("::run2 (alternative algorithm)");
 310         vector<bucket> buckets;
 311         buckets.reserve(vals.size());
 312         {
 313             // Push back all original values.
 314             values_type::const_iterator it = vals.begin(), it_end = vals.end();
 315             for (size_t i = 0; it != it_end; ++it, ++i)
 316                 buckets.push_back(bucket(*it, 0, i));
 317         }
 318
 319         if (buckets.empty())
 320         {
 321             cout << "error: empty buckets" << endl;
 322             return;
 323         }
 324
 325 //      print_buckets(buckets, "original");
 326
 327         // Sort by the value.
 328         sort(buckets.begin(), buckets.end(), less_by_value());
 329
 330 //      print_buckets(buckets, "sorted");
 331
 332         {
 333             // Set order index such that unique values have identical index value.
 334             size_t cur_index = 0;
 335             vector<bucket>::iterator it = buckets.begin(), it_end = buckets.end();
 336             int prev = it->value;
 337             it->order_index = cur_index;
 338             for (++it; it != it_end; ++it)
 339             {
 340                 if (prev != it->value)
 341                     ++cur_index;
 342
 343                 it->order_index = cur_index;
 344                 prev = it->value;
 345             }
 346         }
 347
 348 //      print_buckets(buckets, "sorted and indexed");
 349
 350         // Re-sort the bucket this time by the data index.
 351         sort(buckets.begin(), buckets.end(), less_by_data_index());
 352 //      print_buckets(buckets, "re-sort by data index");
 353
 354         // Copy the order index series into the field object.
 355         fld.data.reserve(buckets.size());
 356         for_each(buckets.begin(), buckets.end(), push_back_order_index(fld.data));
 357
 358         // Sort by the value again.
 359         sort(buckets.begin(), buckets.end(), less_by_value());
 360
 361         // Unique by value.
 362         vector<bucket>::iterator it_unique_end =
 363             unique(buckets.begin(), buckets.end(), equal_by_value());
 364
 365 //      print_buckets(buckets, "uniqued");
 366
 367         // Copy the unique values into items.
 368         vector<bucket>::iterator it_beg = buckets.begin();
 369         size_t len = distance(it_beg, it_unique_end);
 370         fld.items.reserve(len);
 371         for_each(it_beg, it_unique_end, push_back_value(fld.items));
 372
 373         // The items are actually already sorted.  So, just insert a sequence
 374         // of integers from 0 and up.
 375         fld.order.reserve(len);
 376         for (size_t i = 0; i < len; ++i)
 377             fld.order.push_back(i);
 378     }
 379
 380     bool res = dump_and_check(fld, vals, dump_values);
 381     cout << "check: " << (res ? "success" : "failure") << endl;
 382 }
 383
 384 }
 385
 386 int main()
 387 {
 388     values_type vals;
 389     vals.reserve(val_count);
 390
 391     if (dump_values)
 392         cout << "--- original" << endl;
 393
 394     for (size_t i = 0; i < val_count; ++i)
 395     {
 396         double v = rand();
 397         v /= RAND_MAX;
 398         v *= multiplier;
 399         values_type::value_type v2 = v;
 400         vals.push_back(v2);
 401
 402         if (dump_values)
 403             cout << i << ": " << v2 << endl;
 404     }
 405
 406     if (dump_values)
 407         cout << "---" << endl;
 408
 409     run1(vals, dump_values);
 410     run2(vals, dump_values);
 411
 412     return EXIT_SUCCESS;
 413 }
 414
 415 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */