//  Copyright (C) 2007, 2008, 2009, 2010 Tim Blechmann
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; see the file COPYING.  If not, write to
//  the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  Boston, MA 02111-1307, USA.
#ifndef DSP_THREAD_QUEUE_DSP_THREAD_QUEUE_HPP
#define DSP_THREAD_QUEUE_DSP_THREAD_QUEUE_HPP
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <limits>
#include <memory>
#include <new>
#include <vector>

#include <boost/atomic.hpp>
#include <boost/cstdint.hpp>
#include <boost/thread.hpp>

#ifdef DEBUG_DSP_THREADS
#include <boost/foreach.hpp>
#endif

#include <boost/lockfree/stack.hpp>

#include "nova-tt/semaphore.hpp"

#include "utilities/branch_hints.hpp"
#include "utilities/utils.hpp"
namespace nova {

template <typename runnable, typename Alloc>
class dsp_queue_interpreter;

/* concept of a runnable job:
 *
 *  runnable(runnable const & rhs);
 *  void operator()(uint threadindex);
 */
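/* For illustration only: a minimal job type satisfying the concept above might
 * look like the following sketch (print_job is a hypothetical name, not part
 * of this header):
 *
 *     struct print_job
 *     {
 *         explicit print_job(int id): id(id) {}
 *
 *         void operator()(unsigned int thread_index)
 *         {
 *             // work performed by this graph node
 *             printf("job %d running on dsp thread %u\n", id, thread_index);
 *         }
 *
 *         int id;
 *     };
 */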
/** item of a dsp thread queue
 *
 *  \tparam Alloc allocator for successor list
 *
 *  \todo operator new doesn't support stateful allocators
 */
template <typename runnable,
          typename Alloc = std::allocator<void*> >
class dsp_thread_queue_item
{
    typedef nova::dsp_queue_interpreter<runnable, Alloc> dsp_queue_interpreter;

    typedef typename Alloc::template rebind<dsp_thread_queue_item>::other new_allocator;

public:
    typedef boost::uint_fast16_t activation_limit_t;
    /** shared, reference-counted array of successor items */
    class successor_list
    {
        struct data_t
        {
            uint32_t count; /**< use count */
            uint32_t size;  /**< number of successors */
            dsp_thread_queue_item * content[0];
        };

        typedef typename Alloc::template rebind<data_t>::other array_allocator;
    public:
        /** allocating constructor: use count is initialized to 1 */
        explicit successor_list(uint32_t size = 0)
        {
            data = array_allocator().allocate(2*sizeof(uint32_t) + size * sizeof(dsp_thread_queue_item*));
            data->count = 1;
            data->size = size;
        }
        /** copy constructor: shares the data, incrementing the use count */
        successor_list(successor_list const & rhs):
            data(rhs.data)
        {
            data->count++;
        }

        /** assignment: releases the old data, then shares the data of rhs */
        successor_list & operator=(successor_list const & rhs)
        {
            if (--data->count == 0)
                array_allocator().deallocate(data, 2*sizeof(uint32_t) + data->size * sizeof(dsp_thread_queue_item*));

            data = rhs.data;
            data->count++;
            return *this;
        }
        std::size_t size(void) const
        {
            return data->size;
        }

        bool empty(void) const
        {
            return size() == 0;
        }

        dsp_thread_queue_item *& operator[](std::size_t index)
        {
            assert(index < size());
            return data->content[index];
        }

        dsp_thread_queue_item * const & operator[](std::size_t index) const
        {
            assert(index < size());
            return data->content[index];
        }
        ~successor_list(void)
        {
            if (--data->count == 0)
                array_allocator().deallocate(data, 2*sizeof(uint32_t) + data->size * sizeof(dsp_thread_queue_item*));
        }

    private:
        data_t * data;
    };
    dsp_thread_queue_item(runnable const & job, successor_list const & successors,
                          activation_limit_t activation_limit):
        activation_count(0), job(job), successors(successors), activation_limit(activation_limit)
    {}
    /** run the job, update dependencies and return the next runnable item (if any) */
    dsp_thread_queue_item * run(dsp_queue_interpreter & interpreter, boost::uint8_t thread_index)
    {
        assert(activation_count == 0);

        job(thread_index);

        dsp_thread_queue_item * next = update_dependencies(interpreter);
        reset_activation_count();
        return next;
    }
    /** called from the run method or once, when dsp queue is initialized */
    void reset_activation_count(void)
    {
        assert(activation_count == 0);
        activation_count.store(activation_limit, boost::memory_order_release);
    }
    runnable const & get_job(void) const
    {
        return job;
    }

    runnable & get_job(void)
    {
        return job;
    }
#ifdef DEBUG_DSP_THREADS
    void dump_item(void)
    {
        printf("\titem %p\n", this);
        printf("\tactivation limit %d\n", int(activation_limit));

        if (!successors.empty()) {
            printf("\tsuccessors:\n");
            for (std::size_t i = 0; i != successors.size(); ++i)
                printf("\t\t%p\n", successors[i]);
        }
        printf("\n");
    }
#endif
    /** \brief update all successors and possibly mark them as runnable */
    dsp_thread_queue_item * update_dependencies(dsp_queue_interpreter & interpreter)
    {
        dsp_thread_queue_item * ptr;
        std::size_t i = 0;

        /* find the first successor that becomes runnable: it is returned and run next */
        for (;;) {
            if (i == successors.size())
                return NULL;

            ptr = successors[i++]->dec_activation_count(interpreter);
            if (ptr)
                break; // no need to update the next item to run
        }

        /* all other successors that become runnable are handed to the interpreter */
        while (i != successors.size()) {
            dsp_thread_queue_item * next = successors[i++]->dec_activation_count(interpreter);
            if (next)
                interpreter.mark_as_runnable(next);
        }

        return ptr;
    }
    /** \brief decrement activation count and return this, if it drops to zero */
    inline dsp_thread_queue_item * dec_activation_count(dsp_queue_interpreter & interpreter)
    {
        activation_limit_t current = activation_count--;
        assert(current > 0);

        if (current == 1) /* this call dropped the count to zero */
            return this;
        else
            return NULL;
    }
private:
    boost::atomic<activation_limit_t> activation_count; /**< current activation count */

    runnable job;
    const successor_list successors;           /**< list of successor nodes */
    const activation_limit_t activation_limit; /**< number of predecessors */
};
template <typename runnable, typename Alloc = std::allocator<void*> >
class dsp_thread_queue
{
    typedef boost::uint_fast16_t node_count_t;

    typedef nova::dsp_thread_queue_item<runnable, Alloc> dsp_thread_queue_item;
    typedef std::vector<dsp_thread_queue_item*,
                        typename Alloc::template rebind<dsp_thread_queue_item*>::other
                       > item_vector_t;

    typedef typename Alloc::template rebind<dsp_thread_queue_item>::other item_allocator;
public:
#ifdef DEBUG_DSP_THREADS
    void dump_queue(void)
    {
        printf("queue %p\n items:\n", this);
        for (std::size_t i = 0; i != total_node_count; ++i)
            queue_items[i].dump_item();

        printf("\ninitial items:\n");
        BOOST_FOREACH(dsp_thread_queue_item * item, initially_runnable_items)
            item->dump_item();

        std::cout << std::endl;
    }
#endif
    /** preallocate node_count nodes */
    dsp_thread_queue(std::size_t node_count):
        total_node_count(0)
    {
        initially_runnable_items.reserve(node_count);
        queue_items = item_allocator().allocate(node_count * sizeof(dsp_thread_queue_item));
    }
    ~dsp_thread_queue(void)
    {
        for (std::size_t i = 0; i != total_node_count; ++i)
            queue_items[i].~dsp_thread_queue_item();

        item_allocator().deallocate(queue_items, total_node_count * sizeof(dsp_thread_queue_item));
    }
    void add_initially_runnable(dsp_thread_queue_item * item)
    {
        initially_runnable_items.push_back(item);
    }
    /** return initialized queue item */
    dsp_thread_queue_item *
    allocate_queue_item(runnable const & job,
                        typename dsp_thread_queue_item::successor_list const & successors,
                        typename dsp_thread_queue_item::activation_limit_t activation_limit)
    {
        dsp_thread_queue_item * ret = queue_items + total_node_count;
        ++total_node_count;

        assert (total_node_count <= initially_runnable_items.capacity());
        new (ret) dsp_thread_queue_item(job, successors, activation_limit);
        return ret;
    }
    void reset_activation_counts(void)
    {
        for (node_count_t i = 0; i != total_node_count; ++i)
            queue_items[i].reset_activation_count();
    }
    node_count_t get_total_node_count(void) const
    {
        return total_node_count;
    }
private:
    node_count_t total_node_count;          /* total number of nodes */
    item_vector_t initially_runnable_items; /* nodes without predecessors */
    dsp_thread_queue_item * queue_items;    /* all nodes */

    friend class dsp_queue_interpreter<runnable, Alloc>;
};
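/* For illustration only: a sketch of how client code might assemble a queue of
 * three nodes, where node_a has to run before node_b and node_c. print_job is
 * the hypothetical job type sketched above; all other names are assumptions as
 * well:
 *
 *     typedef nova::dsp_thread_queue<print_job>      queue_type;
 *     typedef nova::dsp_thread_queue_item<print_job> item_type;
 *
 *     queue_type * q = new queue_type(3);  // preallocate three nodes
 *
 *     // leaf nodes: one predecessor each, no successors
 *     item_type * node_b = q->allocate_queue_item(print_job(2), item_type::successor_list(), 1);
 *     item_type * node_c = q->allocate_queue_item(print_job(3), item_type::successor_list(), 1);
 *
 *     // root node: no predecessors, two successors, initially runnable
 *     item_type::successor_list successors(2);
 *     successors[0] = node_b;
 *     successors[1] = node_c;
 *     item_type * node_a = q->allocate_queue_item(print_job(1), successors, 0);
 *     q->add_initially_runnable(node_a);
 */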
template <typename runnable,
          typename Alloc = std::allocator<void*> >
class dsp_queue_interpreter
{
    typedef nova::dsp_thread_queue<runnable, Alloc> dsp_thread_queue;
    typedef nova::dsp_thread_queue_item<runnable, Alloc> dsp_thread_queue_item;
    typedef typename dsp_thread_queue_item::successor_list successor_list;
    typedef std::size_t size_t;

public:
    typedef boost::uint_fast8_t thread_count_t;
    typedef boost::uint_fast16_t node_count_t;
#ifdef __GXX_EXPERIMENTAL_CXX0X__
    typedef std::unique_ptr<dsp_thread_queue> dsp_thread_queue_ptr;
#else
    typedef std::auto_ptr<dsp_thread_queue> dsp_thread_queue_ptr;
#endif
    dsp_queue_interpreter(thread_count_t tc):
        runnable_set(1024), node_count(0)
    {
        if (!runnable_set.is_lock_free())
            std::cout << "Warning: scheduler queue is not lockfree!" << std::endl;

        set_thread_count(tc);
    }
    /** prepares queue and queue interpreter for dsp tick
     *
     *  \return true, if dsp queue is valid
     *          false, if no dsp queue is available or queue is empty
     */
    bool init_tick(void)
    {
        if (unlikely((queue.get() == NULL) or             /* no queue */
                     (queue->get_total_node_count() == 0) /* no nodes */
                    ))
            return false;

        /* reset node count */
        assert(node_count == 0);
        assert(runnable_set.empty());
        node_count.store(queue->get_total_node_count(), boost::memory_order_release);

        for (size_t i = 0; i != queue->initially_runnable_items.size(); ++i)
            mark_as_runnable(queue->initially_runnable_items[i]);

        return true;
    }
    /** release the currently installed queue */
    dsp_thread_queue_ptr release_queue(void)
    {
        dsp_thread_queue_ptr ret(queue.release());
        return ret;
    }
#ifdef __GXX_EXPERIMENTAL_CXX0X__
    dsp_thread_queue_ptr reset_queue(dsp_thread_queue_ptr && new_queue)
    {
        dsp_thread_queue_ptr ret(std::move(queue));

        queue = std::move(new_queue);
        if (queue.get() == 0)
            return ret;

        queue->reset_activation_counts();

#ifdef DEBUG_DSP_THREADS
        queue->dump_queue();
#endif

        thread_count_t thread_number =
            std::min(thread_count_t(std::min(total_node_count(),
                                             node_count_t(std::numeric_limits<thread_count_t>::max()))),
                     thread_count);

        used_helper_threads = thread_number - 1; /* this thread is not woken up */
        return ret;
    }
#else
    dsp_thread_queue_ptr reset_queue(dsp_thread_queue_ptr & new_queue)
    {
        dsp_thread_queue_ptr ret(queue.release());
        queue = new_queue;

        if (queue.get() == 0)
            return ret;

        queue->reset_activation_counts();

#ifdef DEBUG_DSP_THREADS
        queue->dump_queue();
#endif

        thread_count_t thread_number =
            std::min(thread_count_t(std::min(total_node_count(),
                                             node_count_t(std::numeric_limits<thread_count_t>::max()))),
                     thread_count);

        used_helper_threads = thread_number - 1; /* this thread is not woken up */
        return ret;
    }
#endif
    node_count_t total_node_count(void) const
    {
        return queue->get_total_node_count();
    }
    void set_thread_count(thread_count_t i)
    {
        assert (i < std::numeric_limits<thread_count_t>::max());
        i = std::max(thread_count_t(1u), i);

        thread_count = i;
    }
    thread_count_t get_thread_count(void) const
    {
        return thread_count;
    }

    thread_count_t get_used_helper_threads(void) const
    {
        return used_helper_threads;
    }
    void tick(thread_count_t thread_index)
    {
        run_item(thread_index);
    }
    /** simple exponential backoff helper for busy-waiting */
    struct backup
    {
        backup(int min, int max): min(min), max(max), loops(min) {}

        void run(void)
        {
            for (int i = 0; i != loops; ++i)
                asm(""); // empty asm to avoid optimization

            loops = std::min(loops * 2, max);
        }

        int min, max, loops;
    };
    void run_item(thread_count_t index)
    {
        backup b(256, 32768);

        for (;;) {
            if (node_count.load(boost::memory_order_acquire)) {
                /* we still have some nodes to process */
                int state = run_next_item(index);

                switch (state) {
                case no_remaining_items:
                    return;

                case fifo_empty:
                    b.run(); /* back off before polling the runnable set again */
                }
            } else
                return;
        }
    }
    void tick_master(void)
    {
        run_item_master();
    }

    void run_item_master(void)
    {
        run_item(0);
        wait_for_end();
        assert(runnable_set.empty());
    }

    void wait_for_end(void)
    {
        while (node_count.load(boost::memory_order_acquire) != 0)
        {} // busy-wait for helper threads to finish
    }
    /* return values of run_next_item() */
    enum {
        no_remaining_items,
        fifo_empty,
        remaining_items
    };

    HOT int run_next_item(thread_count_t index)
    {
        dsp_thread_queue_item * item;
        bool success = runnable_set.pop(item);

        if (!success)
            return fifo_empty;

        node_count_t consumed = 0;

        do {
            item = item->run(*this, index);
            consumed += 1;
        } while (item != NULL);

        node_count_t remaining = node_count.fetch_sub(consumed, boost::memory_order_release);

        assert (remaining >= consumed);

        if (remaining == consumed)
            return no_remaining_items;
        else
            return remaining_items;
    }
    void mark_as_runnable(dsp_thread_queue_item * item)
    {
        runnable_set.push(item);
    }

    friend class nova::dsp_thread_queue_item<runnable, Alloc>;
private:
    dsp_thread_queue_ptr queue;

    thread_count_t thread_count;        /* number of dsp threads to be used by this queue */
    thread_count_t used_helper_threads; /* number of helper threads, which are actually used */

    boost::lockfree::stack<dsp_thread_queue_item*> runnable_set;
    boost::atomic<node_count_t> node_count; /* number of nodes that need to be processed during this tick */
};
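/* For illustration only: a sketch of one dsp tick, driven by a master thread
 * plus a single helper thread. The queue is assembled as in the sketch above;
 * print_job, the thread setup and the use of boost::bind are assumptions, not
 * part of this header:
 *
 *     typedef nova::dsp_queue_interpreter<print_job> interpreter_type;
 *
 *     interpreter_type interpreter(2);  // run with 2 dsp threads
 *
 *     interpreter_type::dsp_thread_queue_ptr q(new nova::dsp_thread_queue<print_job>(3));
 *     // ... fill the queue via allocate_queue_item()/add_initially_runnable(), see above ...
 *     interpreter.reset_queue(q);       // std::move(q) with the C++0x overload
 *
 *     if (interpreter.init_tick()) {    // marks the initial items as runnable
 *         boost::thread helper(boost::bind(&interpreter_type::tick, &interpreter, 1));
 *         interpreter.tick_master();    // thread index 0 drains the queue and waits
 *         helper.join();
 *     }
 */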
} /* namespace nova */

#endif /* DSP_THREAD_QUEUE_DSP_THREAD_QUEUE_HPP */