libhoomd/utils/Autotuner.cc

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 #include <iostream>
  51 #include <stdexcept>
  52 #include <algorithm>
  53
  54 #include <boost/python.hpp>
  55 using namespace boost::python;
  56
  57 #include "Autotuner.h"
  58
  59 #ifdef ENABLE_MPI
  60 #include "HOOMDMPI.h"
  61 #endif
  62
  63 using namespace std;
  64
  65 /*! \file Autotuner.cc
  66     \brief Definition of Autotuner
  67 */
  68
  69 /*! \param parameters List of valid parameters
  70     \param nsamples Number of time samples to take at each parameter
  71     \param period Number of calls to begin() before sampling is redone
  72     \param name Descriptive name (used in messenger output)
  73     \param exec_conf Execution configuration
  74 */
  75 Autotuner::Autotuner(const std::vector<unsigned int>& parameters,
  76                      unsigned int nsamples,
  77                      unsigned int period,
  78                      const std::string& name,
  79                      boost::shared_ptr<const ExecutionConfiguration> exec_conf)
  80     : m_nsamples(nsamples), m_period(period), m_enabled(true), m_name(name), m_parameters(parameters),
  81       m_state(STARTUP), m_current_sample(0), m_current_element(0), m_calls(0),
  82       m_exec_conf(exec_conf), m_avg(false)
  83     {
  84     m_exec_conf->msg->notice(5) << "Constructing Autotuner " << nsamples << " " << period << " " << name << endl;
  85
  86     // ensure that m_nsamples is odd (so the median is easy to get). This also ensures that m_nsamples > 0.
  87     if ((m_nsamples & 1) == 0)
  88         m_nsamples += 1;
  89
  90     // initialize memory
  91     if (m_parameters.size() == 0)
  92         {
  93         this->m_exec_conf->msg->error() << "Autotuner " << m_name << " got no parameters" << endl;
  94         throw std::runtime_error("Error initializing autotuner");
  95         }
  96     m_samples.resize(m_parameters.size());
  97     m_sample_median.resize(m_parameters.size());
  98
  99     for (unsigned int i = 0; i < m_parameters.size(); i++)
 100         {
 101         m_samples[i].resize(m_nsamples);
 102         }
 103
 104     m_current_param = m_parameters[m_current_element];
 105
 106     // create CUDA events
 107     #ifdef ENABLE_CUDA
 108     cudaEventCreate(&m_start);
 109     cudaEventCreate(&m_stop);
 110     CHECK_CUDA_ERROR();
 111     #endif
 112
 113     m_sync = false;
 114     }
 115
 116
 117 /*! \param start first valid parameter
 118     \param end last valid parameter
 119     \param step spacing between valid parameters
 120     \param nsamples Number of time samples to take at each parameter
 121     \param period Number of calls to begin() before sampling is redone
 122     \param name Descriptive name (used in messenger output)
 123     \param exec_conf Execution configuration
 124
 125     \post Valid parameters will be generated with a spacing of \a step in the range [start,end] inclusive.
 126 */
 127 Autotuner::Autotuner(unsigned int start,
 128                      unsigned int end,
 129                      unsigned int step,
 130                      unsigned int nsamples,
 131                      unsigned int period,
 132                      const std::string& name,
 133                      boost::shared_ptr<const ExecutionConfiguration> exec_conf)
 134     : m_nsamples(nsamples), m_period(period), m_enabled(true), m_name(name),
 135       m_state(STARTUP), m_current_sample(0), m_current_element(0), m_calls(0), m_current_param(0),
 136       m_exec_conf(exec_conf), m_avg(false)
 137     {
 138     m_exec_conf->msg->notice(5) << "Constructing Autotuner " << " " << start << " " << end << " " << step << " "
 139                                 << nsamples << " " << period << " " << name << endl;
 140
 141     // initialize the parameters
 142     m_parameters.resize((end - start) / step + 1);
 143     unsigned int cur_param = start;
 144     for (unsigned int i = 0; i < m_parameters.size(); i++)
 145         {
 146         m_parameters[i] = cur_param;
 147         cur_param += step;
 148         }
 149
 150     // ensure that m_nsamples is odd (so the median is easy to get). This also ensures that m_nsamples > 0.
 151     if ((m_nsamples & 1) == 0)
 152         m_nsamples += 1;
 153
 154     // initialize memory
 155     if (m_parameters.size() == 0)
 156         {
 157         m_exec_conf->msg->error() << "Autotuner " << m_name << " got no parameters" << endl;
 158         throw std::runtime_error("Error initializing autotuner");
 159         }
 160     m_samples.resize(m_parameters.size());
 161     m_sample_median.resize(m_parameters.size());
 162
 163     for (unsigned int i = 0; i < m_parameters.size(); i++)
 164         {
 165         m_samples[i].resize(m_nsamples);
 166         }
 167
 168     m_current_param = m_parameters[m_current_element];
 169
 170     // create CUDA events
 171     #ifdef ENABLE_CUDA
 172     cudaEventCreate(&m_start);
 173     cudaEventCreate(&m_stop);
 174     CHECK_CUDA_ERROR();
 175     #endif
 176
 177     m_sync = false;
 178     }
 179
 180 Autotuner::~Autotuner()
 181     {
 182     m_exec_conf->msg->notice(5) << "Destroying Autotuner " << m_name << endl;
 183     #ifdef ENABLE_CUDA
 184     cudaEventDestroy(m_start);
 185     cudaEventDestroy(m_stop);
 186     CHECK_CUDA_ERROR();
 187     #endif
 188     }
 189
 190 void Autotuner::begin()
 191     {
 192     // skip if disabled
 193     if (!m_enabled)
 194         return;
 195
 196     #ifdef ENABLE_CUDA
 197     // if we are scanning, record a cuda event - otherwise do nothing
 198     if (m_state == STARTUP || m_state == SCANNING)
 199         {
 200         cudaEventRecord(m_start, 0);
 201         if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
 202             CHECK_CUDA_ERROR();
 203         }
 204     #endif
 205     }
 206
 207 void Autotuner::end()
 208     {
 209     // skip if disabled
 210     if (!m_enabled)
 211         return;
 212
 213     #ifdef ENABLE_CUDA
 214     // handle timing updates if scanning
 215     if (m_state == STARTUP || m_state == SCANNING)
 216         {
 217         cudaEventRecord(m_stop, 0);
 218         cudaEventSynchronize(m_stop);
 219         cudaEventElapsedTime(&m_samples[m_current_element][m_current_sample], m_start, m_stop);
 220         m_exec_conf->msg->notice(9) << "Autotuner " << m_name << ": t(" << m_current_param << "," << m_current_sample
 221                                      << ") = " << m_samples[m_current_element][m_current_sample] << endl;
 222
 223         if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
 224             CHECK_CUDA_ERROR();
 225         }
 226     #endif
 227
 228     // handle state data updates and transitions
 229     if (m_state == STARTUP)
 230         {
 231         // move on to the next sample
 232         m_current_sample++;
 233
 234         // if we hit the end of the samples, reset and move on to the next element
 235         if (m_current_sample >= m_nsamples)
 236             {
 237             m_current_sample = 0;
 238             m_current_element++;
 239
 240             // if we hit the end of the elements, transition to the IDLE state and compute the optimal parameter
 241             if (m_current_element >= m_parameters.size())
 242                 {
 243                 m_current_element = 0;
 244                 m_state = IDLE;
 245                 m_current_param = computeOptimalParameter();
 246                 }
 247             else
 248                 {
 249                 // if moving on to the next element, update the cached parameter to set
 250                 m_current_param = m_parameters[m_current_element];
 251                 }
 252             }
 253         }
 254     else if (m_state == SCANNING)
 255         {
 256         // move on to the next element
 257         m_current_element++;
 258
 259         // if we hit the end of the elements, transition to the IDLE state and compute the optimal parameter, and move
 260         // on to the next sample for next time
 261         if (m_current_element >= m_parameters.size())
 262             {
 263             m_current_element = 0;
 264             m_state = IDLE;
 265             m_current_param = computeOptimalParameter();
 266             m_current_sample = (m_current_sample + 1) % m_nsamples;
 267             }
 268         else
 269             {
 270             // if moving on to the next element, update the cached parameter to set
 271             m_current_param = m_parameters[m_current_element];
 272             }
 273         }
 274     else if (m_state == IDLE)
 275         {
 276         // increment the calls counter and see if we should transition to the scanning state
 277         m_calls++;
 278
 279         if (m_calls > m_period)
 280             {
 281             // reset state for the next time
 282             m_calls = 0;
 283
 284             // initialize a scan
 285             m_current_param = m_parameters[m_current_element];
 286             m_state = SCANNING;
 287             m_exec_conf->msg->notice(4) << "Autotuner " << m_name << " - beginning scan" << std::endl;
 288             }
 289         }
 290     }
 291
 292 /*! \returns The optimal parameter given the current data in m_samples
 293
 294     computeOptimalParameter computes the median time among all samples for a given element. It then chooses the
 295     fastest time (with the lowest index breaking a tie) and returns the parameter that resulted in that time.
 296 */
 297 unsigned int Autotuner::computeOptimalParameter()
 298     {
 299     bool is_root = true;
 300
 301     #ifdef ENABLE_MPI
 302     unsigned int nranks = 0;
 303     if (m_sync)
 304         {
 305         nranks = m_exec_conf->getNRanks();
 306         is_root = !m_exec_conf->getRank();
 307         }
 308     #endif
 309
 310     // start by computing the median for each element
 311     std::vector<float> v;
 312     for (unsigned int i = 0; i < m_parameters.size(); i++)
 313         {
 314         v = m_samples[i];
 315         #ifdef ENABLE_MPI
 316         if (m_sync && nranks)
 317             {
 318             // combine samples from all ranks on rank zero
 319             std::vector< std::vector<float> > all_v;
 320             MPI_Barrier(m_exec_conf->getMPICommunicator());
 321             gather_v(v, all_v, 0, m_exec_conf->getMPICommunicator());
 322             if (is_root)
 323                 {
 324                 v.clear();
 325                 assert(all_v.size() == nranks);
 326                 for (unsigned int j = 0; j < nranks; ++j)
 327                     v.insert(v.end(), all_v[j].begin(), all_v[j].end());
 328                 }
 329             }
 330         #endif
 331         if (is_root)
 332             {
 333             if (m_avg)
 334                 {
 335                 // compute average
 336                 float sum = 0.0f;
 337                 for (std::vector<float>::iterator it = v.begin(); it != v.end(); ++it)
 338                     sum += *it;
 339                 m_sample_median[i] = sum/v.size();
 340                 }
 341             else
 342                 {
 343                 // compute median
 344                 size_t n = v.size() / 2;
 345                 nth_element(v.begin(), v.begin()+n, v.end());
 346                 m_sample_median[i] = v[n];
 347                 }
 348             }
 349         }
 350
 351     unsigned int opt;
 352
 353     if (is_root)
 354         {
 355         // now find the minimum and maximum times in the medians
 356         float min = m_sample_median[0];
 357         unsigned int min_idx = 0;
 358         //float max = m_sample_median[0];
 359         //unsigned int max_idx = 0;
 360
 361         for (unsigned int i = 1; i < m_parameters.size(); i++)
 362             {
 363             if (m_sample_median[i] < min)
 364                 {
 365                 min = m_sample_median[i];
 366                 min_idx = i;
 367                 }
 368             /*if (m_sample_median[i] > max)
 369                 {
 370                 max = m_sample_median[i];
 371                 max_idx = i;
 372                 }*/
 373             }
 374
 375         // get the optimal param
 376         opt = m_parameters[min_idx];
 377         // unsigned int percent = int(max/min * 100.0f)-100;
 378
 379         // print stats
 380         m_exec_conf->msg->notice(4) << "Autotuner " << m_name << " found optimal parameter " << opt << endl;
 381         }
 382
 383     #ifdef ENABLE_MPI
 384     if (m_sync && nranks) bcast(opt, 0, m_exec_conf->getMPICommunicator());
 385     #endif
 386     return opt;
 387     }
 388
 389 void export_Autotuner()
 390     {
 391     class_<Autotuner, boost::noncopyable>
 392     ("Autotuner", init< unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, const std::string&, boost::shared_ptr<ExecutionConfiguration> >())
 393     .def("getParam", &Autotuner::getParam)
 394     .def("setEnabled", &Autotuner::setEnabled)
 395     .def("setMoveRatio", &Autotuner::isComplete)
 396     .def("setNSelect", &Autotuner::setPeriod)
 397     ;
 398     }