libhoomd/utils/Autotuner.h

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // inclusion guard
  51 #ifndef _AUTOTUNER_H_
  52 #define _AUTOTUNER_H_
  53
  54 /*! \file Autotuner.h
  55     \brief Declaration of Autotuner
  56 */
  57
  58 #include "ExecutionConfiguration.h"
  59
  60 #include <vector>
  61 #include <string>
  62
  63 #ifdef ENABLE_CUDA
  64 #include <cuda_runtime.h>
  65 #endif
  66
  67 //! Autotuner for low level GPU kernel parameters
  68 /*! **Overview** <br>
  69     Autotuner is a helper class that autotunes GPU kernel parameters (such as block size) for performance. It runs an
  70     internal state machine and makes sweeps over all valid parameter values. Performance is measured just for the single
  71     kernel in question with cudaEvent timers. A number of sweeps are combined with a median to determine the fastest
  72     parameter. Additional timing sweeps are performed at a defined period in order to update to changing conditions.
  73
  74     The begin() and end() methods must be called before and after the kernel launch to be tuned. The value of the tuned
  75     parameter should be set to the return value of getParam(). begin() and end() drive the state machine to choose
  76     parameters and insert the cuda timing events (when needed).
  77
  78     Autotuning can be enabled/disabled by calling setEnabled(). A disabled Autotuner makes no more parameter sweeps,
  79     but continues to return the last determined optimal parameter. If an Autotuner is disabled before it finishes the
  80     first complete sweep through parameters, the first parameter in the list is returned and a warning is issued.
  81     isComplete() queries if the initial scan is complete. setPeriod() changes the period at which the autotuner performs
  82     new scans.
  83
  84     Each Autotuner instance has a string name to help identify it's output on the notice stream.
  85
  86     Autotuner is not useful in non-GPU builds. Timing is performed with CUDA events and requires ENABLE_CUDA=on.
  87     Behavior of Autotuner is undefined when ENABLE_CUDA=off.
  88
  89     ** Implementation ** <br>
  90     Internally, m_nsamples is the number of samples to take (odd for median computation). m_current_sample is the
  91     current sample being taken in a circular fashion, and m_current_element is the index of the current parameter being
  92     sampled. m_samples stores the time of each sampled kernel launch, and m_sample_median stores the current median of
  93     each set of samples. When idle, the number of calls is counted in m_calls. m_state lists the current state in the
  94     state machine.
  95 */
  96 class Autotuner
  97     {
  98     public:
  99         //! Constructor
 100         Autotuner(const std::vector<unsigned int>& parameters,
 101                   unsigned int nsamples,
 102                   unsigned int period,
 103                   const std::string& name,
 104                   boost::shared_ptr<const ExecutionConfiguration> exec_conf);
 105
 106         //! Constructor with implicit range
 107         Autotuner(unsigned int start,
 108                   unsigned int end,
 109                   unsigned int step,
 110                   unsigned int nsamples,
 111                   unsigned int period,
 112                   const std::string& name,
 113                   boost::shared_ptr<const ExecutionConfiguration> exec_conf);
 114
 115         //! Destructor
 116         ~Autotuner();
 117
 118         //! Call before kernel launch
 119         void begin();
 120
 121         //! Call after kernel launch
 122         void end();
 123
 124         //! Get the parameter to set for the kernel launch
 125         /*! \returns the current parameter that should be set for the kernel launch
 126
 127         While sampling, the value returned by this function will sweep though all valid parameters. Otherwise, it will
 128         return the fastest performing parameter.
 129         */
 130         unsigned int getParam()
 131             {
 132             return m_current_param;
 133             }
 134
 135         //! Enable/disable sampling
 136         /*! \param enabled true to enable sampling, false to disable it
 137         */
 138         void setEnabled(bool enabled)
 139             {
 140             m_enabled = enabled;
 141
 142             if (!enabled)
 143                 {
 144                 m_exec_conf->msg->notice(6) << "Disable Autotuner " << m_name << std::endl;
 145
 146                 // if not complete, issue a warning
 147                 if (!isComplete())
 148                     {
 149                     m_exec_conf->msg->warning() << "Disabling Autotuner " << m_name << " before initial scan completed!" << std::endl;
 150                     }
 151                 else
 152                     {
 153                     // ensure that we are in the idle state and have an up to date optimal parameter
 154                     m_current_element = 0;
 155                     m_state = IDLE;
 156                     m_current_param = computeOptimalParameter();
 157                     }
 158                 }
 159             else
 160                 {
 161                 m_exec_conf->msg->notice(6) << "Enable Autotuner " << m_name << std::endl;
 162                 }
 163             }
 164
 165         //! Test if initial sampling is complete
 166         /*! \returns true if the initial sampling run is complete
 167         */
 168         bool isComplete()
 169             {
 170             if (m_state != STARTUP)
 171                 return true;
 172             else
 173                 return false;
 174             }
 175
 176         //! Change the sampling period
 177         /*! \param period New period to set
 178         */
 179         void setPeriod(unsigned int period)
 180             {
 181             m_exec_conf->msg->notice(6) << "Set Autotuner " << m_name << " period = " << period << std::endl;
 182             m_period = period;
 183             }
 184
 185         //! Set flag for synchronization via MPI
 186         /*! \param sync If true, synchronize parameters across all MPI ranks
 187          */
 188         void setSync(bool sync)
 189             {
 190             m_sync = sync;
 191             }
 192
 193         //! Set average flag
 194         /*! \param avg If true, use average instead of median of samples to compute kernel time
 195          */
 196         void setAverage(bool avg)
 197             {
 198             m_avg = avg;
 199             }
 200
 201     protected:
 202         unsigned int computeOptimalParameter();
 203
 204         //! State names
 205         enum State
 206            {
 207            STARTUP,
 208            IDLE,
 209            SCANNING
 210            };
 211
 212         // parameters
 213         unsigned int m_nsamples;    //!< Number of samples to take for each parameter
 214         unsigned int m_period;      //!< Number of calls before sampling occurs again
 215         bool m_enabled;             //!< True if enabled
 216         std::string m_name;         //!< Descriptive name
 217         std::vector<unsigned int> m_parameters;  //!< valid parameters
 218
 219         // state info
 220         State m_state;                  //!< Current state
 221         unsigned int m_current_sample;  //!< Current sample taken
 222         unsigned int m_current_element; //!< Index of current parameter sampled
 223         unsigned int m_calls;           //!< Count of the number of calls since the last sample
 224         unsigned int m_current_param;   //!< Value of the current parameter
 225
 226         std::vector< std::vector< float > > m_samples;  //!< Raw sample data for each element
 227         std::vector< float > m_sample_median;           //!< Current sample median for each element
 228
 229         boost::shared_ptr<const ExecutionConfiguration> m_exec_conf; //!< Execution configuration
 230
 231         #ifdef ENABLE_CUDA
 232         cudaEvent_t m_start;      //!< CUDA event for recording start times
 233         cudaEvent_t m_stop;       //!< CUDA event for recording end times
 234         #endif
 235
 236         bool m_sync;              //!< If true, synchronize results via MPI
 237         bool m_avg;               //!< If true, use sample average instead of median
 238     };
 239
  240 //! Export the Autotuner class to python (declaration only; the definition is provided outside this header)
  241 void export_Autotuner();
 242
 243 #endif // _AUTOTUNER_H_