Merge branch 'maint'
[hoomd-blue.git] / libhoomd / utils / Autotuner.cc
blob0154f676b41e0079065ebdd1574a6f0dc944ae0c
1 /*
2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
32 permission.
34 Disclaimer
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 #include <iostream>
51 #include <stdexcept>
52 #include <algorithm>
54 #include <boost/python.hpp>
55 using namespace boost::python;
57 #include "Autotuner.h"
59 #ifdef ENABLE_MPI
60 #include "HOOMDMPI.h"
61 #endif
63 using namespace std;
/*! \file Autotuner.cc
    \brief Definition of Autotuner
*/
69 /*! \param parameters List of valid parameters
70 \param nsamples Number of time samples to take at each parameter
71 \param period Number of calls to begin() before sampling is redone
72 \param name Descriptive name (used in messenger output)
73 \param exec_conf Execution configuration
75 Autotuner::Autotuner(const std::vector<unsigned int>& parameters,
76 unsigned int nsamples,
77 unsigned int period,
78 const std::string& name,
79 boost::shared_ptr<const ExecutionConfiguration> exec_conf)
80 : m_nsamples(nsamples), m_period(period), m_enabled(true), m_name(name), m_parameters(parameters),
81 m_state(STARTUP), m_current_sample(0), m_current_element(0), m_calls(0),
82 m_exec_conf(exec_conf), m_avg(false)
84 m_exec_conf->msg->notice(5) << "Constructing Autotuner " << nsamples << " " << period << " " << name << endl;
86 // ensure that m_nsamples is odd (so the median is easy to get). This also ensures that m_nsamples > 0.
87 if ((m_nsamples & 1) == 0)
88 m_nsamples += 1;
90 // initialize memory
91 if (m_parameters.size() == 0)
93 this->m_exec_conf->msg->error() << "Autotuner " << m_name << " got no parameters" << endl;
94 throw std::runtime_error("Error initializing autotuner");
96 m_samples.resize(m_parameters.size());
97 m_sample_median.resize(m_parameters.size());
99 for (unsigned int i = 0; i < m_parameters.size(); i++)
101 m_samples[i].resize(m_nsamples);
104 m_current_param = m_parameters[m_current_element];
106 // create CUDA events
107 #ifdef ENABLE_CUDA
108 cudaEventCreate(&m_start);
109 cudaEventCreate(&m_stop);
110 CHECK_CUDA_ERROR();
111 #endif
113 m_sync = false;
117 /*! \param start first valid parameter
118 \param end last valid parameter
119 \param step spacing between valid parameters
120 \param nsamples Number of time samples to take at each parameter
121 \param period Number of calls to begin() before sampling is redone
122 \param name Descriptive name (used in messenger output)
123 \param exec_conf Execution configuration
125 \post Valid parameters will be generated with a spacing of \a step in the range [start,end] inclusive.
127 Autotuner::Autotuner(unsigned int start,
128 unsigned int end,
129 unsigned int step,
130 unsigned int nsamples,
131 unsigned int period,
132 const std::string& name,
133 boost::shared_ptr<const ExecutionConfiguration> exec_conf)
134 : m_nsamples(nsamples), m_period(period), m_enabled(true), m_name(name),
135 m_state(STARTUP), m_current_sample(0), m_current_element(0), m_calls(0), m_current_param(0),
136 m_exec_conf(exec_conf), m_avg(false)
138 m_exec_conf->msg->notice(5) << "Constructing Autotuner " << " " << start << " " << end << " " << step << " "
139 << nsamples << " " << period << " " << name << endl;
141 // initialize the parameters
142 m_parameters.resize((end - start) / step + 1);
143 unsigned int cur_param = start;
144 for (unsigned int i = 0; i < m_parameters.size(); i++)
146 m_parameters[i] = cur_param;
147 cur_param += step;
150 // ensure that m_nsamples is odd (so the median is easy to get). This also ensures that m_nsamples > 0.
151 if ((m_nsamples & 1) == 0)
152 m_nsamples += 1;
154 // initialize memory
155 if (m_parameters.size() == 0)
157 m_exec_conf->msg->error() << "Autotuner " << m_name << " got no parameters" << endl;
158 throw std::runtime_error("Error initializing autotuner");
160 m_samples.resize(m_parameters.size());
161 m_sample_median.resize(m_parameters.size());
163 for (unsigned int i = 0; i < m_parameters.size(); i++)
165 m_samples[i].resize(m_nsamples);
168 m_current_param = m_parameters[m_current_element];
170 // create CUDA events
171 #ifdef ENABLE_CUDA
172 cudaEventCreate(&m_start);
173 cudaEventCreate(&m_stop);
174 CHECK_CUDA_ERROR();
175 #endif
177 m_sync = false;
180 Autotuner::~Autotuner()
182 m_exec_conf->msg->notice(5) << "Destroying Autotuner " << m_name << endl;
183 #ifdef ENABLE_CUDA
184 cudaEventDestroy(m_start);
185 cudaEventDestroy(m_stop);
186 CHECK_CUDA_ERROR();
187 #endif
190 void Autotuner::begin()
192 // skip if disabled
193 if (!m_enabled)
194 return;
196 #ifdef ENABLE_CUDA
197 // if we are scanning, record a cuda event - otherwise do nothing
198 if (m_state == STARTUP || m_state == SCANNING)
200 cudaEventRecord(m_start, 0);
201 if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
202 CHECK_CUDA_ERROR();
204 #endif
207 void Autotuner::end()
209 // skip if disabled
210 if (!m_enabled)
211 return;
213 #ifdef ENABLE_CUDA
214 // handle timing updates if scanning
215 if (m_state == STARTUP || m_state == SCANNING)
217 cudaEventRecord(m_stop, 0);
218 cudaEventSynchronize(m_stop);
219 cudaEventElapsedTime(&m_samples[m_current_element][m_current_sample], m_start, m_stop);
220 m_exec_conf->msg->notice(9) << "Autotuner " << m_name << ": t(" << m_current_param << "," << m_current_sample
221 << ") = " << m_samples[m_current_element][m_current_sample] << endl;
223 if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
224 CHECK_CUDA_ERROR();
226 #endif
228 // handle state data updates and transitions
229 if (m_state == STARTUP)
231 // move on to the next sample
232 m_current_sample++;
234 // if we hit the end of the samples, reset and move on to the next element
235 if (m_current_sample >= m_nsamples)
237 m_current_sample = 0;
238 m_current_element++;
240 // if we hit the end of the elements, transition to the IDLE state and compute the optimal parameter
241 if (m_current_element >= m_parameters.size())
243 m_current_element = 0;
244 m_state = IDLE;
245 m_current_param = computeOptimalParameter();
247 else
249 // if moving on to the next element, update the cached parameter to set
250 m_current_param = m_parameters[m_current_element];
254 else if (m_state == SCANNING)
256 // move on to the next element
257 m_current_element++;
259 // if we hit the end of the elements, transition to the IDLE state and compute the optimal parameter, and move
260 // on to the next sample for next time
261 if (m_current_element >= m_parameters.size())
263 m_current_element = 0;
264 m_state = IDLE;
265 m_current_param = computeOptimalParameter();
266 m_current_sample = (m_current_sample + 1) % m_nsamples;
268 else
270 // if moving on to the next element, update the cached parameter to set
271 m_current_param = m_parameters[m_current_element];
274 else if (m_state == IDLE)
276 // increment the calls counter and see if we should transition to the scanning state
277 m_calls++;
279 if (m_calls > m_period)
281 // reset state for the next time
282 m_calls = 0;
284 // initialize a scan
285 m_current_param = m_parameters[m_current_element];
286 m_state = SCANNING;
287 m_exec_conf->msg->notice(4) << "Autotuner " << m_name << " - beginning scan" << std::endl;
292 /*! \returns The optimal parameter given the current data in m_samples
294 computeOptimalParameter computes the median time among all samples for a given element. It then chooses the
295 fastest time (with the lowest index breaking a tie) and returns the parameter that resulted in that time.
297 unsigned int Autotuner::computeOptimalParameter()
299 bool is_root = true;
301 #ifdef ENABLE_MPI
302 unsigned int nranks = 0;
303 if (m_sync)
305 nranks = m_exec_conf->getNRanks();
306 is_root = !m_exec_conf->getRank();
308 #endif
310 // start by computing the median for each element
311 std::vector<float> v;
312 for (unsigned int i = 0; i < m_parameters.size(); i++)
314 v = m_samples[i];
315 #ifdef ENABLE_MPI
316 if (m_sync && nranks)
318 // combine samples from all ranks on rank zero
319 std::vector< std::vector<float> > all_v;
320 MPI_Barrier(m_exec_conf->getMPICommunicator());
321 gather_v(v, all_v, 0, m_exec_conf->getMPICommunicator());
322 if (is_root)
324 v.clear();
325 assert(all_v.size() == nranks);
326 for (unsigned int j = 0; j < nranks; ++j)
327 v.insert(v.end(), all_v[j].begin(), all_v[j].end());
330 #endif
331 if (is_root)
333 if (m_avg)
335 // compute average
336 float sum = 0.0f;
337 for (std::vector<float>::iterator it = v.begin(); it != v.end(); ++it)
338 sum += *it;
339 m_sample_median[i] = sum/v.size();
341 else
343 // compute median
344 size_t n = v.size() / 2;
345 nth_element(v.begin(), v.begin()+n, v.end());
346 m_sample_median[i] = v[n];
351 unsigned int opt;
353 if (is_root)
355 // now find the minimum and maximum times in the medians
356 float min = m_sample_median[0];
357 unsigned int min_idx = 0;
358 //float max = m_sample_median[0];
359 //unsigned int max_idx = 0;
361 for (unsigned int i = 1; i < m_parameters.size(); i++)
363 if (m_sample_median[i] < min)
365 min = m_sample_median[i];
366 min_idx = i;
368 /*if (m_sample_median[i] > max)
370 max = m_sample_median[i];
371 max_idx = i;
375 // get the optimal param
376 opt = m_parameters[min_idx];
377 // unsigned int percent = int(max/min * 100.0f)-100;
379 // print stats
380 m_exec_conf->msg->notice(4) << "Autotuner " << m_name << " found optimal parameter " << opt << endl;
383 #ifdef ENABLE_MPI
384 if (m_sync && nranks) bcast(opt, 0, m_exec_conf->getMPICommunicator());
385 #endif
386 return opt;
389 void export_Autotuner()
391 class_<Autotuner, boost::noncopyable>
392 ("Autotuner", init< unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, const std::string&, boost::shared_ptr<ExecutionConfiguration> >())
393 .def("getParam", &Autotuner::getParam)
394 .def("setEnabled", &Autotuner::setEnabled)
395 .def("setMoveRatio", &Autotuner::isComplete)
396 .def("setNSelect", &Autotuner::setPeriod)