/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivate works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Maintainer: joaander

/*! \file NeighborListGPU.cc
    \brief Implementation of the NeighborListGPU class
*/

#include "NeighborListGPU.h"
#include "NeighborListGPU.cuh"

#include <boost/python.hpp>
using namespace boost::python;

#ifdef ENABLE_MPI
#include "Communicator.h"
#endif

#include <iostream>
using namespace std;

/*! \param num_iters Number of iterations to average for the benchmark
    \returns Milliseconds of execution time per calculation

    Calls filterNlist repeatedly to benchmark the neighbor list filter step.
*/
double NeighborListGPU::benchmarkFilter(unsigned int num_iters)
    {
    ClockSource t;
    // warm up run
    forceUpdate();
    compute(0);
    buildNlist(0);
    filterNlist();

#ifdef ENABLE_CUDA
    if (exec_conf->isCUDAEnabled())
        {
        cudaThreadSynchronize();
        CHECK_CUDA_ERROR();
        }
#endif

    // benchmark
    uint64_t start_time = t.getTime();
    for (unsigned int i = 0; i < num_iters; i++)
        filterNlist();

#ifdef ENABLE_CUDA
    if (exec_conf->isCUDAEnabled())
        cudaThreadSynchronize();
#endif
    uint64_t total_time_ns = t.getTime() - start_time;

    // convert the run time to milliseconds
    return double(total_time_ns) / 1e6 / double(num_iters);
    }

void NeighborListGPU::buildNlist(unsigned int timestep)
    {
    if (m_storage_mode != full)
        {
        m_exec_conf->msg->error() << "Only full mode nlists can be generated on the GPU" << endl;
        throw runtime_error("Error computing neighbor list");
        }

    if (m_filter_body || m_filter_diameter)
        {
        m_exec_conf->msg->error() << "NeighborListGPU does not currently support body or diameter exclusions." << endl;
        m_exec_conf->msg->error() << "Please contact the developers and notify them that you need this functionality" << endl;
        throw runtime_error("Error computing neighbor list");
        }

    // check that the simulation box is big enough
    const BoxDim& box = m_pdata->getBox();
    Scalar3 nearest_plane_distance = box.getNearestPlaneDistance();

    if (m_prof)
        m_prof->push(exec_conf, "compute");

    // acquire the particle data
    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);

    // access the nlist data arrays
    ArrayHandle<unsigned int> d_nlist(m_nlist, access_location::device, access_mode::overwrite);
    ArrayHandle<unsigned int> d_n_neigh(m_n_neigh, access_location::device, access_mode::overwrite);
    ArrayHandle<Scalar4> d_last_pos(m_last_pos, access_location::device, access_mode::overwrite);

    // start by creating a temporary copy of r_cut squared
    Scalar rmax = m_r_cut + m_r_buff;
    // add d_max - 1.0, if diameter filtering is not already taking care of it
    if (!m_filter_diameter)
        rmax += m_d_max - Scalar(1.0);
    Scalar rmaxsq = rmax*rmax;
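
    // Note on the padding above (an inference from HOOMD's diameter-shifted pair potentials, not
    // stated in this file): shifting reduces the effective pair separation by up to
    // (d_i + d_j)/2 - 1, which is at most d_max - 1, so the search radius must grow by the same
    // amount to still catch every pair that could interact. For example, with r_cut = 3.0,
    // r_buff = 0.4 and d_max = 2.0 (hypothetical values), rmax = 3.4 + 1.0 = 4.4 and rmaxsq = 19.36.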

    if ((box.getPeriodic().x && nearest_plane_distance.x <= rmax * 2.0) ||
        (box.getPeriodic().y && nearest_plane_distance.y <= rmax * 2.0) ||
        (this->m_sysdef->getNDimensions() == 3 && box.getPeriodic().z && nearest_plane_distance.z <= rmax * 2.0))
        {
        m_exec_conf->msg->error() << "nlist: Simulation box is too small! Particles would be interacting with themselves." << endl;
        throw runtime_error("Error updating neighborlist bins");
        }
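
    // Worked illustration of the check above (hypothetical numbers): the minimum-image convention
    // needs every periodic box dimension to exceed 2*rmax. With rmax = 3.4, the nearest-plane
    // distance must be greater than 6.8 in each periodic direction, or a particle could count its
    // own periodic image as a neighbor.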

    // build the list with the all-pairs (N-squared) kernel
    gpu_compute_nlist_nsq(d_nlist.data,
                          d_n_neigh.data,
                          d_last_pos.data,
                          m_conditions.getDeviceFlags(),
                          m_nlist_indexer,
                          d_pos.data,
                          m_pdata->getN(),
                          m_pdata->getNGhosts(),
                          box,
                          rmaxsq);

    if (exec_conf->isCUDAErrorCheckingEnabled())
        CHECK_CUDA_ERROR();

    if (m_prof)
        m_prof->pop(exec_conf);
    }

void NeighborListGPU::scheduleDistanceCheck(unsigned int timestep)
    {
    // guard against unnecessary calls
    if (! shouldCheckDistance(timestep))
        {
        m_distcheck_scheduled = false;
        return;
        }

    // scan through the particle data arrays and calculate distances
    if (m_prof) m_prof->push(exec_conf, "dist-check");

    // access data
    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
    BoxDim box = m_pdata->getBox();
    ArrayHandle<Scalar4> d_last_pos(m_last_pos, access_location::device, access_mode::read);

    // get current global nearest plane distance
    Scalar3 L_g = m_pdata->getGlobalBox().getNearestPlaneDistance();

    // Cutoff distance for inclusion in neighbor list
    Scalar rmax = m_r_cut + m_r_buff;
    if (!m_filter_diameter)
        rmax += m_d_max - Scalar(1.0);

    // Find direction of maximum box length contraction (smallest eigenvalue of deformation tensor)
    Scalar3 lambda = L_g / m_last_L;
    Scalar lambda_min = (lambda.x < lambda.y) ? lambda.x : lambda.y;
    lambda_min = (lambda_min < lambda.z) ? lambda_min : lambda.z;

    // maximum displacement for each particle (after subtraction of homogeneous dilations)
    Scalar delta_max = (rmax*lambda_min - m_r_cut)/Scalar(2.0);
    Scalar maxshiftsq = delta_max > 0 ? delta_max*delta_max : 0;
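
    // Sketch of the rebuild criterion implied above (a reading of this code, with lambda_min
    // treated as an isotropic contraction): neighbors were recorded out to rmax = r_cut + r_buff
    // (+ diameter padding) at build time, and two particles closing on each other eat into that
    // margin at twice the single-particle displacement. Hence a rebuild is flagged once any
    // particle moves farther than (rmax*lambda_min - r_cut)/2 beyond the affine box deformation;
    // for an undeformed box with d_max = 1 this is the familiar r_buff/2 half-buffer rule.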

    ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);

    gpu_nlist_needs_update_check_new(d_flags.data,
                                     d_last_pos.data,
                                     d_pos.data,
                                     m_pdata->getN(),
                                     box,
                                     maxshiftsq,
                                     lambda,
                                     ++m_checkn);

    if (exec_conf->isCUDAErrorCheckingEnabled())
        CHECK_CUDA_ERROR();

    m_distcheck_scheduled = true;
    m_last_schedule_tstep = timestep;

    // record synchronization point
    cudaEventRecord(m_event);

    if (m_prof) m_prof->pop(exec_conf);
    }

bool NeighborListGPU::distanceCheck(unsigned int timestep)
    {
    // check if we have scheduled a kernel for the current time step
    if (! m_distcheck_scheduled || m_last_schedule_tstep != timestep)
        scheduleDistanceCheck(timestep);

    m_distcheck_scheduled = false;

    ArrayHandleAsync<unsigned int> h_flags(m_flags, access_location::host, access_mode::read);

    // wait for kernel to complete
    cudaEventSynchronize(m_event);

    bool result = (*h_flags.data == m_checkn);
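
    // How the handshake above works (inferred from this code, not documented elsewhere in the
    // file): every scheduleDistanceCheck() call passes a freshly incremented m_checkn to the
    // kernel, which writes that value into m_flags only when some particle has exceeded the
    // allowed displacement. Comparing the asynchronously read flag against the current m_checkn
    // therefore says whether this scheduling round flagged a rebuild, while stale values from
    // earlier rounds compare unequal and are ignored.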

#ifdef ENABLE_MPI
    if (m_pdata->getDomainDecomposition())
        {
        if (m_prof) m_prof->push(m_exec_conf,"MPI allreduce");

        // check if the migrate criterion is fulfilled on any rank
        int local_result = result ? 1 : 0;
        int global_result = 0;
        MPI_Allreduce(&local_result,
                      &global_result,
                      1,
                      MPI_INT,
                      MPI_MAX,
                      m_exec_conf->getMPICommunicator());
        result = (global_result > 0);

        if (m_prof) m_prof->pop();
        }
#endif

    return result;
    }

/*! Calls gpu_nlist_filter() to filter the neighbor list on the GPU
*/
void NeighborListGPU::filterNlist()
    {
    if (m_prof)
        m_prof->push(exec_conf, "filter");

    // access data
    ArrayHandle<unsigned int> d_n_ex_idx(m_n_ex_idx, access_location::device, access_mode::read);
    ArrayHandle<unsigned int> d_ex_list_idx(m_ex_list_idx, access_location::device, access_mode::read);
    ArrayHandle<unsigned int> d_n_neigh(m_n_neigh, access_location::device, access_mode::readwrite);
    ArrayHandle<unsigned int> d_nlist(m_nlist, access_location::device, access_mode::readwrite);
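
    // The begin()/getParam()/end() calls below appear to follow HOOMD's Autotuner pattern: the
    // tuner times the bracketed kernel launch and getParam() supplies the launch configuration
    // (block size) currently being sampled, so the filter kernel is tuned at run time rather than
    // with a hard-coded block size.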

    m_tuner_filter->begin();
    gpu_nlist_filter(d_n_neigh.data,
                     d_nlist.data,
                     m_nlist_indexer,
                     d_n_ex_idx.data,
                     d_ex_list_idx.data,
                     m_ex_list_indexer,
                     m_pdata->getN(),
                     m_tuner_filter->getParam());
    if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR();
    m_tuner_filter->end();

    if (m_prof)
        m_prof->pop(exec_conf);
    }

//! Update the exclusion list on the GPU
void NeighborListGPU::updateExListIdx()
    {
    if (m_prof)
        m_prof->push(m_exec_conf,"update-ex");

    ArrayHandle<unsigned int> d_rtag(m_pdata->getRTags(), access_location::device, access_mode::read);
    ArrayHandle<unsigned int> d_tag(m_pdata->getTags(), access_location::device, access_mode::read);

    ArrayHandle<unsigned int> d_n_ex_tag(m_n_ex_tag, access_location::device, access_mode::read);
    ArrayHandle<unsigned int> d_ex_list_tag(m_ex_list_tag, access_location::device, access_mode::read);
    ArrayHandle<unsigned int> d_n_ex_idx(m_n_ex_idx, access_location::device, access_mode::overwrite);
    ArrayHandle<unsigned int> d_ex_list_idx(m_ex_list_idx, access_location::device, access_mode::overwrite);
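
    // Why two copies of the exclusion table (inferred from the argument names): exclusions are
    // stored per particle tag, which is stable across sorting and migration, while the kernels
    // consume them by current particle index. gpu_update_exclusion_list() uses the reverse-tag map
    // d_rtag to rebuild the index-ordered table (d_n_ex_idx, d_ex_list_idx) from the tag-ordered
    // one (d_n_ex_tag, d_ex_list_tag) whenever the ordering changes.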

    gpu_update_exclusion_list(d_tag.data,
                              d_rtag.data,
                              d_n_ex_tag.data,
                              d_ex_list_tag.data,
                              m_ex_list_indexer_tag,
                              d_n_ex_idx.data,
                              d_ex_list_idx.data,
                              m_ex_list_indexer,
                              m_pdata->getN());
    if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR();

    if (m_prof)
        m_prof->pop(m_exec_conf);
    }

void export_NeighborListGPU()
    {
    class_<NeighborListGPU, boost::shared_ptr<NeighborListGPU>, bases<NeighborList>, boost::noncopyable >
        ("NeighborListGPU", init< boost::shared_ptr<SystemDefinition>, Scalar, Scalar >())
        .def("benchmarkFilter", &NeighborListGPU::benchmarkFilter)