libhoomd/cuda/TwoStepNVTGPU.cu

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: joaander
  51
  52 #include "TwoStepNVTGPU.cuh"
  53
  54 #ifdef WIN32
  55 #include <cassert>
  56 #else
  57 #include <assert.h>
  58 #endif
  59
  60 /*! \file TwoStepNVTGPU.cu
  61     \brief Defines GPU kernel code for NVT integration on the GPU. Used by TwoStepNVTGPU.
  62 */
  63
  64 //! Takes the first 1/2 step forward in the NVT integration step
  65 /*! \param d_pos array of particle positions
  66     \param d_vel array of particle velocities
  67     \param d_accel array of particle accelerations
  68     \param d_image array of particle images
  69     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
  70     \param group_size Number of members in the group
  71     \param box Box dimensions for periodic boundary condition handling
  72     \param denominv Intermediate variable computed on the host and used in the NVT integration step
  73     \param deltaT Amount of real time to step forward in one time step
  74
  75
  76     Take the first half step forward in the NVT integration.
  77
  78     See gpu_nve_step_one_kernel() for some performance notes on how to handle the group data reads efficiently.
  79 */
  80 extern "C" __global__
  81 void gpu_nvt_step_one_kernel(Scalar4 *d_pos,
  82                              Scalar4 *d_vel,
  83                              const Scalar3 *d_accel,
  84                              int3 *d_image,
  85                              unsigned int *d_group_members,
  86                              unsigned int group_size,
  87                              BoxDim box,
  88                              Scalar denominv,
  89                              Scalar deltaT)
  90     {
  91     // determine which particle this thread works on
  92     int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
  93
  94     if (group_idx < group_size)
  95         {
  96         unsigned int idx = d_group_members[group_idx];
  97
  98         // update positions to the next timestep and update velocities to the next half step
  99         Scalar4 postype = d_pos[idx];
 100         Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
 101
 102         Scalar4 velmass = d_vel[idx];
 103         Scalar3 vel = make_scalar3(velmass.x, velmass.y, velmass.z);
 104         Scalar3 accel = d_accel[idx];
 105
 106         // perform update computation
 107         vel = (vel + (Scalar(1.0)/Scalar(2.0)) * accel * deltaT) * denominv;
 108         pos += vel * deltaT;
 109
 110         // read in the image flags
 111         int3 image = d_image[idx];
 112
 113         // time to fix the periodic boundary conditions
 114         box.wrap(pos, image);
 115
 116         // write out the results
 117         d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w);
 118         d_vel[idx] = make_scalar4(vel.x, vel.y, vel.z, velmass.w);
 119         d_image[idx] = image;
 120         }
 121     }
 122
 123 /*! \param d_pos array of particle positions
 124     \param d_vel array of particle velocities
 125     \param d_accel array of particle accelerations
 126     \param d_image array of particle images
 127     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
 128     \param group_size Number of members in the group
 129     \param box Box dimensions for periodic boundary condition handling
 130     \param block_size Size of the block to run
 131     \param Xi Current value of the NVT degree of freedom Xi
 132     \param deltaT Amount of real time to step forward in one time step
 133 */
 134 cudaError_t gpu_nvt_step_one(Scalar4 *d_pos,
 135                              Scalar4 *d_vel,
 136                              const Scalar3 *d_accel,
 137                              int3 *d_image,
 138                              unsigned int *d_group_members,
 139                              unsigned int group_size,
 140                              const BoxDim& box,
 141                              unsigned int block_size,
 142                              Scalar Xi,
 143                              Scalar deltaT)
 144     {
 145     static unsigned int max_block_size = UINT_MAX;
 146     if (max_block_size == UINT_MAX)
 147         {
 148         cudaFuncAttributes attr;
 149         cudaFuncGetAttributes(&attr, (const void *)gpu_nvt_step_one_kernel);
 150         max_block_size = attr.maxThreadsPerBlock;
 151         }
 152
 153     unsigned int run_block_size = min(block_size, max_block_size);
 154
 155     // setup the grid to run the kernel
 156     dim3 grid( (group_size/run_block_size) + 1, 1, 1);
 157     dim3 threads(run_block_size, 1, 1);
 158
 159     // run the kernel
 160     gpu_nvt_step_one_kernel<<< grid, threads >>>(d_pos,
 161                          d_vel,
 162                          d_accel,
 163                          d_image,
 164                          d_group_members,
 165                          group_size,
 166                          box,
 167                          Scalar(1.0) / (Scalar(1.0) + deltaT/Scalar(2.0) * Xi),
 168                          deltaT);
 169     return cudaSuccess;
 170     }
 171
 172 //! Takes the second 1/2 step forward in the NVT integration step
 173 /*! \param d_vel array of particle velocities
 174     \param d_accel array of particle accelerations
 175     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
 176     \param group_size Number of members in the group
 177     \param d_net_force Net force on each particle
 178     \param Xi current value of the NVT degree of freedom Xi
 179     \param deltaT Amount of real time to step forward in one time step
 180 */
 181 extern "C" __global__
 182 void gpu_nvt_step_two_kernel(Scalar4 *d_vel,
 183                              Scalar3 *d_accel,
 184                              unsigned int *d_group_members,
 185                              unsigned int group_size,
 186                              Scalar4 *d_net_force,
 187                              Scalar Xi,
 188                              Scalar deltaT)
 189     {
 190     // determine which particle this thread works on
 191     int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
 192
 193     if (group_idx < group_size)
 194         {
 195         unsigned int idx = d_group_members[group_idx];
 196
 197         // read in the net force and calculate the acceleration
 198         Scalar4 net_force = d_net_force[idx];
 199         Scalar3 accel = make_scalar3(net_force.x,net_force.y,net_force.z);
 200
 201         Scalar4 vel = d_vel[idx];
 202
 203         Scalar mass = vel.w;
 204         accel.x /= mass;
 205         accel.y /= mass;
 206         accel.z /= mass;
 207
 208         vel.x += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.x - Xi * vel.x);
 209         vel.y += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.y - Xi * vel.y);
 210         vel.z += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.z - Xi * vel.z);
 211
 212         // write out data
 213         d_vel[idx] = vel;
 214         // since we calculate the acceleration, we need to write it for the next step
 215         d_accel[idx] = accel;
 216         }
 217     }
 218
 219 /*! \param d_vel array of particle velocities
 220     \param d_accel array of particle accelerations
 221     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
 222     \param group_size Number of members in the group
 223     \param d_net_force Net force on each particle
 224     \param block_size Size of the block to execute on the device
 225     \param Xi current value of the NVT degree of freedom Xi
 226     \param deltaT Amount of real time to step forward in one time step
 227 */
 228 cudaError_t gpu_nvt_step_two(Scalar4 *d_vel,
 229                              Scalar3 *d_accel,
 230                              unsigned int *d_group_members,
 231                              unsigned int group_size,
 232                              Scalar4 *d_net_force,
 233                              unsigned int block_size,
 234                              Scalar Xi,
 235                              Scalar deltaT)
 236     {
 237     static unsigned int max_block_size = UINT_MAX;
 238     if (max_block_size == UINT_MAX)
 239         {
 240         cudaFuncAttributes attr;
 241         cudaFuncGetAttributes(&attr, (const void *)gpu_nvt_step_two_kernel);
 242         max_block_size = attr.maxThreadsPerBlock;
 243         }
 244
 245     unsigned int run_block_size = min(block_size, max_block_size);
 246
 247     // setup the grid to run the kernel
 248     dim3 grid( (group_size/run_block_size) + 1, 1, 1);
 249     dim3 threads(run_block_size, 1, 1);
 250
 251     // run the kernel
 252     gpu_nvt_step_two_kernel<<< grid, threads >>>(d_vel, d_accel, d_group_members, group_size, d_net_force, Xi, deltaT);
 253
 254     return cudaSuccess;
 255     }
 256
 257 // vim:syntax=cpp