2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivative works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: joaander
52 #include "TwoStepNVTGPU.cuh"
60 /*! \file TwoStepNVTGPU.cu
61 \brief Defines GPU kernel code for NVT integration on the GPU. Used by TwoStepNVTGPU.
64 //! Takes the first 1/2 step forward in the NVT integration step
65 /*! \param d_pos array of particle positions
66 \param d_vel array of particle velocities
67 \param d_accel array of particle accelerations
68 \param d_image array of particle images
69 \param d_group_members Device array listing the indices of the members of the group to integrate
70 \param group_size Number of members in the group
71 \param box Box dimensions for periodic boundary condition handling
72 \param denominv Intermediate variable computed on the host and used in the NVT integration step
73 \param deltaT Amount of real time to step forward in one time step
76 Take the first half step forward in the NVT integration.
78 See gpu_nve_step_one_kernel() for some performance notes on how to handle the group data reads efficiently.
// NOTE(review): the original line numbers baked into this excerpt jump (79-80, 82, 84,
// 87-90 are missing), so the closing */ of this Doxygen block, the extern "C" __global__
// qualifier, and the parameter lines for d_vel, d_image, box, denominv, and deltaT are
// not visible here -- confirm against the complete file before editing.
81 void gpu_nvt_step_one_kernel(Scalar4 *d_pos,
83 const Scalar3 *d_accel,
85 unsigned int *d_group_members,
86 unsigned int group_size,
// One thread per group member; group_idx indexes d_group_members, which holds the
// actual particle index to operate on.
91 // determine which particle this thread works on
92 int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
// bounds check: the host launches one extra block, so trailing threads fall outside the group
94 if (group_idx < group_size)
96 unsigned int idx = d_group_members[group_idx];
98 // update positions to the next timestep and update velocities to the next half step
99 Scalar4 postype = d_pos[idx];
100 Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
102 Scalar4 velmass = d_vel[idx];
103 Scalar3 vel = make_scalar3(velmass.x, velmass.y, velmass.z);
104 Scalar3 accel = d_accel[idx];
106 // perform update computation
// half-kick plus thermostat rescale: v(t+dt/2) = (v(t) + (dt/2)*a(t)) * denominv,
// where denominv = 1/(1 + dt/2 * Xi) is precomputed on the host (see gpu_nvt_step_one)
107 vel = (vel + (Scalar(1.0)/Scalar(2.0)) * accel * deltaT) * denominv;
// NOTE(review): the position drift update (original lines ~108-109, presumably
// pos += vel * deltaT) appears elided from this excerpt -- TODO confirm.
110 // read in the image flags
111 int3 image = d_image[idx];
113 // time to fix the periodic boundary conditions
// wraps pos back into the primary box and updates the image flags accordingly
114 box.wrap(pos, image);
116 // write out the results
// the w components (type flag in postype.w, mass in velmass.w) are preserved unchanged
117 d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w);
118 d_vel[idx] = make_scalar4(vel.x, vel.y, vel.z, velmass.w);
119 d_image[idx] = image;
123 /*! \param d_pos array of particle positions
124 \param d_vel array of particle velocities
125 \param d_accel array of particle accelerations
126 \param d_image array of particle images
127 \param d_group_members Device array listing the indices of the members of the group to integrate
128 \param group_size Number of members in the group
129 \param box Box dimensions for periodic boundary condition handling
130 \param block_size Size of the block to run
131 \param Xi Current value of the NVT degree of freedom Xi
132 \param deltaT Amount of real time to step forward in one time step
// Host-side wrapper: clamps the requested block size to what the kernel supports,
// sizes the grid to cover the whole group, then launches gpu_nvt_step_one_kernel.
// NOTE(review): lines are elided from this excerpt (parameter lines for d_vel, d_image,
// box, deltaT; most of the kernel launch argument list; the closing brace and the
// cudaError_t return statement) -- confirm against the complete file.
134 cudaError_t gpu_nvt_step_one(Scalar4 *d_pos,
136 const Scalar3 *d_accel,
138 unsigned int *d_group_members,
139 unsigned int group_size,
141 unsigned int block_size,
// query the kernel's maximum threads-per-block once and cache it across calls
// NOTE(review): the cudaFuncGetAttributes return code is not checked here
145 static unsigned int max_block_size = UINT_MAX;
146 if (max_block_size == UINT_MAX)
148 cudaFuncAttributes attr;
149 cudaFuncGetAttributes(&attr, (const void *)gpu_nvt_step_one_kernel);
150 max_block_size = attr.maxThreadsPerBlock;
153 unsigned int run_block_size = min(block_size, max_block_size);
155 // setup the grid to run the kernel
// group_size/run_block_size + 1 blocks always covers the group (one extra, idle block
// when group_size divides evenly); the kernel bounds-checks group_idx
156 dim3 grid( (group_size/run_block_size) + 1, 1, 1);
157 dim3 threads(run_block_size, 1, 1);
160 gpu_nvt_step_one_kernel<<< grid, threads >>>(d_pos,
// denominv = 1/(1 + dt/2 * Xi): host-precomputed factor for the half-step velocity rescale
167 Scalar(1.0) / (Scalar(1.0) + deltaT/Scalar(2.0) * Xi),
172 //! Takes the second 1/2 step forward in the NVT integration step
173 /*! \param d_vel array of particle velocities
174 \param d_accel array of particle accelerations
175 \param d_group_members Device array listing the indices of the members of the group to integrate
176 \param group_size Number of members in the group
177 \param d_net_force Net force on each particle
178 \param Xi current value of the NVT degree of freedom Xi
179 \param deltaT Amount of real time to step forward in one time step
// NOTE(review): lines are elided from this excerpt (the closing */ of this Doxygen
// block, the d_accel parameter line, the Xi/deltaT parameter lines, and braces) --
// confirm against the complete file before editing.
181 extern "C" __global__
182 void gpu_nvt_step_two_kernel(Scalar4 *d_vel,
184 unsigned int *d_group_members,
185 unsigned int group_size,
186 Scalar4 *d_net_force,
// One thread per group member, same indexing scheme as gpu_nvt_step_one_kernel
190 // determine which particle this thread works on
191 int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
// bounds check: the host launches one extra block to round up the grid size
193 if (group_idx < group_size)
195 unsigned int idx = d_group_members[group_idx];
197 // read in the net force and calculate the acceleration
198 Scalar4 net_force = d_net_force[idx];
199 Scalar3 accel = make_scalar3(net_force.x,net_force.y,net_force.z);
201 Scalar4 vel = d_vel[idx];
// NOTE(review): original lines ~202-207 are not visible here; presumably they scale
// the net force by 1/mass (mass stored in vel.w) to obtain the acceleration -- TODO confirm.
// second half-kick with thermostat friction: v += (dt/2) * (a - Xi*v)
208 vel.x += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.x - Xi * vel.x);
209 vel.y += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.y - Xi * vel.y);
210 vel.z += (Scalar(1.0)/Scalar(2.0)) * deltaT * (accel.z - Xi * vel.z);
// NOTE(review): the write-back of the updated velocity (d_vel[idx] = vel, original
// line ~212) is not visible in this excerpt -- confirm it exists in the complete file.
214 // since we calculate the acceleration, we need to write it for the next step
215 d_accel[idx] = accel;
219 /*! \param d_vel array of particle velocities
220 \param d_accel array of particle accelerations
221 \param d_group_members Device array listing the indices of the members of the group to integrate
222 \param group_size Number of members in the group
223 \param d_net_force Net force on each particle
224 \param block_size Size of the block to execute on the device
225 \param Xi current value of the NVT degree of freedom Xi
226 \param deltaT Amount of real time to step forward in one time step
// Host-side wrapper for the second NVT half step; mirrors gpu_nvt_step_one's launch logic.
// NOTE(review): lines are elided from this excerpt (the d_accel, Xi, and deltaT
// parameter lines, braces, and the cudaError_t return statement) -- confirm against
// the complete file.
228 cudaError_t gpu_nvt_step_two(Scalar4 *d_vel,
230 unsigned int *d_group_members,
231 unsigned int group_size,
232 Scalar4 *d_net_force,
233 unsigned int block_size,
// query the kernel's maximum threads-per-block once and cache it across calls
// NOTE(review): the cudaFuncGetAttributes return code is not checked here
237 static unsigned int max_block_size = UINT_MAX;
238 if (max_block_size == UINT_MAX)
240 cudaFuncAttributes attr;
241 cudaFuncGetAttributes(&attr, (const void *)gpu_nvt_step_two_kernel);
242 max_block_size = attr.maxThreadsPerBlock;
245 unsigned int run_block_size = min(block_size, max_block_size);
247 // setup the grid to run the kernel
// round up: one extra (idle) block when group_size divides evenly; kernel bounds-checks
248 dim3 grid( (group_size/run_block_size) + 1, 1, 1);
249 dim3 threads(run_block_size, 1, 1);
252 gpu_nvt_step_two_kernel<<< grid, threads >>>(d_vel, d_accel, d_group_members, group_size, d_net_force, Xi, deltaT);