/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivative works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Maintainer: jglaser

#include "TwoStepNPTMTKGPU.cuh"

#include <assert.h>
/*! \file TwoStepNPTMTKGPU.cu
    \brief Defines GPU kernel code for NPT integration on the GPU using the
    Martyna-Tobias-Klein update equations. Used by TwoStepNPTMTKGPU.
*/
//! Shared memory used in reducing the sum of the squared velocities
extern __shared__ Scalar npt_mtk_sdata[];
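// Note: npt_mtk_sdata is dynamically sized. Every kernel that reads or writes it
// must be launched with the required size (sizeof(Scalar) * blockDim.x) passed
// as the third <<<>>> launch-configuration parameter, as gpu_npt_mtk_temperature()
// does below.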
//! Kernel to propagate the positions and velocities, first half of NPT update
__global__ void gpu_npt_mtk_step_one_kernel(Scalar4 *d_pos,
                             Scalar4 *d_vel,
                             const Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_thermo_fac,
                             Scalar mat_exp_v_xx,
                             Scalar mat_exp_v_xy,
                             Scalar mat_exp_v_xz,
                             Scalar mat_exp_v_yy,
                             Scalar mat_exp_v_yz,
                             Scalar mat_exp_v_zz,
                             Scalar mat_exp_v_int_xx,
                             Scalar mat_exp_v_int_xy,
                             Scalar mat_exp_v_int_xz,
                             Scalar mat_exp_v_int_yy,
                             Scalar mat_exp_v_int_yz,
                             Scalar mat_exp_v_int_zz,
                             Scalar mat_exp_r_xx,
                             Scalar mat_exp_r_xy,
                             Scalar mat_exp_r_xz,
                             Scalar mat_exp_r_yy,
                             Scalar mat_exp_r_yz,
                             Scalar mat_exp_r_zz,
                             Scalar mat_exp_r_int_xx,
                             Scalar mat_exp_r_int_xy,
                             Scalar mat_exp_r_int_xz,
                             Scalar mat_exp_r_int_yy,
                             Scalar mat_exp_r_int_yz,
                             Scalar mat_exp_r_int_zz,
                             Scalar deltaT,
                             bool rescale_all)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle position
        Scalar4 pos = d_pos[idx];

        // fetch particle velocity and acceleration
        Scalar4 vel = d_vel[idx];
        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);
        Scalar3 accel = d_accel[idx];
        Scalar3 r = make_scalar3(pos.x, pos.y, pos.z);
        // apply thermostat update of velocity
        v *= exp_thermo_fac;

        // propagate velocity by half a time step and position by the full time step
        // by multiplying with an upper triangular matrix
        v.x = mat_exp_v_xx * v.x + mat_exp_v_xy * v.y + mat_exp_v_xz * v.z;
        v.y = mat_exp_v_yy * v.y + mat_exp_v_yz * v.z;
        v.z = mat_exp_v_zz * v.z;

        v.x += mat_exp_v_int_xx * accel.x + mat_exp_v_int_xy * accel.y + mat_exp_v_int_xz * accel.z;
        v.y += mat_exp_v_int_yy * accel.y + mat_exp_v_int_yz * accel.z;
        v.z += mat_exp_v_int_zz * accel.z;
        if (!rescale_all)
            {
            // rescale this group of particles
            r.x = mat_exp_r_xx * r.x + mat_exp_r_xy * r.y + mat_exp_r_xz * r.z;
            r.y = mat_exp_r_yy * r.y + mat_exp_r_yz * r.z;
            r.z = mat_exp_r_zz * r.z;
            }

        r.x += mat_exp_r_int_xx * v.x + mat_exp_r_int_xy * v.y + mat_exp_r_int_xz * v.z;
        r.y += mat_exp_r_int_yy * v.y + mat_exp_r_int_yz * v.z;
        r.z += mat_exp_r_int_zz * v.z;

        // write out the results
        d_pos[idx] = make_scalar4(r.x, r.y, r.z, pos.w);
        d_vel[idx] = make_scalar4(v.x, v.y, v.z, vel.w);
        }
    }
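// For reference, the update performed by the kernel above is, in matrix
// notation (a restatement of the kernel body, with M_v, M_r the upper
// triangular matrix exponentials and M_v_int, M_r_int their integrated
// counterparts):
//
//   v(t+dt/2) = M_v * (exp_thermo_fac * v(t)) + M_v_int * a(t)
//   r(t+dt)   = M_r * r(t) + M_r_int * v(t+dt/2)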
/*! \param d_pos array of particle positions
    \param d_vel array of particle velocities
    \param d_accel array of particle accelerations
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_thermo_fac Update factor for thermostat
    \param mat_exp_v Matrix exponential for velocity update
    \param mat_exp_v_int Integrated matrix exp for velocity update
    \param mat_exp_r Matrix exponential for position update
    \param mat_exp_r_int Integrated matrix exp for position update
    \param deltaT Time to advance (for one full step)
    \param rescale_all True if all particles in the system should be rescaled at once

    This is just a kernel driver for gpu_npt_mtk_step_one_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_step_one(Scalar4 *d_pos,
                             Scalar4 *d_vel,
                             const Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_thermo_fac,
                             Scalar *mat_exp_v,
                             Scalar *mat_exp_v_int,
                             Scalar *mat_exp_r,
                             Scalar *mat_exp_r_int,
                             Scalar deltaT,
                             bool rescale_all)
    {
    // setup the grid to run the kernel
    unsigned int block_size = 256;
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);
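    // note: (group_size / block_size) + 1 rounds the grid size up, launching one
    // extra (idle) block when group_size is an exact multiple of block_size; the
    // group_idx < group_size check in the kernel keeps those threads inactive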
    // upper-triangular coefficients are packed in (xx,xy,xz,yy,yz,zz) order
    gpu_npt_mtk_step_one_kernel<<< grid, threads >>>(d_pos, d_vel, d_accel,
        d_group_members, group_size, exp_thermo_fac,
        mat_exp_v[0], mat_exp_v[1], mat_exp_v[2], mat_exp_v[3], mat_exp_v[4], mat_exp_v[5],
        mat_exp_v_int[0], mat_exp_v_int[1], mat_exp_v_int[2], mat_exp_v_int[3], mat_exp_v_int[4], mat_exp_v_int[5],
        mat_exp_r[0], mat_exp_r[1], mat_exp_r[2], mat_exp_r[3], mat_exp_r[4], mat_exp_r[5],
        mat_exp_r_int[0], mat_exp_r_int[1], mat_exp_r_int[2], mat_exp_r_int[3], mat_exp_r_int[4], mat_exp_r_int[5],
        deltaT, rescale_all);

    return cudaSuccess;
    }
/*! \param N number of particles in the system
    \param d_pos array of particle positions
    \param d_image array of particle images
    \param box The new box in which the particles now reside

    Wrap the positions of all particles back into the box.
*/
extern "C" __global__
void gpu_npt_mtk_wrap_kernel(const unsigned int N,
                             Scalar4 *d_pos,
                             int3 *d_image,
                             BoxDim box)
    {
    // determine which particle this thread works on
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // wrap ALL particles in the box
    if (idx < N)
        {
        // fetch particle position
        Scalar4 postype = d_pos[idx];
        Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);

        // read in the image flags
        int3 image = d_image[idx];

        // fix periodic boundary conditions
        box.wrap(pos, image);

        // write out the results
        d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w);
        d_image[idx] = image;
        }
    }
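// The image flags updated by box.wrap() record how many box lengths a particle
// has crossed in each direction, so unwrapped trajectories can be reconstructed
// later from (position, image) pairs.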
/*! \param N number of particles in the system
    \param d_pos array of particle positions
    \param d_image array of particle images
    \param box The new box in which the particles now reside

    This is just a kernel driver for gpu_npt_mtk_wrap_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_wrap(const unsigned int N,
                             Scalar4 *d_pos,
                             int3 *d_image,
                             const BoxDim& box)
    {
    // setup the grid to run the kernel
    unsigned int block_size = 256;
    dim3 grid( (N / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    gpu_npt_mtk_wrap_kernel<<< grid, threads >>>(N, d_pos, d_image, box);

    return cudaSuccess;
    }
//! Kernel to propagate the positions and velocities, second half of NPT update
__global__ void gpu_npt_mtk_step_two_kernel(Scalar4 *d_vel,
                             Scalar3 *d_accel,
                             const Scalar4 *d_net_force,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar mat_exp_v_xx,
                             Scalar mat_exp_v_xy,
                             Scalar mat_exp_v_xz,
                             Scalar mat_exp_v_yy,
                             Scalar mat_exp_v_yz,
                             Scalar mat_exp_v_zz,
                             Scalar mat_exp_v_int_xx,
                             Scalar mat_exp_v_int_xy,
                             Scalar mat_exp_v_int_xz,
                             Scalar mat_exp_v_int_yy,
                             Scalar mat_exp_v_int_yz,
                             Scalar mat_exp_v_int_zz,
                             Scalar deltaT)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle velocity and acceleration
        Scalar4 vel = d_vel[idx];

        // compute acceleration from the net force and the mass stored in vel.w
        Scalar minv = Scalar(1.0)/vel.w;
        Scalar4 net_force = d_net_force[idx];
        Scalar3 accel = make_scalar3(net_force.x, net_force.y, net_force.z);
        accel *= minv;
        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);

        // propagate velocity by half a time step by multiplying with an upper triangular matrix
        v.x = mat_exp_v_xx * v.x + mat_exp_v_xy * v.y + mat_exp_v_xz * v.z;
        v.y = mat_exp_v_yy * v.y + mat_exp_v_yz * v.z;
        v.z = mat_exp_v_zz * v.z;

        v.x += mat_exp_v_int_xx * accel.x + mat_exp_v_int_xy * accel.y + mat_exp_v_int_xz * accel.z;
        v.y += mat_exp_v_int_yy * accel.y + mat_exp_v_int_yz * accel.z;
        v.z += mat_exp_v_int_zz * accel.z;

        // write out velocity
        d_vel[idx] = make_scalar4(v.x, v.y, v.z, vel.w);

        // since we calculate the acceleration, we need to write it out for the next step
        d_accel[idx] = accel;
        }
    }
/*! \param d_vel array of particle velocities
    \param d_accel array of particle accelerations
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param mat_exp_v Matrix exponential for velocity update
    \param mat_exp_v_int Integrated matrix exp for velocity update
    \param d_net_force Net force on each particle
    \param deltaT Time to move forward in one whole step

    This is just a kernel driver for gpu_npt_mtk_step_two_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_step_two(Scalar4 *d_vel,
                             Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar4 *d_net_force,
                             Scalar* mat_exp_v,
                             Scalar* mat_exp_v_int,
                             Scalar deltaT)
    {
    // setup the grid to run the kernel
    unsigned int block_size = 256;
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    gpu_npt_mtk_step_two_kernel<<< grid, threads >>>(d_vel, d_accel, d_net_force,
        d_group_members, group_size,
        mat_exp_v[0], mat_exp_v[1], mat_exp_v[2], mat_exp_v[3], mat_exp_v[4], mat_exp_v[5],
        mat_exp_v_int[0], mat_exp_v_int[1], mat_exp_v_int[2], mat_exp_v_int[3], mat_exp_v_int[4], mat_exp_v_int[5],
        deltaT);

    return cudaSuccess;
    }
//! GPU kernel to perform partial reduction of temperature
__global__ void gpu_npt_mtk_temperature_partial(unsigned int *d_group_members,
                                                unsigned int group_size,
                                                Scalar *d_scratch,
                                                Scalar4 *d_velocity)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    Scalar mv2_element; // element of scratch space read in
    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        Scalar4 vel = d_velocity[idx];
        Scalar mass = vel.w;

        mv2_element = mass * (vel.x*vel.x + vel.y*vel.y + vel.z*vel.z);
        }
    else
        {
        // non-participating thread: contribute 0 to the sum
        mv2_element = Scalar(0.0);
        }
    npt_mtk_sdata[threadIdx.x] = mv2_element;
    __syncthreads();

    int offs = blockDim.x >> 1;
    while (offs > 0)
        {
        if (threadIdx.x < offs)
            npt_mtk_sdata[threadIdx.x] += npt_mtk_sdata[threadIdx.x + offs];
        offs >>= 1;
        __syncthreads();
        }

    // write out partial sum
    if (threadIdx.x == 0)
        d_scratch[blockIdx.x] = npt_mtk_sdata[0];
    }
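// The halving loop above implements a shared-memory tree reduction and assumes
// blockDim.x is a power of two; callers of gpu_npt_mtk_temperature() must
// therefore pass a power-of-two block_size.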
//! GPU kernel to perform final reduction of temperature
__global__ void gpu_npt_mtk_temperature_final_sum(Scalar *d_scratch,
                                                  Scalar *d_temperature,
                                                  unsigned int ndof,
                                                  unsigned int num_partial_sums)
    {
    Scalar final_sum(0.0);

    for (int start = 0; start < num_partial_sums; start += blockDim.x)
        {
        if (start + threadIdx.x < num_partial_sums)
            npt_mtk_sdata[threadIdx.x] = d_scratch[start + threadIdx.x];
        else
            npt_mtk_sdata[threadIdx.x] = Scalar(0.0);

        __syncthreads();

        // reduce the sum in parallel
        int offs = blockDim.x >> 1;
        while (offs > 0)
            {
            if (threadIdx.x < offs)
                npt_mtk_sdata[threadIdx.x] += npt_mtk_sdata[threadIdx.x + offs];
            offs >>= 1;
            __syncthreads();
            }

        if (threadIdx.x == 0)
            final_sum += npt_mtk_sdata[0];
        }

    if (threadIdx.x == 0)
        *d_temperature = final_sum/Scalar(ndof);
    }
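// The value written out is sum(m_i |v_i|^2) / ndof, i.e. the instantaneous
// kinetic temperature 2*K/N_dof in units where the Boltzmann constant is 1
// (HOOMD's reduced-unit convention).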
/*! \param d_temperature Device variable to store the temperature value (output)
    \param d_vel Array of particle velocities and masses
    \param d_scratch Temporary scratch space for reduction
    \param num_blocks Number of CUDA blocks used in reduction
    \param block_size Size of blocks used in reduction
    \param d_group_members Members of group for which the reduction is performed
    \param group_size Size of group
    \param ndof Number of degrees of freedom of group

    This function performs the reduction of the temperature on the GPU. It is just
    a driver function that calls the appropriate GPU kernels.
*/
cudaError_t gpu_npt_mtk_temperature(Scalar *d_temperature,
                                    Scalar4 *d_vel,
                                    Scalar *d_scratch,
                                    unsigned int num_blocks,
                                    unsigned int block_size,
                                    unsigned int *d_group_members,
                                    unsigned int group_size,
                                    unsigned int ndof)
    {
    assert(d_temperature);
    dim3 grid(num_blocks, 1, 1);
    dim3 threads(block_size, 1, 1);

    unsigned int shared_bytes = sizeof(Scalar)*block_size;

    // reduce squared velocity norm times mass, first pass
    gpu_npt_mtk_temperature_partial<<<grid, threads, shared_bytes>>>(
        d_group_members,
        group_size,
        d_scratch,
        d_vel);
    unsigned int final_block_size = 512;
    grid = dim3(1, 1, 1);
    threads = dim3(final_block_size, 1, 1);
    shared_bytes = sizeof(Scalar)*final_block_size;

    // reduction, second pass
    gpu_npt_mtk_temperature_final_sum<<<grid, threads, shared_bytes>>>(
        d_scratch,
        d_temperature,
        ndof,
        num_blocks);

    return cudaSuccess;
    }
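/* A minimal host-side sketch of driving this two-pass reduction; the buffer
   allocations are the only assumptions here, everything else follows the
   signatures above:

       unsigned int block_size = 256;   // must be a power of two
       unsigned int num_blocks = group_size / block_size + 1;
       Scalar *d_scratch, *d_temperature;
       cudaMalloc(&d_scratch, num_blocks * sizeof(Scalar));   // one partial sum per block
       cudaMalloc(&d_temperature, sizeof(Scalar));            // single output value
       gpu_npt_mtk_temperature(d_temperature, d_vel, d_scratch, num_blocks,
                               block_size, d_group_members, group_size, ndof);
*/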
/*! \param d_vel array of particle velocities and masses
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_v_fac_thermo scaling factor (per direction) for velocity update generated by thermostat

    GPU kernel to thermostat velocities.
*/
__global__ void gpu_npt_mtk_thermostat_kernel(Scalar4 *d_vel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_v_fac_thermo)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle velocity
        Scalar4 vel = d_vel[idx];
        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);

        // rescale the velocity
        v *= exp_v_fac_thermo;

        // write out the results
        d_vel[idx] = make_scalar4(v.x, v.y, v.z, vel.w);
        }
    }
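// In a Nose-Hoover/MTK scheme the rescaling factor is typically
// exp_v_fac_thermo = exp(-xi*deltaT/2), with xi the thermostat velocity; it is
// computed by the caller and passed in, so this kernel is agnostic to the
// particular thermostat used.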
/*! \param d_vel array of particle velocities
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_v_fac_thermo Rescaling factor
    \param block_size Kernel block size

    This is just a kernel driver for gpu_npt_mtk_thermostat_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_thermostat(Scalar4 *d_vel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_v_fac_thermo,
                             unsigned int block_size)
    {
    // setup the grid to run the kernel
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    gpu_npt_mtk_thermostat_kernel<<< grid, threads >>>(d_vel,
        d_group_members,
        group_size,
        exp_v_fac_thermo);

    return cudaSuccess;
    }
//! Kernel to rescale the positions of all particles by an upper triangular matrix
__global__ void gpu_npt_mtk_rescale_kernel(unsigned int N,
                             Scalar4 *d_postype,
                             Scalar mat_exp_r_xx, Scalar mat_exp_r_xy, Scalar mat_exp_r_xz,
                             Scalar mat_exp_r_yy, Scalar mat_exp_r_yz, Scalar mat_exp_r_zz)
    {
    unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;

    if (idx >= N) return;

    // fetch particle position
    Scalar4 postype = d_postype[idx];
    Scalar3 r = make_scalar3(postype.x, postype.y, postype.z);

    r.x = mat_exp_r_xx * r.x + mat_exp_r_xy * r.y + mat_exp_r_xz * r.z;
    r.y = mat_exp_r_yy * r.y + mat_exp_r_yz * r.z;
    r.z = mat_exp_r_zz * r.z;

    d_postype[idx] = make_scalar4(r.x, r.y, r.z, postype.w);
    }
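// This kernel applies the same upper-triangular box deformation to all N
// particles; it complements the rescale_all code path in
// gpu_npt_mtk_step_one_kernel(), which skips the per-group position rescaling
// so that it can be done here for the whole system in one pass.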
//! Kernel driver for gpu_npt_mtk_rescale_kernel()
void gpu_npt_mtk_rescale(unsigned int N, Scalar4 *d_postype,
                             Scalar mat_exp_r_xx, Scalar mat_exp_r_xy, Scalar mat_exp_r_xz,
                             Scalar mat_exp_r_yy, Scalar mat_exp_r_yz, Scalar mat_exp_r_zz)
    {
    unsigned int block_size = 256;
    dim3 grid( (N / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    gpu_npt_mtk_rescale_kernel<<<grid, threads>>> (N, d_postype,
        mat_exp_r_xx, mat_exp_r_xy, mat_exp_r_xz,
        mat_exp_r_yy, mat_exp_r_yz, mat_exp_r_zz);
    }