2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: ndtrung
52 #include "QuaternionMath.h"
53 #include "TwoStepNPTRigidGPU.cuh"
61 /*! \file TwoStepNPTRigidGPU.cu
62 \brief Defines GPU kernel code for NPT integration on the GPU. Used by TwoStepNPTRigidGPU.
65 // Flag for invalid particle index, identical to the sentinel value NO_INDEX in RigidData.h
66 #define INVALID_INDEX 0xffffffff
68 /*! Maclaurine expansion
69 \param x Point to take the expansion
72 __device__ Scalar nph_maclaurin_series(Scalar x)
77 return (Scalar(1.0) + Scalar(1.0/6.0) * x2 + Scalar(1.0/120.0) * x4 + Scalar(1.0/5040.0) * x2 * x4 + Scalar(1.0/362880.0) * x4 * x4);
80 /*! Kernel to zero virial contribution from particles from rigid bodies
81 \param d_virial_rigid Virial contribution from particles in rigid bodies
82 \param local_num Number of particles in this card
84 extern "C" __global__ void gpu_nph_rigid_zero_virial_rigid_kernel(Scalar *d_virial_rigid,
85 unsigned int local_num)
87 int idx = blockIdx.x * blockDim.x + threadIdx.x; // particle's index
91 d_virial_rigid[idx] = 0;
96 /*! Takes the first half-step forward for rigid bodies in the velocity-verlet NVT integration
97 \param rdata_com Body center of mass
98 \param d_rigid_group Body group
99 \param n_group_bodies Number of rigid bodies in my group
100 \param n_bodies Total umber of rigid bodies
101 \param box Box dimensions for periodic boundary condition handling
102 \param npt_rdata_dilation Volume scaling factor
103 \param npt_rdata_dimension System dimensionality
104 \param npt_rdata_new box New box sizes
107 extern "C" __global__ void gpu_nph_rigid_remap_kernel(Scalar4 *rdata_com,
108 unsigned int *d_rigid_group,
109 unsigned int n_group_bodies,
110 unsigned int n_bodies,
112 Scalar nph_rdata_dilation,
113 unsigned int nph_rdata_dimension,
114 Scalar4 *nph_rdata_new_box)
116 unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
117 if (group_idx >= n_group_bodies)
120 unsigned int idx_body = d_rigid_group[group_idx];
122 Scalar3 curL = box.getL();
125 // reset box to new size/shape
126 L.x = curL.x * nph_rdata_dilation;
127 L.y = curL.y * nph_rdata_dilation;
128 if (nph_rdata_dimension == 3)
129 L.z = curL.z * nph_rdata_dilation;
135 Scalar4 com = rdata_com[idx_body];
136 Scalar3 f = box.makeFraction(make_scalar3(com.x, com.y, com.z));
137 Scalar3 pos = newBox.makeCoordinates(f);
140 rdata_com[idx_body] = make_scalar4(pos.x, pos.y, pos.z, 0);
144 *(nph_rdata_new_box) = make_scalar4(L.x, L.y, L.z, 0.0f);
149 #pragma mark RIGID_STEP_ONE_KERNEL
150 /*! Takes the first half-step forward for rigid bodies in the velocity-verlet NVT integration
151 \param rdata_com Body center of mass
152 \param rdata_vel Body velocity
153 \param rdata_angmom Angular momentum
154 \param rdata_angvel Angular velocity
155 \param rdata_orientation Quaternion
156 \param rdata_body_image Body image
157 \param rdata_conjqm Conjugate quaternion momentum
158 \param d_rigid_mass Body mass
159 \param d_rigid_mi Body inertia moments
160 \param n_group_bodies Number of rigid bodies in my group
161 \param d_rigid_force Body forces
162 \param d_rigid_torque Body torques
163 \param d_rigid_group Body indices
164 \param n_bodies Total umber of rigid bodies
165 \param npt_rdata_epsilon_dot Barostat velocity
166 \param npt_rdata_partial_Ksum_t Body translational kinetic energy
167 \param npt_rdata_partial_Ksum_r Body rotation kinetic energy
168 \param npt_rdata_nf_t Translational degrees of freedom
169 \param npt_rdata_nf_r Translational degrees of freedom
170 \param npt_rdata_dimension System dimesion
171 \param box Box dimensions for periodic boundary condition handling
172 \param deltaT Timestep
176 extern "C" __global__ void gpu_nph_rigid_step_one_body_kernel(Scalar4* rdata_com,
178 Scalar4* rdata_angmom,
179 Scalar4* rdata_angvel,
180 Scalar4* rdata_orientation,
181 int3* rdata_body_image,
182 Scalar4* rdata_conjqm,
183 Scalar *d_rigid_mass,
185 Scalar4 *d_rigid_force,
186 Scalar4 *d_rigid_torque,
187 unsigned int *d_rigid_group,
188 unsigned int n_group_bodies,
189 unsigned int n_bodies,
190 Scalar npt_rdata_epsilon_dot,
191 Scalar* npt_rdata_partial_Ksum_t,
192 Scalar* npt_rdata_partial_Ksum_r,
193 unsigned int npt_rdata_nf_t,
194 unsigned int npt_rdata_nf_r,
195 unsigned int npt_rdata_dimension,
199 unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
201 if (group_idx >= n_group_bodies)
204 // do velocity verlet update
205 // v(t+deltaT/2) = v(t) + (1/2)a*deltaT
206 // r(t+deltaT) = r(t) + v(t+deltaT/2)*deltaT
208 Scalar4 moment_inertia, com, vel, orientation, ex_space, ey_space, ez_space, force, torque, conjqm;
210 Scalar4 mbody, tbody, fquat;
212 Scalar dt_half = Scalar(0.5) * deltaT;
213 Scalar onednft, onednfr, tmp, scale_t, scale_r, scale_v, akin_t, akin_r;
215 onednft = Scalar(1.0) + (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
216 onednfr = (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
218 tmp = Scalar(-1.0) * dt_half * onednft * npt_rdata_epsilon_dot;
219 scale_t = fast::exp(tmp);
220 tmp = Scalar(-1.0) * dt_half * onednfr * npt_rdata_epsilon_dot;
221 scale_r = fast::exp(tmp);
222 tmp = dt_half * npt_rdata_epsilon_dot;
223 scale_v = deltaT * fast::exp(tmp) * nph_maclaurin_series(tmp);
225 unsigned int idx_body = d_rigid_group[group_idx];
226 body_mass = d_rigid_mass[idx_body];
227 moment_inertia = d_rigid_mi[idx_body];
228 com = rdata_com[idx_body];
229 vel = rdata_vel[idx_body];
230 orientation = rdata_orientation[idx_body];
231 body_image = rdata_body_image[idx_body];
232 force = d_rigid_force[idx_body];
233 torque = d_rigid_torque[idx_body];
234 conjqm = rdata_conjqm[idx_body];
236 exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
239 Scalar dtfm = dt_half / body_mass;
242 vel2.x = vel.x + dtfm * force.x;
243 vel2.y = vel.y + dtfm * force.y;
244 vel2.z = vel.z + dtfm * force.z;
250 tmp = vel2.x * vel2.x + vel2.y * vel2.y + vel2.z * vel2.z;
251 akin_t = body_mass * tmp;
255 pos2.x = com.x + vel2.x * scale_v;
256 pos2.y = com.y + vel2.y * scale_v;
257 pos2.z = com.z + vel2.z * scale_v;
259 // time to fix the periodic boundary conditions
260 box.wrap(pos2, body_image);
262 matrix_dot(ex_space, ey_space, ez_space, torque, tbody);
263 quatvec(orientation, tbody, fquat);
266 conjqm2.x = conjqm.x + deltaT * fquat.x;
267 conjqm2.y = conjqm.y + deltaT * fquat.y;
268 conjqm2.z = conjqm.z + deltaT * fquat.z;
269 conjqm2.w = conjqm.w + deltaT * fquat.w;
271 conjqm2.x *= scale_r;
272 conjqm2.y *= scale_r;
273 conjqm2.z *= scale_r;
274 conjqm2.w *= scale_r;
276 // use no_squish rotate to update p and q
277 no_squish_rotate(3, conjqm2, orientation, moment_inertia, dt_half);
278 no_squish_rotate(2, conjqm2, orientation, moment_inertia, dt_half);
279 no_squish_rotate(1, conjqm2, orientation, moment_inertia, deltaT);
280 no_squish_rotate(2, conjqm2, orientation, moment_inertia, dt_half);
281 no_squish_rotate(3, conjqm2, orientation, moment_inertia, dt_half);
283 // update the exyz_space
284 // transform p back to angmom
285 // update angular velocity
287 exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
288 invquatvec(orientation, conjqm2, mbody);
289 transpose_dot(ex_space, ey_space, ez_space, mbody, angmom2);
291 angmom2.x *= Scalar(0.5);
292 angmom2.y *= Scalar(0.5);
293 angmom2.z *= Scalar(0.5);
296 computeAngularVelocity(angmom2, moment_inertia, ex_space, ey_space, ez_space, angvel2);
298 akin_r = angmom2.x * angvel2.x + angmom2.y * angvel2.y + angmom2.z * angvel2.z;
300 // write out the results (MEM_TRANSFER: ? bytes)
301 rdata_com[idx_body] = make_scalar4(pos2.x, pos2.y, pos2.z, com.w);
302 rdata_vel[idx_body] = vel2;
303 rdata_angmom[idx_body] = angmom2;
304 rdata_angvel[idx_body] = angvel2;
305 rdata_orientation[idx_body] = orientation;
306 rdata_body_image[idx_body] = body_image;
307 rdata_conjqm[idx_body] = conjqm2;
309 npt_rdata_partial_Ksum_t[group_idx] = akin_t;
310 npt_rdata_partial_Ksum_r[group_idx] = akin_r;
313 /*! \param rigid_data Rigid body data to step forward 1/2 step
314 \param d_group_members Device array listing the indicies of the mebers of the group to integrate
315 \param group_size Number of members in the group
316 \param d_net_force Particle net forces
317 \param box Box dimensions for periodic boundary condition handling
318 \param npt_rdata Thermostat/barostat data
319 \param deltaT Amount of real time to step forward in one time step
322 cudaError_t gpu_nph_rigid_step_one(const gpu_rigid_data_arrays& rigid_data,
323 unsigned int *d_group_members,
324 unsigned int group_size,
325 Scalar4 *d_net_force,
327 const gpu_npt_rigid_data& npt_rdata,
330 unsigned int n_bodies = rigid_data.n_bodies;
331 unsigned int n_group_bodies = rigid_data.n_group_bodies;
333 // setup the grid to run the kernel for rigid bodies
335 int n_blocks = n_group_bodies / block_size + 1;
336 dim3 body_grid(n_blocks, 1, 1);
337 dim3 body_threads(block_size, 1, 1);
338 gpu_nph_rigid_step_one_body_kernel<<< body_grid, body_threads >>>(rigid_data.com,
342 rigid_data.orientation,
343 rigid_data.body_image,
345 rigid_data.body_mass,
346 rigid_data.moment_inertia,
349 rigid_data.body_indices,
352 npt_rdata.epsilon_dot,
353 npt_rdata.partial_Ksum_t,
354 npt_rdata.partial_Ksum_r,
361 gpu_nph_rigid_remap_kernel<<< body_grid, body_threads >>>(rigid_data.com,
362 rigid_data.body_indices,
374 #pragma mark RIGID_STEP_TWO_KERNEL
375 //! Takes the 2nd 1/2 step forward in the velocity-verlet NPH integration scheme
377 \param rdata_vel Body velocity
378 \param rdata_angmom Angular momentum
379 \param rdata_angvel Angular velocity
380 \param rdata_orientation Quaternion
381 \param rdata_conjqm Conjugate quaternion momentum
382 \param d_rigid_mass Body mass
383 \param d_rigid_mi Body inertia moments
384 \param d_rigid_force Body forces
385 \param d_rigid_torque Body torques
386 \param d_rigid_group Body indices
387 \param n_group_bodies Number of rigid bodies in my group
388 \param n_bodies Total number of rigid bodies
389 \param npt_rdata_eta_dot_t0 Thermostat translational part
390 \param npt_rdata_eta_dot_r0 Thermostat rotational part
391 \param npt_rdata_epsilon_dot Barostat velocity
392 \param npt_rdata_partial_Ksum_t Body translational kinetic energy
393 \param npt_rdata_partial_Ksum_r Body rotation kinetic energy
394 \param npt_rdata_nf_t Translational degrees of freedom
395 \param npt_rdata_nf_r Translational degrees of freedom
396 \param npt_rdata_dimension System dimesion
397 \param deltaT Timestep
398 \param box Box dimensions for periodic boundary condition handling
401 extern "C" __global__ void gpu_nph_rigid_step_two_body_kernel(Scalar4* rdata_vel,
402 Scalar4* rdata_angmom,
403 Scalar4* rdata_angvel,
404 Scalar4* rdata_orientation,
405 Scalar4* rdata_conjqm,
406 Scalar *d_rigid_mass,
408 Scalar4 *d_rigid_force,
409 Scalar4 *d_rigid_torque,
410 unsigned int *d_rigid_group,
411 unsigned int n_group_bodies,
412 unsigned int n_bodies,
413 Scalar npt_rdata_epsilon_dot,
414 Scalar* npt_rdata_partial_Ksum_t,
415 Scalar* npt_rdata_partial_Ksum_r,
416 unsigned int npt_rdata_nf_t,
417 unsigned int npt_rdata_nf_r,
418 unsigned int npt_rdata_dimension,
422 unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
424 if (group_idx >= n_group_bodies)
428 Scalar4 moment_inertia, vel, ex_space, ey_space, ez_space, orientation, conjqm;
429 Scalar4 force, torque;
430 Scalar4 mbody, tbody, fquat;
432 Scalar dt_half = Scalar(0.5) * deltaT;
433 Scalar onednft, onednfr, tmp, scale_t, scale_r, akin_t, akin_r;
435 onednft = Scalar(1.0) + (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
436 onednfr = (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
438 tmp = Scalar(-1.0) * dt_half * onednft * npt_rdata_epsilon_dot;
440 tmp = Scalar(-1.0) * dt_half * onednfr * npt_rdata_epsilon_dot;
443 unsigned int idx_body = d_rigid_group[group_idx];
445 // Update body velocity and angmom
446 body_mass = d_rigid_mass[idx_body];
447 moment_inertia = d_rigid_mi[idx_body];
448 vel = rdata_vel[idx_body];
449 force = d_rigid_force[idx_body];
450 torque = d_rigid_torque[idx_body];
451 orientation = rdata_orientation[idx_body];
452 conjqm = rdata_conjqm[idx_body];
454 exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
456 Scalar dtfm = dt_half / body_mass;
458 // update the velocity
460 vel2.x = scale_t * vel.x + dtfm * force.x;
461 vel2.y = scale_t * vel.y + dtfm * force.y;
462 vel2.z = scale_t * vel.z + dtfm * force.z;
465 tmp = vel2.x * vel2.x + vel2.y * vel2.y + vel2.z * vel2.z;
466 akin_t = body_mass * tmp;
468 // update angular momentum
469 matrix_dot(ex_space, ey_space, ez_space, torque, tbody);
470 quatvec(orientation, tbody, fquat);
472 Scalar4 conjqm2, angmom2;
473 conjqm2.x = scale_r * conjqm.x + deltaT * fquat.x;
474 conjqm2.y = scale_r * conjqm.y + deltaT * fquat.y;
475 conjqm2.z = scale_r * conjqm.z + deltaT * fquat.z;
476 conjqm2.w = scale_r * conjqm.w + deltaT * fquat.w;
478 invquatvec(orientation, conjqm2, mbody);
479 transpose_dot(ex_space, ey_space, ez_space, mbody, angmom2);
481 angmom2.x *= Scalar(0.5);
482 angmom2.y *= Scalar(0.5);
483 angmom2.z *= Scalar(0.5);
486 // update angular velocity
488 computeAngularVelocity(angmom2, moment_inertia, ex_space, ey_space, ez_space, angvel2);
490 akin_r = angmom2.x * angvel2.x + angmom2.y * angvel2.y + angmom2.z * angvel2.z;
492 rdata_vel[idx_body] = vel2;
493 rdata_angmom[idx_body] = angmom2;
494 rdata_angvel[idx_body] = angvel2;
495 rdata_conjqm[idx_body] = conjqm2;
497 npt_rdata_partial_Ksum_t[group_idx] = akin_t;
498 npt_rdata_partial_Ksum_r[group_idx] = akin_r;
501 /*! \param rigid_data Rigid body data to step forward 1/2 step
502 \param d_group_members Device array listing the indicies of the mebers of the group to integrate
503 \param group_size Number of members in the group
504 \param d_net_force Particle net forces
505 \param d_net_virial Particle net virial
506 \param box Box dimensions for periodic boundary condition handling
507 \param npt_rdata Thermostat/barostat data
508 \param deltaT Amount of real time to step forward in one time step
511 cudaError_t gpu_nph_rigid_step_two( const gpu_rigid_data_arrays& rigid_data,
512 unsigned int *d_group_members,
513 unsigned int group_size,
514 Scalar4 *d_net_force,
515 Scalar *d_net_virial,
517 const gpu_npt_rigid_data& npt_rdata,
520 unsigned int n_bodies = rigid_data.n_bodies;
521 unsigned int n_group_bodies = rigid_data.n_group_bodies;
523 unsigned int block_size = 64;
524 unsigned int n_blocks = n_group_bodies / block_size + 1;
525 dim3 body_grid(n_blocks, 1, 1);
526 dim3 body_threads(block_size, 1, 1);
527 gpu_nph_rigid_step_two_body_kernel<<< body_grid, body_threads >>>(rigid_data.vel,
530 rigid_data.orientation,
532 rigid_data.body_mass,
533 rigid_data.moment_inertia,
536 rigid_data.body_indices,
539 npt_rdata.epsilon_dot,
540 npt_rdata.partial_Ksum_t,
541 npt_rdata.partial_Ksum_r,
552 #pragma mark RIGID_KINETIC_ENERGY_REDUCTION
554 //! Shared memory for kinetic energy reduction
555 extern __shared__ Scalar nph_rigid_sdata[];
557 /*! Summing the kinetic energy of rigid bodies
558 \param npt_rdata Thermostat data for rigid bodies
561 extern "C" __global__ void gpu_nph_rigid_reduce_ksum_kernel(Scalar* npt_rdata_partial_Ksum_t,
562 Scalar* npt_rdata_partial_Ksum_r,
563 Scalar* npt_rdata_Ksum_t,
564 Scalar* npt_rdata_Ksum_r,
565 unsigned int n_bodies)
567 int global_idx = blockIdx.x * blockDim.x + threadIdx.x;
569 Scalar* body_ke_t = nph_rigid_sdata;
570 Scalar* body_ke_r = &nph_rigid_sdata[blockDim.x];
572 Scalar Ksum_t = 0, Ksum_r=0;
574 // sum up the values in the partial sum via a sliding window
575 for (int start = 0; start < n_bodies; start += blockDim.x)
577 if (start + threadIdx.x < n_bodies)
579 body_ke_t[threadIdx.x] = npt_rdata_partial_Ksum_t[start + threadIdx.x];
580 body_ke_r[threadIdx.x] = npt_rdata_partial_Ksum_r[start + threadIdx.x];
584 body_ke_t[threadIdx.x] = 0;
585 body_ke_r[threadIdx.x] = 0;
589 // reduce the sum within a block
590 int offset = blockDim.x >> 1;
593 if (threadIdx.x < offset)
595 body_ke_t[threadIdx.x] += body_ke_t[threadIdx.x + offset];
596 body_ke_r[threadIdx.x] += body_ke_r[threadIdx.x + offset];
602 // everybody sums up Ksum
603 Ksum_t += body_ke_t[0];
604 Ksum_r += body_ke_r[0];
612 *npt_rdata_Ksum_t = Ksum_t;
613 *npt_rdata_Ksum_r = Ksum_r;
619 \param npt_rdata Thermostat/barostat data for rigid bodies
621 cudaError_t gpu_nph_rigid_reduce_ksum(const gpu_npt_rigid_data& npt_rdata)
623 // setup the grid to run the kernel
624 int block_size = 128;
626 dim3 threads(block_size, 1, 1);
628 // run the kernel: double the block size to accomodate Ksum_t and Ksum_r
629 gpu_nph_rigid_reduce_ksum_kernel<<< grid, threads, 2 * block_size * sizeof(Scalar) >>>(npt_rdata.partial_Ksum_t,
630 npt_rdata.partial_Ksum_r,