libhoomd/cuda/TwoStepNPHRigidGPU.cu

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: ndtrung
  51
  52 #include "QuaternionMath.h"
  53 #include "TwoStepNPTRigidGPU.cuh"
  54
  55 #ifdef WIN32
  56 #include <cassert>
  57 #else
  58 #include <assert.h>
  59 #endif
  60
  61 /*! \file TwoStepNPTRigidGPU.cu
  62     \brief Defines GPU kernel code for NPT integration on the GPU. Used by TwoStepNPTRigidGPU.
  63 */
  64
  65 // Flag for invalid particle index, identical to the sentinel value NO_INDEX in RigidData.h
  66 #define INVALID_INDEX 0xffffffff
  67
  68 /*! Maclaurine expansion
  69     \param x Point to take the expansion
  70
  71 */
  72 __device__ Scalar nph_maclaurin_series(Scalar x)
  73     {
  74     Scalar x2, x4;
  75     x2 = x * x;
  76     x4 = x2 * x2;
  77     return (Scalar(1.0) + Scalar(1.0/6.0) * x2 + Scalar(1.0/120.0) * x4 + Scalar(1.0/5040.0) * x2 * x4 + Scalar(1.0/362880.0) * x4 * x4);
  78     }
  79
  80 /*! Kernel to zero virial contribution from particles from rigid bodies
  81     \param d_virial_rigid Virial contribution from particles in rigid bodies
  82     \param local_num Number of particles in this card
  83 */
  84 extern "C" __global__ void gpu_nph_rigid_zero_virial_rigid_kernel(Scalar *d_virial_rigid,
  85                                                                  unsigned int local_num)
  86     {
  87     int idx = blockIdx.x * blockDim.x + threadIdx.x; // particle's index
  88
  89     if (idx < local_num)
  90         {
  91         d_virial_rigid[idx] = 0;
  92         }
  93
  94     }
  95
  96 /*! Takes the first half-step forward for rigid bodies in the velocity-verlet NVT integration
  97     \param rdata_com Body center of mass
  98     \param d_rigid_group Body group
  99     \param n_group_bodies Number of rigid bodies in my group
 100     \param n_bodies Total umber of rigid bodies
 101     \param box Box dimensions for periodic boundary condition handling
 102     \param npt_rdata_dilation Volume scaling factor
 103     \param npt_rdata_dimension System dimensionality
 104     \param npt_rdata_new box New box sizes
 105 */
 106
 107 extern "C" __global__ void gpu_nph_rigid_remap_kernel(Scalar4 *rdata_com,
 108                                                       unsigned int *d_rigid_group,
 109                                                       unsigned int n_group_bodies,
 110                                                       unsigned int n_bodies,
 111                                                       BoxDim box,
 112                                                       Scalar nph_rdata_dilation,
 113                                                       unsigned int nph_rdata_dimension,
 114                                                       Scalar4 *nph_rdata_new_box)
 115     {
 116     unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
 117     if (group_idx >= n_group_bodies)
 118         return;
 119
 120     unsigned int idx_body = d_rigid_group[group_idx];
 121
 122     Scalar3 curL = box.getL();
 123     Scalar3 L;
 124
 125     // reset box to new size/shape
 126     L.x = curL.x * nph_rdata_dilation;
 127     L.y = curL.y * nph_rdata_dilation;
 128     if (nph_rdata_dimension == 3)
 129         L.z = curL.z * nph_rdata_dilation;
 130
 131     // copy and setL
 132     BoxDim newBox = box;
 133     newBox.setL(L);
 134
 135     Scalar4 com = rdata_com[idx_body];
 136     Scalar3 f = box.makeFraction(make_scalar3(com.x, com.y, com.z));
 137     Scalar3 pos = newBox.makeCoordinates(f);
 138
 139     // write out results
 140     rdata_com[idx_body] = make_scalar4(pos.x, pos.y, pos.z, 0);
 141
 142     if (idx_body == 0)
 143         {
 144         *(nph_rdata_new_box) = make_scalar4(L.x, L.y, L.z, 0.0f);
 145         }
 146     }
 147
 148
 149 #pragma mark RIGID_STEP_ONE_KERNEL
 150 /*! Takes the first half-step forward for rigid bodies in the velocity-verlet NVT integration
 151     \param rdata_com Body center of mass
 152     \param rdata_vel Body velocity
 153     \param rdata_angmom Angular momentum
 154     \param rdata_angvel Angular velocity
 155     \param rdata_orientation Quaternion
 156     \param rdata_body_image Body image
 157     \param rdata_conjqm Conjugate quaternion momentum
 158     \param d_rigid_mass Body mass
 159     \param d_rigid_mi Body inertia moments
 160     \param n_group_bodies Number of rigid bodies in my group
 161     \param d_rigid_force Body forces
 162     \param d_rigid_torque Body torques
 163     \param d_rigid_group Body indices
 164     \param n_bodies Total umber of rigid bodies
 165     \param npt_rdata_epsilon_dot Barostat velocity
 166     \param npt_rdata_partial_Ksum_t Body translational kinetic energy
 167     \param npt_rdata_partial_Ksum_r Body rotation kinetic energy
 168     \param npt_rdata_nf_t Translational degrees of freedom
 169     \param npt_rdata_nf_r Translational degrees of freedom
 170     \param npt_rdata_dimension System dimesion
 171     \param box Box dimensions for periodic boundary condition handling
 172     \param deltaT Timestep
 173
 174 */
 175
 176 extern "C" __global__ void gpu_nph_rigid_step_one_body_kernel(Scalar4* rdata_com,
 177                                                             Scalar4* rdata_vel,
 178                                                             Scalar4* rdata_angmom,
 179                                                             Scalar4* rdata_angvel,
 180                                                             Scalar4* rdata_orientation,
 181                                                             int3* rdata_body_image,
 182                                                             Scalar4* rdata_conjqm,
 183                                                             Scalar *d_rigid_mass,
 184                                                             Scalar4 *d_rigid_mi,
 185                                                             Scalar4 *d_rigid_force,
 186                                                             Scalar4 *d_rigid_torque,
 187                                                             unsigned int *d_rigid_group,
 188                                                             unsigned int n_group_bodies,
 189                                                             unsigned int n_bodies,
 190                                                             Scalar npt_rdata_epsilon_dot,
 191                                                             Scalar* npt_rdata_partial_Ksum_t,
 192                                                             Scalar* npt_rdata_partial_Ksum_r,
 193                                                             unsigned int npt_rdata_nf_t,
 194                                                             unsigned int npt_rdata_nf_r,
 195                                                             unsigned int npt_rdata_dimension,
 196                                                             BoxDim box,
 197                                                             Scalar deltaT)
 198     {
 199     unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
 200
 201     if (group_idx >= n_group_bodies)
 202         return;
 203
 204     // do velocity verlet update
 205     // v(t+deltaT/2) = v(t) + (1/2)a*deltaT
 206     // r(t+deltaT) = r(t) + v(t+deltaT/2)*deltaT
 207     Scalar body_mass;
 208     Scalar4 moment_inertia, com, vel, orientation, ex_space, ey_space, ez_space, force, torque, conjqm;
 209     int3 body_image;
 210     Scalar4 mbody, tbody, fquat;
 211
 212     Scalar dt_half = Scalar(0.5) * deltaT;
 213     Scalar onednft, onednfr, tmp, scale_t, scale_r, scale_v, akin_t, akin_r;
 214
 215     onednft = Scalar(1.0) + (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
 216     onednfr = (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
 217
 218     tmp = Scalar(-1.0) * dt_half * onednft * npt_rdata_epsilon_dot;
 219     scale_t = fast::exp(tmp);
 220     tmp = Scalar(-1.0) * dt_half * onednfr * npt_rdata_epsilon_dot;
 221     scale_r = fast::exp(tmp);
 222     tmp = dt_half * npt_rdata_epsilon_dot;
 223     scale_v = deltaT * fast::exp(tmp) * nph_maclaurin_series(tmp);
 224
 225     unsigned int idx_body = d_rigid_group[group_idx];
 226     body_mass = d_rigid_mass[idx_body];
 227     moment_inertia = d_rigid_mi[idx_body];
 228     com = rdata_com[idx_body];
 229     vel = rdata_vel[idx_body];
 230     orientation = rdata_orientation[idx_body];
 231     body_image = rdata_body_image[idx_body];
 232     force = d_rigid_force[idx_body];
 233     torque = d_rigid_torque[idx_body];
 234     conjqm = rdata_conjqm[idx_body];
 235
 236     exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
 237
 238     // update velocity
 239     Scalar dtfm = dt_half / body_mass;
 240
 241     Scalar4 vel2;
 242     vel2.x = vel.x + dtfm * force.x;
 243     vel2.y = vel.y + dtfm * force.y;
 244     vel2.z = vel.z + dtfm * force.z;
 245     vel2.x *= scale_t;
 246     vel2.y *= scale_t;
 247     vel2.z *= scale_t;
 248     vel2.w = vel.w;
 249
 250     tmp = vel2.x * vel2.x + vel2.y * vel2.y + vel2.z * vel2.z;
 251     akin_t = body_mass * tmp;
 252
 253     // update position
 254     Scalar3 pos2;
 255     pos2.x = com.x + vel2.x * scale_v;
 256     pos2.y = com.y + vel2.y * scale_v;
 257     pos2.z = com.z + vel2.z * scale_v;
 258
 259     // time to fix the periodic boundary conditions
 260     box.wrap(pos2, body_image);
 261
 262     matrix_dot(ex_space, ey_space, ez_space, torque, tbody);
 263     quatvec(orientation, tbody, fquat);
 264
 265     Scalar4 conjqm2;
 266     conjqm2.x = conjqm.x + deltaT * fquat.x;
 267     conjqm2.y = conjqm.y + deltaT * fquat.y;
 268     conjqm2.z = conjqm.z + deltaT * fquat.z;
 269     conjqm2.w = conjqm.w + deltaT * fquat.w;
 270
 271     conjqm2.x *= scale_r;
 272     conjqm2.y *= scale_r;
 273     conjqm2.z *= scale_r;
 274     conjqm2.w *= scale_r;
 275
 276     // use no_squish rotate to update p and q
 277     no_squish_rotate(3, conjqm2, orientation, moment_inertia, dt_half);
 278     no_squish_rotate(2, conjqm2, orientation, moment_inertia, dt_half);
 279     no_squish_rotate(1, conjqm2, orientation, moment_inertia, deltaT);
 280     no_squish_rotate(2, conjqm2, orientation, moment_inertia, dt_half);
 281     no_squish_rotate(3, conjqm2, orientation, moment_inertia, dt_half);
 282
 283     // update the exyz_space
 284     // transform p back to angmom
 285     // update angular velocity
 286     Scalar4 angmom2;
 287     exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
 288     invquatvec(orientation, conjqm2, mbody);
 289     transpose_dot(ex_space, ey_space, ez_space, mbody, angmom2);
 290
 291     angmom2.x *= Scalar(0.5);
 292     angmom2.y *= Scalar(0.5);
 293     angmom2.z *= Scalar(0.5);
 294
 295     Scalar4 angvel2;
 296     computeAngularVelocity(angmom2, moment_inertia, ex_space, ey_space, ez_space, angvel2);
 297
 298     akin_r = angmom2.x * angvel2.x + angmom2.y * angvel2.y + angmom2.z * angvel2.z;
 299
 300     // write out the results (MEM_TRANSFER: ? bytes)
 301     rdata_com[idx_body] = make_scalar4(pos2.x, pos2.y, pos2.z, com.w);
 302     rdata_vel[idx_body] = vel2;
 303     rdata_angmom[idx_body] = angmom2;
 304     rdata_angvel[idx_body] = angvel2;
 305     rdata_orientation[idx_body] = orientation;
 306     rdata_body_image[idx_body] = body_image;
 307     rdata_conjqm[idx_body] = conjqm2;
 308
 309     npt_rdata_partial_Ksum_t[group_idx] = akin_t;
 310     npt_rdata_partial_Ksum_r[group_idx] = akin_r;
 311     }
 312
 313 /*! \param rigid_data Rigid body data to step forward 1/2 step
 314     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
 315     \param group_size Number of members in the group
 316     \param d_net_force Particle net forces
 317     \param box Box dimensions for periodic boundary condition handling
 318     \param npt_rdata Thermostat/barostat data
 319     \param deltaT Amount of real time to step forward in one time step
 320
 321 */
 322 cudaError_t gpu_nph_rigid_step_one(const gpu_rigid_data_arrays& rigid_data,
 323                                    unsigned int *d_group_members,
 324                                    unsigned int group_size,
 325                                    Scalar4 *d_net_force,
 326                                    const BoxDim& box,
 327                                    const gpu_npt_rigid_data& npt_rdata,
 328                                    Scalar deltaT)
 329     {
 330     unsigned int n_bodies = rigid_data.n_bodies;
 331     unsigned int n_group_bodies = rigid_data.n_group_bodies;
 332
 333     // setup the grid to run the kernel for rigid bodies
 334     int block_size = 64;
 335     int n_blocks = n_group_bodies / block_size + 1;
 336     dim3 body_grid(n_blocks, 1, 1);
 337     dim3 body_threads(block_size, 1, 1);
 338     gpu_nph_rigid_step_one_body_kernel<<< body_grid, body_threads  >>>(rigid_data.com,
 339                                                                        rigid_data.vel,
 340                                                                        rigid_data.angmom,
 341                                                                        rigid_data.angvel,
 342                                                                        rigid_data.orientation,
 343                                                                        rigid_data.body_image,
 344                                                                        rigid_data.conjqm,
 345                                                                        rigid_data.body_mass,
 346                                                                        rigid_data.moment_inertia,
 347                                                                        rigid_data.force,
 348                                                                        rigid_data.torque,
 349                                                                        rigid_data.body_indices,
 350                                                                        n_group_bodies,
 351                                                                        n_bodies,
 352                                                                        npt_rdata.epsilon_dot,
 353                                                                        npt_rdata.partial_Ksum_t,
 354                                                                        npt_rdata.partial_Ksum_r,
 355                                                                        npt_rdata.nf_t,
 356                                                                        npt_rdata.nf_r,
 357                                                                        npt_rdata.dimension,
 358                                                                        box,
 359                                                                        deltaT);
 360
 361     gpu_nph_rigid_remap_kernel<<< body_grid, body_threads >>>(rigid_data.com,
 362                                                               rigid_data.body_indices,
 363                                                               n_group_bodies,
 364                                                               n_bodies,
 365                                                               box,
 366                                                               npt_rdata.dilation,
 367                                                               npt_rdata.dimension,
 368                                                               npt_rdata.new_box);
 369
 370
 371     return cudaSuccess;
 372     }
 373
 374 #pragma mark RIGID_STEP_TWO_KERNEL
 375 //! Takes the 2nd 1/2 step forward in the velocity-verlet NPH integration scheme
 376 /*!
 377     \param rdata_vel Body velocity
 378     \param rdata_angmom Angular momentum
 379     \param rdata_angvel Angular velocity
 380     \param rdata_orientation Quaternion
 381     \param rdata_conjqm Conjugate quaternion momentum
 382     \param d_rigid_mass Body mass
 383     \param d_rigid_mi Body inertia moments
 384     \param d_rigid_force Body forces
 385     \param d_rigid_torque Body torques
 386     \param d_rigid_group Body indices
 387     \param n_group_bodies Number of rigid bodies in my group
 388     \param n_bodies Total number of rigid bodies
 389     \param npt_rdata_eta_dot_t0 Thermostat translational part
 390     \param npt_rdata_eta_dot_r0 Thermostat rotational part
 391     \param npt_rdata_epsilon_dot Barostat velocity
 392     \param npt_rdata_partial_Ksum_t Body translational kinetic energy
 393     \param npt_rdata_partial_Ksum_r Body rotation kinetic energy
 394     \param npt_rdata_nf_t Translational degrees of freedom
 395     \param npt_rdata_nf_r Translational degrees of freedom
 396     \param npt_rdata_dimension System dimesion
 397     \param deltaT Timestep
 398     \param box Box dimensions for periodic boundary condition handling
 399 */
 400
 401 extern "C" __global__ void gpu_nph_rigid_step_two_body_kernel(Scalar4* rdata_vel,
 402                                                               Scalar4* rdata_angmom,
 403                                                               Scalar4* rdata_angvel,
 404                                                               Scalar4* rdata_orientation,
 405                                                               Scalar4* rdata_conjqm,
 406                                                               Scalar *d_rigid_mass,
 407                                                               Scalar4 *d_rigid_mi,
 408                                                               Scalar4 *d_rigid_force,
 409                                                               Scalar4 *d_rigid_torque,
 410                                                               unsigned int *d_rigid_group,
 411                                                               unsigned int n_group_bodies,
 412                                                               unsigned int n_bodies,
 413                                                               Scalar npt_rdata_epsilon_dot,
 414                                                               Scalar* npt_rdata_partial_Ksum_t,
 415                                                               Scalar* npt_rdata_partial_Ksum_r,
 416                                                               unsigned int npt_rdata_nf_t,
 417                                                               unsigned int npt_rdata_nf_r,
 418                                                               unsigned int npt_rdata_dimension,
 419                                                               BoxDim box,
 420                                                               Scalar deltaT)
 421     {
 422     unsigned int group_idx = blockIdx.x * blockDim.x + threadIdx.x;
 423
 424     if (group_idx >= n_group_bodies)
 425         return;
 426
 427     Scalar body_mass;
 428     Scalar4 moment_inertia, vel, ex_space, ey_space, ez_space, orientation, conjqm;
 429     Scalar4 force, torque;
 430     Scalar4 mbody, tbody, fquat;
 431
 432     Scalar dt_half = Scalar(0.5) * deltaT;
 433     Scalar onednft, onednfr, tmp, scale_t, scale_r, akin_t, akin_r;
 434
 435     onednft = Scalar(1.0) + (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
 436     onednfr = (Scalar) (npt_rdata_dimension) / (Scalar) (npt_rdata_nf_t+npt_rdata_nf_r);
 437
 438     tmp = Scalar(-1.0) * dt_half * onednft * npt_rdata_epsilon_dot;
 439     scale_t = exp(tmp);
 440     tmp = Scalar(-1.0) * dt_half * onednfr * npt_rdata_epsilon_dot;
 441     scale_r = exp(tmp);
 442
 443     unsigned int idx_body = d_rigid_group[group_idx];
 444
 445     // Update body velocity and angmom
 446     body_mass = d_rigid_mass[idx_body];
 447     moment_inertia = d_rigid_mi[idx_body];
 448     vel = rdata_vel[idx_body];
 449     force = d_rigid_force[idx_body];
 450     torque = d_rigid_torque[idx_body];
 451     orientation = rdata_orientation[idx_body];
 452     conjqm = rdata_conjqm[idx_body];
 453
 454     exyzFromQuaternion(orientation, ex_space, ey_space, ez_space);
 455
 456     Scalar dtfm = dt_half / body_mass;
 457
 458     // update the velocity
 459     Scalar4 vel2;
 460     vel2.x = scale_t * vel.x + dtfm * force.x;
 461     vel2.y = scale_t * vel.y + dtfm * force.y;
 462     vel2.z = scale_t * vel.z + dtfm * force.z;
 463     vel2.w = 0;
 464
 465     tmp = vel2.x * vel2.x + vel2.y * vel2.y + vel2.z * vel2.z;
 466     akin_t = body_mass * tmp;
 467
 468     // update angular momentum
 469     matrix_dot(ex_space, ey_space, ez_space, torque, tbody);
 470     quatvec(orientation, tbody, fquat);
 471
 472     Scalar4  conjqm2, angmom2;
 473     conjqm2.x = scale_r * conjqm.x + deltaT * fquat.x;
 474     conjqm2.y = scale_r * conjqm.y + deltaT * fquat.y;
 475     conjqm2.z = scale_r * conjqm.z + deltaT * fquat.z;
 476     conjqm2.w = scale_r * conjqm.w + deltaT * fquat.w;
 477
 478     invquatvec(orientation, conjqm2, mbody);
 479     transpose_dot(ex_space, ey_space, ez_space, mbody, angmom2);
 480
 481     angmom2.x *= Scalar(0.5);
 482     angmom2.y *= Scalar(0.5);
 483     angmom2.z *= Scalar(0.5);
 484     angmom2.w = 0;
 485
 486     // update angular velocity
 487     Scalar4 angvel2;
 488     computeAngularVelocity(angmom2, moment_inertia, ex_space, ey_space, ez_space, angvel2);
 489
 490     akin_r = angmom2.x * angvel2.x + angmom2.y * angvel2.y + angmom2.z * angvel2.z;
 491
 492     rdata_vel[idx_body] = vel2;
 493     rdata_angmom[idx_body] = angmom2;
 494     rdata_angvel[idx_body] = angvel2;
 495     rdata_conjqm[idx_body] = conjqm2;
 496
 497     npt_rdata_partial_Ksum_t[group_idx] = akin_t;
 498     npt_rdata_partial_Ksum_r[group_idx] = akin_r;
 499     }
 500
 501 /*! \param rigid_data Rigid body data to step forward 1/2 step
 502     \param d_group_members Device array listing the indicies of the mebers of the group to integrate
 503     \param group_size Number of members in the group
 504     \param d_net_force Particle net forces
 505     \param d_net_virial Particle net virial
 506     \param box Box dimensions for periodic boundary condition handling
 507     \param npt_rdata Thermostat/barostat data
 508     \param deltaT Amount of real time to step forward in one time step
 509
 510 */
 511 cudaError_t gpu_nph_rigid_step_two( const gpu_rigid_data_arrays& rigid_data,
 512                                     unsigned int *d_group_members,
 513                                     unsigned int group_size,
 514                                     Scalar4 *d_net_force,
 515                                     Scalar *d_net_virial,
 516                                     const BoxDim& box,
 517                                     const gpu_npt_rigid_data& npt_rdata,
 518                                     Scalar deltaT)
 519     {
 520     unsigned int n_bodies = rigid_data.n_bodies;
 521     unsigned int n_group_bodies = rigid_data.n_group_bodies;
 522
 523     unsigned int block_size = 64;
 524     unsigned int n_blocks = n_group_bodies / block_size + 1;
 525     dim3 body_grid(n_blocks, 1, 1);
 526     dim3 body_threads(block_size, 1, 1);
 527     gpu_nph_rigid_step_two_body_kernel<<< body_grid, body_threads >>>(rigid_data.vel,
 528                                                                     rigid_data.angmom,
 529                                                                     rigid_data.angvel,
 530                                                                     rigid_data.orientation,
 531                                                                     rigid_data.conjqm,
 532                                                                     rigid_data.body_mass,
 533                                                                     rigid_data.moment_inertia,
 534                                                                     rigid_data.force,
 535                                                                     rigid_data.torque,
 536                                                                     rigid_data.body_indices,
 537                                                                     n_group_bodies,
 538                                                                     n_bodies,
 539                                                                     npt_rdata.epsilon_dot,
 540                                                                     npt_rdata.partial_Ksum_t,
 541                                                                     npt_rdata.partial_Ksum_r,
 542                                                                     npt_rdata.nf_t,
 543                                                                     npt_rdata.nf_r,
 544                                                                     npt_rdata.dimension,
 545                                                                     box,
 546                                                                     deltaT);
 547
 548
 549     return cudaSuccess;
 550     }
 551
 552 #pragma mark RIGID_KINETIC_ENERGY_REDUCTION
 553
 554 //! Shared memory for kinetic energy reduction
 555 extern __shared__ Scalar nph_rigid_sdata[];
 556
 557 /*! Summing the kinetic energy of rigid bodies
 558     \param npt_rdata Thermostat data for rigid bodies
 559
 560 */
 561 extern "C" __global__ void gpu_nph_rigid_reduce_ksum_kernel(Scalar* npt_rdata_partial_Ksum_t,
 562                                                             Scalar* npt_rdata_partial_Ksum_r,
 563                                                             Scalar* npt_rdata_Ksum_t,
 564                                                             Scalar* npt_rdata_Ksum_r,
 565                                                             unsigned int n_bodies)
 566     {
 567     int global_idx = blockIdx.x * blockDim.x + threadIdx.x;
 568
 569     Scalar* body_ke_t = nph_rigid_sdata;
 570     Scalar* body_ke_r = &nph_rigid_sdata[blockDim.x];
 571
 572     Scalar Ksum_t = 0, Ksum_r=0;
 573
 574     // sum up the values in the partial sum via a sliding window
 575     for (int start = 0; start < n_bodies; start += blockDim.x)
 576         {
 577         if (start + threadIdx.x < n_bodies)
 578             {
 579             body_ke_t[threadIdx.x] = npt_rdata_partial_Ksum_t[start + threadIdx.x];
 580             body_ke_r[threadIdx.x] = npt_rdata_partial_Ksum_r[start + threadIdx.x];
 581             }
 582         else
 583             {
 584             body_ke_t[threadIdx.x] = 0;
 585             body_ke_r[threadIdx.x] = 0;
 586             }
 587         __syncthreads();
 588
 589         // reduce the sum within a block
 590         int offset = blockDim.x >> 1;
 591         while (offset > 0)
 592             {
 593             if (threadIdx.x < offset)
 594                 {
 595                 body_ke_t[threadIdx.x] += body_ke_t[threadIdx.x + offset];
 596                 body_ke_r[threadIdx.x] += body_ke_r[threadIdx.x + offset];
 597                 }
 598             offset >>= 1;
 599             __syncthreads();
 600             }
 601
 602         // everybody sums up Ksum
 603         Ksum_t += body_ke_t[0];
 604         Ksum_r += body_ke_r[0];
 605         }
 606
 607     __syncthreads();
 608
 609
 610     if (global_idx == 0)
 611         {
 612         *npt_rdata_Ksum_t = Ksum_t;
 613         *npt_rdata_Ksum_r = Ksum_r;
 614         }
 615
 616     }
 617
 618 /*!
 619     \param npt_rdata Thermostat/barostat data for rigid bodies
 620 */
 621 cudaError_t gpu_nph_rigid_reduce_ksum(const gpu_npt_rigid_data& npt_rdata)
 622     {
 623     // setup the grid to run the kernel
 624     int block_size = 128;
 625     dim3 grid( 1, 1, 1);
 626     dim3 threads(block_size, 1, 1);
 627
 628     // run the kernel: double the block size to accomodate Ksum_t and Ksum_r
 629     gpu_nph_rigid_reduce_ksum_kernel<<< grid, threads, 2 * block_size * sizeof(Scalar) >>>(npt_rdata.partial_Ksum_t,
 630                                                                                           npt_rdata.partial_Ksum_r,
 631                                                                                           npt_rdata.Ksum_t,
 632                                                                                           npt_rdata.Ksum_r,
 633                                                                                           npt_rdata.n_bodies);
 634
 635     return cudaSuccess;
 636     }