libhoomd/cuda/HarmonicAngleForceGPU.cu

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: dnlebard
  51
  52 #include "HarmonicAngleForceGPU.cuh"
  53 #include "TextureTools.h"
  54
  55 #ifdef WIN32
  56 #include <cassert>
  57 #else
  58 #include <assert.h>
  59 #endif
  60
  61 // SMALL a relatively small number
  62 #define SMALL Scalar(0.001)
  63
  64 /*! \file HarmonicAngleForceGPU.cu
  65     \brief Defines GPU kernel code for calculating the harmonic angle forces. Used by HarmonicAngleForceComputeGPU.
  66 */
  67
  68 //! Texture for reading angle parameters
  69 scalar2_tex_t angle_params_tex;
  70
  71 //! Kernel for caculating harmonic angle forces on the GPU
  72 /*! \param d_force Device memory to write computed forces
  73     \param d_virial Device memory to write computed virials
  74     \param virial_pitch Pitch of 2D virial array
  75     \param N number of particles
  76     \param d_pos device array of particle positions
  77     \param d_params Parameters for the angle force
  78     \param box Box dimensions for periodic boundary condition handling
  79     \param alist Angle data to use in calculating the forces
  80     \param pitch Pitch of 2D angles list
  81     \param n_angles_list List of numbers of angles stored on the GPU
  82 */
  83 extern "C" __global__ void gpu_compute_harmonic_angle_forces_kernel(Scalar4* d_force,
  84                                                                     Scalar* d_virial,
  85                                                                     const unsigned int virial_pitch,
  86                                                                     const unsigned int N,
  87                                                                     const Scalar4 *d_pos,
  88                                                                     const Scalar2 *d_params,
  89                                                                     BoxDim box,
  90                                                                     const group_storage<3> *alist,
  91                                                                     const unsigned int *apos_list,
  92                                                                     const unsigned int pitch,
  93                                                                     const unsigned int *n_angles_list)
  94     {
  95     // start by identifying which particle we are to handle
  96     int idx = blockIdx.x * blockDim.x + threadIdx.x;
  97
  98     if (idx >= N)
  99         return;
 100
 101     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
 102     int n_angles = n_angles_list[idx];
 103
 104     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
 105     Scalar4 idx_postype = d_pos[idx];  // we can be either a, b, or c in the a-b-c triplet
 106     Scalar3 idx_pos = make_scalar3(idx_postype.x, idx_postype.y, idx_postype.z);
 107     Scalar3 a_pos,b_pos,c_pos; // allocate space for the a,b, and c atom in the a-b-c triplet
 108
 109     // initialize the force to 0
 110     Scalar4 force_idx = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
 111
 112     Scalar fab[3], fcb[3];
 113
 114     // initialize the virial to 0
 115     Scalar virial[6];
 116     for (int i = 0; i < 6; i++)
 117         virial[i] = Scalar(0.0);
 118
 119     // loop over all angles
 120     for (int angle_idx = 0; angle_idx < n_angles; angle_idx++)
 121         {
 122         group_storage<3> cur_angle = alist[pitch*angle_idx + idx];
 123
 124         int cur_angle_x_idx = cur_angle.idx[0];
 125         int cur_angle_y_idx = cur_angle.idx[1];
 126         int cur_angle_type = cur_angle.idx[2];
 127
 128         int cur_angle_abc = apos_list[pitch*angle_idx + idx];
 129
 130         // get the a-particle's position (MEM TRANSFER: 16 bytes)
 131         Scalar4 x_postype = d_pos[cur_angle_x_idx];
 132         Scalar3 x_pos = make_scalar3(x_postype.x, x_postype.y, x_postype.z);
 133         // get the c-particle's position (MEM TRANSFER: 16 bytes)
 134         Scalar4 y_postype = d_pos[cur_angle_y_idx];
 135         Scalar3 y_pos = make_scalar3(y_postype.x, y_postype.y, y_postype.z);
 136
 137         if (cur_angle_abc == 0)
 138             {
 139             a_pos = idx_pos;
 140             b_pos = x_pos;
 141             c_pos = y_pos;
 142             }
 143         if (cur_angle_abc == 1)
 144             {
 145             b_pos = idx_pos;
 146             a_pos = x_pos;
 147             c_pos = y_pos;
 148             }
 149         if (cur_angle_abc == 2)
 150             {
 151             c_pos = idx_pos;
 152             a_pos = x_pos;
 153             b_pos = y_pos;
 154             }
 155
 156         // calculate dr for a-b,c-b,and a-c
 157         Scalar3 dab = a_pos - b_pos;
 158         Scalar3 dcb = c_pos - b_pos;
 159         Scalar3 dac = a_pos - c_pos;
 160
 161         // apply periodic boundary conditions
 162         dab = box.minImage(dab);
 163         dcb = box.minImage(dcb);
 164         dac = box.minImage(dac);
 165
 166         // get the angle parameters (MEM TRANSFER: 8 bytes)
 167         Scalar2 params = texFetchScalar2(d_params, angle_params_tex, cur_angle_type);
 168         Scalar K = params.x;
 169         Scalar t_0 = params.y;
 170
 171         Scalar rsqab = dot(dab, dab);
 172         Scalar rab = sqrtf(rsqab);
 173         Scalar rsqcb = dot(dcb, dcb);
 174         Scalar rcb = sqrtf(rsqcb);
 175
 176         Scalar c_abbc = dot(dab, dcb);
 177         c_abbc /= rab*rcb;
 178
 179         if (c_abbc > Scalar(1.0)) c_abbc = Scalar(1.0);
 180         if (c_abbc < -Scalar(1.0)) c_abbc = -Scalar(1.0);
 181
 182         Scalar s_abbc = sqrtf(Scalar(1.0) - c_abbc*c_abbc);
 183         if (s_abbc < SMALL) s_abbc = SMALL;
 184         s_abbc = Scalar(1.0)/s_abbc;
 185
 186         // actually calculate the force
 187         Scalar dth = fast::acos(c_abbc) - t_0;
 188         Scalar tk = K*dth;
 189
 190         Scalar a = -Scalar(1.0) * tk * s_abbc;
 191         Scalar a11 = a*c_abbc/rsqab;
 192         Scalar a12 = -a / (rab*rcb);
 193         Scalar a22 = a*c_abbc / rsqcb;
 194
 195         fab[0] = a11*dab.x + a12*dcb.x;
 196         fab[1] = a11*dab.y + a12*dcb.y;
 197         fab[2] = a11*dab.z + a12*dcb.z;
 198
 199         fcb[0] = a22*dcb.x + a12*dab.x;
 200         fcb[1] = a22*dcb.y + a12*dab.y;
 201         fcb[2] = a22*dcb.z + a12*dab.z;
 202
 203         // compute 1/3 of the energy, 1/3 for each atom in the angle
 204         Scalar angle_eng = tk*dth*Scalar(Scalar(1.0)/Scalar(6.0));
 205
 206         // upper triangular version of virial tensor
 207         Scalar angle_virial[6];
 208         angle_virial[0] = Scalar(1./3.)*(dab.x*fab[0] + dcb.x*fcb[0]);
 209         angle_virial[1] = Scalar(1./3.)*(dab.y*fab[0] + dcb.y*fcb[0]);
 210         angle_virial[2] = Scalar(1./3.)*(dab.z*fab[0] + dcb.z*fcb[0]);
 211         angle_virial[3] = Scalar(1./3.)*(dab.y*fab[1] + dcb.y*fcb[1]);
 212         angle_virial[4] = Scalar(1./3.)*(dab.z*fab[1] + dcb.z*fcb[1]);
 213         angle_virial[5] = Scalar(1./3.)*(dab.z*fab[2] + dcb.z*fcb[2]);
 214
 215
 216         if (cur_angle_abc == 0)
 217             {
 218             force_idx.x += fab[0];
 219             force_idx.y += fab[1];
 220             force_idx.z += fab[2];
 221             }
 222         if (cur_angle_abc == 1)
 223             {
 224             force_idx.x -= fab[0] + fcb[0];
 225             force_idx.y -= fab[1] + fcb[1];
 226             force_idx.z -= fab[2] + fcb[2];
 227             }
 228         if (cur_angle_abc == 2)
 229             {
 230             force_idx.x += fcb[0];
 231             force_idx.y += fcb[1];
 232             force_idx.z += fcb[2];
 233             }
 234
 235         force_idx.w += angle_eng;
 236
 237         for (int i = 0; i < 6; i++)
 238             virial[i] += angle_virial[i];
 239         }
 240
 241     // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
 242     d_force[idx] = force_idx;
 243     for (int i = 0; i < 6; i++)
 244         d_virial[i*virial_pitch+idx] = virial[i];
 245     }
 246
 247 /*! \param d_force Device memory to write computed forces
 248     \param d_virial Device memory to write computed virials
 249     \param virial_pitch pitch of 2D virial arary
 250     \param N number of particles
 251     \param d_pos device array of particle positions
 252     \param box Box dimensions (in GPU format) to use for periodic boundary conditions
 253     \param atable List of angles stored on the GPU
 254     \param pitch Pitch of 2D angles list
 255     \param n_angles_list List of numbers of angles stored on the GPU
 256     \param d_params K and t_0 params packed as Scalar2 variables
 257     \param n_angle_types Number of angle types in d_params
 258     \param block_size Block size to use when performing calculations
 259     \param compute_capability Device compute capability (200, 300, 350, ...)
 260
 261     \returns Any error code resulting from the kernel launch
 262     \note Always returns cudaSuccess in release builds to avoid the cudaThreadSynchronize()
 263
 264     \a d_params should include one Scalar2 element per angle type. The x component contains K the spring constant
 265     and the y component contains t_0 the equilibrium angle.
 266 */
 267 cudaError_t gpu_compute_harmonic_angle_forces(Scalar4* d_force,
 268                                               Scalar* d_virial,
 269                                               const unsigned int virial_pitch,
 270                                               const unsigned int N,
 271                                               const Scalar4 *d_pos,
 272                                               const BoxDim& box,
 273                                               const group_storage<3> *atable,
 274                                               const unsigned int *apos_list,
 275                                               const unsigned int pitch,
 276                                               const unsigned int *n_angles_list,
 277                                               Scalar2 *d_params,
 278                                               unsigned int n_angle_types,
 279                                               int block_size,
 280                                               const unsigned int compute_capability)
 281     {
 282     assert(d_params);
 283
 284     static unsigned int max_block_size = UINT_MAX;
 285     if (max_block_size == UINT_MAX)
 286         {
 287         cudaFuncAttributes attr;
 288         cudaFuncGetAttributes(&attr, (const void *)gpu_compute_harmonic_angle_forces_kernel);
 289         max_block_size = attr.maxThreadsPerBlock;
 290         }
 291
 292     unsigned int run_block_size = min(block_size, max_block_size);
 293
 294     // setup the grid to run the kernel
 295     dim3 grid( N / run_block_size + 1, 1, 1);
 296     dim3 threads(run_block_size, 1, 1);
 297
 298     // bind the texture on pre sm 35 arches
 299     if (compute_capability < 350)
 300         {
 301         cudaError_t error = cudaBindTexture(0, angle_params_tex, d_params, sizeof(Scalar2) * n_angle_types);
 302         if (error != cudaSuccess)
 303             return error;
 304         }
 305
 306     // run the kernel
 307     gpu_compute_harmonic_angle_forces_kernel<<< grid, threads>>>(d_force, d_virial, virial_pitch, N, d_pos, d_params, box,
 308         atable, apos_list, pitch, n_angles_list);
 309
 310     return cudaSuccess;
 311     }