libhoomd/cuda/TableDihedralForceGPU.cu

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: phillicl
  51
  52 #include "TableDihedralForceGPU.cuh"
  53 #include "TextureTools.h"
  54
  55 #include "VectorMath.h"
  56
  57 #ifdef WIN32
  58 #include <cassert>
  59 #else
  60 #include <assert.h>
  61 #endif
  62
  63 // SMALL a relatively small number
  64 #define SMALL 0.001f
  65
  66 /*! \file TableDihedralForceGPU.cu
  67     \brief Defines GPU kernel code for calculating the table dihedral forces. Used by TableDihedralForceComputeGPU.
  68 */
  69
  70
  71 //! Texture for reading table values
  72 scalar2_tex_t tables_tex;
  73
  74 /*!  This kernel is called to calculate the table dihedral forces on all triples this is defined or
  75
  76     \param d_force Device memory to write computed forces
  77     \param d_virial Device memory to write computed virials
  78     \param virial_pitch Pitch of 2D virial array
  79     \param N number of particles in system
  80     \param device_pos device array of particle positions
  81     \param box Box dimensions used to implement periodic boundary conditions
  82     \param dlist List of dihedrals stored on the GPU
  83     \param pitch Pitch of 2D dihedral list
  84     \param n_dihedrals_list List of numbers of dihedrals stored on the GPU
  85     \param n_dihedral_type number of dihedral types
  86     \param d_tables Tables of the potential and force
  87     \param table_value index helper function
  88     \param delta_phi dihedral delta of the table
  89
  90     See TableDihedralForceCompute for information on the memory layout.
  91
  92     \b Details:
  93     * Table entries are read from tables_tex. Note that currently this is bound to a 1D memory region. Performance tests
  94       at a later date may result in this changing.
  95 */
  96 __global__ void gpu_compute_table_dihedral_forces_kernel(Scalar4* d_force,
  97                                      Scalar* d_virial,
  98                                      const unsigned int virial_pitch,
  99                                      const unsigned int N,
 100                                      const Scalar4 *device_pos,
 101                                      const BoxDim box,
 102                                      const group_storage<4> *dlist,
 103                                      const unsigned int *dihedral_ABCD,
 104                                      const unsigned int pitch,
 105                                      const unsigned int *n_dihedrals_list,
 106                                      const Scalar2 *d_tables,
 107                                      const Index2D table_value,
 108                                      const Scalar delta_phi)
 109     {
 110
 111
 112     // start by identifying which particle we are to handle
 113     unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
 114
 115     if (idx >= N)
 116         return;
 117
 118     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
 119     int n_dihedrals =n_dihedrals_list[idx];
 120
 121     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
 122     Scalar4 idx_postype = device_pos[idx];  // we can be either a, b, or c in the a-b-c triplet
 123     Scalar3 idx_pos = make_scalar3(idx_postype.x, idx_postype.y, idx_postype.z);
 124     Scalar3 pos_a,pos_b,pos_c, pos_d; // allocate space for the a,b,c, and d atom in the a-b-c-d set
 125
 126
 127     // initialize the force to 0
 128     Scalar4 force_idx = make_scalar4(0.0f, 0.0f, 0.0f, 0.0f);
 129
 130     // initialize the virial tensor to 0
 131     Scalar virial_idx[6];
 132     for (unsigned int i = 0; i < 6; i++)
 133         virial_idx[i] = 0;
 134
 135     for (int dihedral_idx = 0; dihedral_idx < n_dihedrals; dihedral_idx++)
 136         {
 137         group_storage<4> cur_dihedral = dlist[pitch*dihedral_idx + idx];
 138         unsigned int cur_ABCD = dihedral_ABCD[pitch*dihedral_idx + idx];
 139
 140         int cur_dihedral_x_idx = cur_dihedral.idx[0];
 141         int cur_dihedral_y_idx = cur_dihedral.idx[1];
 142         int cur_dihedral_z_idx = cur_dihedral.idx[2];
 143         int cur_dihedral_type = cur_dihedral.idx[3];
 144         int cur_dihedral_abcd = cur_ABCD;
 145
 146         // get the a-particle's position (MEM TRANSFER: 16 bytes)
 147         Scalar4 x_postype = device_pos[cur_dihedral_x_idx];
 148         Scalar3 x_pos = make_scalar3(x_postype.x, x_postype.y, x_postype.z);
 149         // get the c-particle's position (MEM TRANSFER: 16 bytes)
 150         Scalar4 y_postype = device_pos[cur_dihedral_y_idx];
 151         Scalar3 y_pos = make_scalar3(y_postype.x, y_postype.y, y_postype.z);
 152         // get the d-particle's position (MEM TRANSFER: 16 bytes)
 153         Scalar4 z_postype = device_pos[cur_dihedral_z_idx];
 154         Scalar3 z_pos = make_scalar3(z_postype.x, z_postype.y, z_postype.z);
 155
 156         if (cur_dihedral_abcd == 0)
 157             {
 158             pos_a = idx_pos;
 159             pos_b = x_pos;
 160             pos_c = y_pos;
 161             pos_d = z_pos;
 162             }
 163         if (cur_dihedral_abcd == 1)
 164             {
 165             pos_b = idx_pos;
 166             pos_a = x_pos;
 167             pos_c = y_pos;
 168             pos_d = z_pos;
 169             }
 170         if (cur_dihedral_abcd == 2)
 171             {
 172             pos_c = idx_pos;
 173             pos_a = x_pos;
 174             pos_b = y_pos;
 175             pos_d = z_pos;
 176             }
 177         if (cur_dihedral_abcd == 3)
 178             {
 179             pos_d = idx_pos;
 180             pos_a = x_pos;
 181             pos_b = y_pos;
 182             pos_c = z_pos;
 183             }
 184
 185         // calculate dr for a-b,c-b,and a-c
 186         Scalar3 dab = pos_a - pos_b;
 187         Scalar3 dcb = pos_c - pos_b;
 188         Scalar3 ddc = pos_d - pos_c;
 189
 190         dab = box.minImage(dab);
 191         dcb = box.minImage(dcb);
 192         ddc = box.minImage(ddc);
 193
 194         Scalar3 dcbm = -dcb;
 195         dcbm = box.minImage(dcbm);
 196
 197         // c0 calculation
 198         Scalar sb1 = Scalar(1.0) / (dab.x*dab.x + dab.y*dab.y + dab.z*dab.z);
 199         Scalar sb3 = Scalar(1.0) / (ddc.x*ddc.x + ddc.y*ddc.y + ddc.z*ddc.z);
 200
 201         Scalar rb1 = fast::sqrt(sb1);
 202         Scalar rb3 = fast::sqrt(sb3);
 203
 204         Scalar c0 = (dab.x*ddc.x + dab.y*ddc.y + dab.z*ddc.z) * rb1*rb3;
 205
 206         // 1st and 2nd angle
 207
 208         Scalar b1mag2 = dab.x*dab.x + dab.y*dab.y + dab.z*dab.z;
 209         Scalar b1mag = fast::sqrt(b1mag2);
 210         Scalar b2mag2 = dcb.x*dcb.x + dcb.y*dcb.y + dcb.z*dcb.z;
 211         Scalar b2mag = fast::sqrt(b2mag2);
 212         Scalar b3mag2 = ddc.x*ddc.x + ddc.y*ddc.y + ddc.z*ddc.z;
 213         Scalar b3mag = fast::sqrt(b3mag2);
 214
 215         Scalar ctmp = dab.x*dcb.x + dab.y*dcb.y + dab.z*dcb.z;
 216         Scalar r12c1 = Scalar(1.0) / (b1mag*b2mag);
 217         Scalar c1mag = ctmp * r12c1;
 218
 219         ctmp = dcbm.x*ddc.x + dcbm.y*ddc.y + dcbm.z*ddc.z;
 220         Scalar r12c2 = Scalar(1.0) / (b2mag*b3mag);
 221         Scalar c2mag = ctmp * r12c2;
 222
 223         // cos and sin of 2 angles and final c
 224
 225         Scalar sin2 = Scalar(1.0) - c1mag*c1mag;
 226         if (sin2 < 0.0f) sin2 = 0.0f;
 227         Scalar sc1 = fast::sqrt(sin2);
 228         if (sc1 < SMALL) sc1 = SMALL;
 229         sc1 = Scalar(1.0)/sc1;
 230
 231         sin2 = Scalar(1.0) - c2mag*c2mag;
 232         if (sin2 < 0.0f) sin2 = 0.0f;
 233         Scalar sc2 = fast::sqrt(sin2);
 234         if (sc2 < SMALL) sc2 = SMALL;
 235         sc2 = Scalar(1.0)/sc2;
 236
 237         Scalar s12 = sc1 * sc2;
 238         Scalar c = (c0 + c1mag*c2mag) * s12;
 239
 240         if (c > Scalar(1.0)) c = Scalar(1.0);
 241         if (c < -Scalar(1.0)) c = -Scalar(1.0);
 242
 243         // determinant
 244         Scalar det = dot(dab,make_scalar3(ddc.y*dcb.z-ddc.z*dcb.y,
 245                                           ddc.z*dcb.x-ddc.x*dcb.z,
 246                                           ddc.x*dcb.y-ddc.y*dcb.x));
 247
 248         //phi
 249         Scalar phi = acosf(c);
 250
 251         if (det < 0) phi = -phi;
 252
 253         // precomputed term
 254         Scalar value_f = (Scalar(M_PI)+phi) / delta_phi;
 255
 256         // compute index into the table and read in values
 257         unsigned int value_i = value_f;
 258         Scalar2 VT0 = texFetchScalar2(d_tables, tables_tex, table_value(value_i, cur_dihedral_type));
 259         Scalar2 VT1 = texFetchScalar2(d_tables, tables_tex, table_value(value_i+1, cur_dihedral_type));
 260         // unpack the data
 261         Scalar V0 = VT0.x;
 262         Scalar V1 = VT1.x;
 263         Scalar T0 = VT0.y;
 264         Scalar T1 = VT1.y;
 265
 266         // compute the linear interpolation coefficient
 267         Scalar f = value_f - Scalar(value_i);
 268
 269         // interpolate to get V and T;
 270         Scalar V = V0 + f * (V1 - V0);
 271         Scalar T = T0 + f * (T1 - T0);
 272
 273         // from Blondel and Karplus 1995
 274         vec3<Scalar> A = cross(vec3<Scalar>(dab),vec3<Scalar>(dcbm));
 275         Scalar Asq = dot(A,A);
 276
 277         vec3<Scalar> B = cross(vec3<Scalar>(ddc),vec3<Scalar>(dcbm));
 278         Scalar Bsq = dot(B,B);
 279
 280         Scalar3 f_a = -T*vec_to_scalar3(b2mag/Asq*A);
 281         Scalar3 f_b = -f_a + T/b2mag*vec_to_scalar3(dot(dab,dcbm)/Asq*A-dot(ddc,dcbm)/Bsq*B);
 282         Scalar3 f_c = T*vec_to_scalar3(dot(ddc,dcbm)/Bsq/b2mag*B-dot(dab,dcbm)/Asq/b2mag*A-b2mag/Bsq*B);
 283         Scalar3 f_d = T*b2mag/Bsq*vec_to_scalar3(B);
 284
 285         // Now, apply the force to each individual atom a,b,c,d
 286         // and accumlate the energy/virial
 287
 288         // compute 1/4 of the energy, 1/4 for each atom in the dihedral
 289         Scalar dihedral_eng = V*Scalar(1.0/4.0);
 290
 291         // compute 1/4 of the virial, 1/4 for each atom in the dihedral
 292         // upper triangular version of virial tensor
 293         Scalar dihedral_virial[6];
 294
 295         dihedral_virial[0] = (1./4.)*(dab.x*f_a.x + dcb.x*f_c.x + (ddc.x+dcb.x)*f_d.x);
 296         dihedral_virial[1] = (1./4.)*(dab.y*f_a.x + dcb.y*f_c.x + (ddc.y+dcb.y)*f_d.x);
 297         dihedral_virial[2] = (1./4.)*(dab.z*f_a.x + dcb.z*f_c.x + (ddc.z+dcb.z)*f_d.x);
 298         dihedral_virial[3] = (1./4.)*(dab.y*f_a.y + dcb.y*f_c.y + (ddc.y+dcb.y)*f_d.y);
 299         dihedral_virial[4] = (1./4.)*(dab.z*f_a.y + dcb.z*f_c.y + (ddc.z+dcb.z)*f_d.y);
 300         dihedral_virial[5] = (1./4.)*(dab.z*f_a.z + dcb.z*f_c.z + (ddc.z+dcb.z)*f_d.z);
 301
 302         if (cur_dihedral_abcd == 0)
 303             {
 304             force_idx.x += f_a.x;
 305             force_idx.y += f_a.y;
 306             force_idx.z += f_a.z;
 307             }
 308         if (cur_dihedral_abcd == 1)
 309             {
 310             force_idx.x += f_b.x;
 311             force_idx.y += f_b.y;
 312             force_idx.z += f_b.z;
 313             }
 314         if (cur_dihedral_abcd == 2)
 315             {
 316             force_idx.x += f_c.x;
 317             force_idx.y += f_c.y;
 318             force_idx.z += f_c.z;
 319             }
 320         if (cur_dihedral_abcd == 3)
 321             {
 322             force_idx.x += f_d.x;
 323             force_idx.y += f_d.y;
 324             force_idx.z += f_d.z;
 325             }
 326
 327         force_idx.w += dihedral_eng;
 328         for (int k = 0; k < 6; k++)
 329             virial_idx[k] += dihedral_virial[k];
 330         }
 331
 332     // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
 333     d_force[idx] = force_idx;
 334     for (int k = 0; k < 6; k++)
 335        d_virial[k*virial_pitch+idx] = virial_idx[k];
 336     }
 337
 338
 339 /*! \param d_force Device memory to write computed forces
 340     \param d_virial Device memory to write computed virials
 341     \param virial_pitch pitch of 2D virial array
 342     \param N number of particles
 343     \param device_pos particle positions on the device
 344     \param box Box dimensions used to implement periodic boundary conditions
 345     \param dlist List of dihedrals stored on the GPU
 346     \param pitch Pitch of 2D dihedral list
 347     \param n_dihedrals_list List of numbers of dihedrals stored on the GPU
 348     \param n_dihedral_type number of dihedral types
 349     \param d_tables Tables of the potential and force
 350     \param table_width Number of points in each table
 351     \param table_value indexer helper
 352     \param block_size Block size at which to run the kernel
 353     \param compute_capability Compute capability of the device (200, 300, 350, ...)
 354
 355     \note This is just a kernel driver. See gpu_compute_table_dihedral_forces_kernel for full documentation.
 356 */
 357 cudaError_t gpu_compute_table_dihedral_forces(Scalar4* d_force,
 358                                      Scalar* d_virial,
 359                                      const unsigned int virial_pitch,
 360                                      const unsigned int N,
 361                                      const Scalar4 *device_pos,
 362                                      const BoxDim &box,
 363                                      const group_storage<4> *dlist,
 364                                      const unsigned int *dihedral_ABCD,
 365                                      const unsigned int pitch,
 366                                      const unsigned int *n_dihedrals_list,
 367                                      const Scalar2 *d_tables,
 368                                      const unsigned int table_width,
 369                                      const Index2D &table_value,
 370                                      const unsigned int block_size,
 371                                      const unsigned int compute_capability)
 372     {
 373     assert(d_tables);
 374     assert(table_width > 1);
 375
 376     static unsigned int max_block_size = UINT_MAX;
 377     if (max_block_size == UINT_MAX)
 378         {
 379         cudaFuncAttributes attr;
 380         cudaFuncGetAttributes(&attr, (const void *)gpu_compute_table_dihedral_forces_kernel);
 381         max_block_size = attr.maxThreadsPerBlock;
 382         }
 383
 384     unsigned int run_block_size = min(block_size, max_block_size);
 385
 386     // setup the grid to run the kernel
 387     dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
 388     dim3 threads(run_block_size, 1, 1);
 389
 390     // bind the tables texture on pre sm35 devices
 391     if (compute_capability < 350)
 392         {
 393         tables_tex.normalized = false;
 394         tables_tex.filterMode = cudaFilterModePoint;
 395         cudaError_t error = cudaBindTexture(0, tables_tex, d_tables, sizeof(Scalar2) * table_value.getNumElements());
 396         if (error != cudaSuccess)
 397             return error;
 398         }
 399
 400     Scalar delta_phi = Scalar(2.0*M_PI)/(Scalar)(table_width - 1);
 401
 402     gpu_compute_table_dihedral_forces_kernel<<< grid, threads>>>
 403             (d_force,
 404              d_virial,
 405              virial_pitch,
 406              N,
 407              device_pos,
 408              box,
 409              dlist,
 410              dihedral_ABCD,
 411              pitch,
 412              n_dihedrals_list,
 413              d_tables,
 414              table_value,
 415              delta_phi);
 416
 417     return cudaSuccess;
 418     }
 419
 420 // vim:syntax=cpp