libhoomd/cuda/BondTablePotentialGPU.cu

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: joaander
  51
  52 #include "BondTablePotentialGPU.cuh"
  53 #include "TextureTools.h"
  54
  55
  56 #ifdef WIN32
  57 #include <cassert>
  58 #else
  59 #include <assert.h>
  60 #endif
  61
  62 /*! \file BondTablePotentialGPU.cu
  63     \brief Defines GPU kernel code for calculating the table bond forces. Used by BondTablePotentialGPU.
  64 */
  65
  66
  67 //! Texture for reading table values
  68 scalar2_tex_t tables_tex;
  69
  70 /*!  This kernel is called to calculate the table pair forces on all N particles
  71
  72     \param d_force Device memory to write computed forces
  73     \param d_virial Device memory to write computed virials
  74     \param virial_pitch Pitch of 2D virial array
  75     \param N number of particles in system
  76     \param d_pos device array of particle positions
  77     \param box Box dimensions used to implement periodic boundary conditions
  78     \param blist List of bonds stored on the GPU
  79     \param pitch Pitch of 2D bond list
  80     \param n_bonds_list List of numbers of bonds stored on the GPU
  81     \param n_bond_type number of bond types
  82     \param d_params Parameters for each table associated with a type pair
  83     \param table_value index helper function
  84     \param d_flags Flag allocated on the device for use in checking for bonds that cannot be evaluated
  85
  86     See BondTablePotential for information on the memory layout.
  87
  88     \b Details:
  89     * Table entries are read from tables_tex. Note that currently this is bound to a 1D memory region. Performance tests
  90       at a later date may result in this changing.
  91 */
  92 __global__ void gpu_compute_bondtable_forces_kernel(Scalar4* d_force,
  93                                      Scalar* d_virial,
  94                                      const unsigned int virial_pitch,
  95                                      const unsigned int N,
  96                                      const Scalar4 *d_pos,
  97                                      const BoxDim box,
  98                                      const group_storage<2> *blist,
  99                                      const unsigned int pitch,
 100                                      const unsigned int *n_bonds_list,
 101                                      const unsigned int n_bond_type,
 102                                      const Scalar2 *d_tables,
 103                                      const Scalar4 *d_params,
 104                                      const Index2D table_value,
 105                                      unsigned int *d_flags)
 106     {
 107
 108
 109     // read in params for easy and fast access in the kernel
 110     extern __shared__ Scalar4 s_params[];
 111     for (unsigned int cur_offset = 0; cur_offset < n_bond_type; cur_offset += blockDim.x)
 112         {
 113         if (cur_offset + threadIdx.x < n_bond_type)
 114             s_params[cur_offset + threadIdx.x] = d_params[cur_offset + threadIdx.x];
 115         }
 116     __syncthreads();
 117
 118
 119     // start by identifying which particle we are to handle
 120     unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
 121
 122     if (idx >= N)
 123         return;
 124
 125     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
 126     int n_bonds =n_bonds_list[idx];
 127
 128     // read in the position of our particle.
 129     Scalar4 postype = d_pos[idx];
 130     Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
 131
 132     // initialize the force to 0
 133     Scalar4 force = make_scalar4(0.0f, 0.0f, 0.0f, 0.0f);
 134     // initialize the virial tensor to 0
 135     Scalar virial[6];
 136     for (unsigned int i = 0; i < 6; i++)
 137         virial[i] = 0;
 138
 139     // loop over neighbors
 140     for (int bond_idx = 0; bond_idx < n_bonds; bond_idx++)
 141         {
 142         // MEM TRANSFER: 8 bytes
 143         group_storage<2> cur_bond = blist[pitch*bond_idx + idx];
 144
 145         int cur_bond_idx = cur_bond.idx[0];
 146         int cur_bond_type = cur_bond.idx[1];
 147
 148         // get the bonded particle's position (MEM_TRANSFER: 16 bytes)
 149         Scalar4 neigh_postype = d_pos[cur_bond_idx];
 150         Scalar3 neigh_pos = make_scalar3(neigh_postype.x, neigh_postype.y, neigh_postype.z);
 151
 152         // calculate dr (FLOPS: 3)
 153         Scalar3 dx = pos - neigh_pos;
 154
 155         // apply periodic boundary conditions (FLOPS: 12)
 156         dx = box.minImage(dx);
 157
 158         // access needed parameters
 159         Scalar4 params = s_params[cur_bond_type];
 160         Scalar rmin = params.x;
 161         Scalar rmax = params.y;
 162         Scalar delta_r = params.z;
 163
 164         // calculate r
 165         Scalar rsq = dot(dx, dx);
 166         Scalar r = sqrtf(rsq);
 167
 168         if (r < rmax && r >= rmin)
 169             {
 170             // precomputed term
 171             Scalar value_f = (r - rmin) / delta_r;
 172
 173             // compute index into the table and read in values
 174             unsigned int value_i = floor(value_f);
 175
 176             Scalar2 VF0 = texFetchScalar2(d_tables, tables_tex, table_value(value_i, cur_bond_type));
 177             Scalar2 VF1 = texFetchScalar2(d_tables, tables_tex, table_value(value_i+1, cur_bond_type));
 178             // unpack the data
 179             Scalar V0 = VF0.x;
 180             Scalar V1 = VF1.x;
 181             Scalar F0 = VF0.y;
 182             Scalar F1 = VF1.y;
 183
 184             // compute the linear interpolation coefficient
 185             Scalar f = value_f - Scalar(value_i);
 186
 187             // interpolate to get V and F;
 188             Scalar V = V0 + f * (V1 - V0);
 189             Scalar F = F0 + f * (F1 - F0);
 190
 191             // convert to standard variables used by the other pair computes in HOOMD-blue
 192             Scalar forcemag_divr = 0.0f;
 193             if (r > 0.0f)
 194                 forcemag_divr = F / r;
 195             Scalar bond_eng = V;
 196             // calculate the virial
 197             Scalar force_div2r = Scalar(0.5) * forcemag_divr;
 198             virial[0] += dx.x * dx.x * force_div2r; // xx
 199             virial[1] += dx.x * dx.y * force_div2r; // xy
 200             virial[2] += dx.x * dx.z * force_div2r; // xz
 201             virial[3] += dx.y * dx.y * force_div2r; // yy
 202             virial[4] += dx.y * dx.z * force_div2r; // yz
 203             virial[5] += dx.z * dx.z * force_div2r; // zz
 204
 205             // add up the force vector components (FLOPS: 7)
 206             force.x += dx.x * forcemag_divr;
 207             force.y += dx.y * forcemag_divr;
 208             force.z += dx.z * forcemag_divr;
 209             force.w += bond_eng * 0.5f;
 210             }
 211         else
 212             {
 213             *d_flags = 1;
 214             }
 215         }
 216
 217
 218     // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes);
 219     d_force[idx] = force;
 220     for (unsigned int i = 0; i < 6 ; i++)
 221         d_virial[i*virial_pitch + idx] = virial[i];
 222     }
 223
 224
 225 /*! \param d_force Device memory to write computed forces
 226     \param d_virial Device memory to write computed virials
 227     \param virial_pitch pitch of 2D virial array
 228     \param N number of particles
 229     \param d_pos particle positions on the device
 230     \param box Box dimensions used to implement periodic boundary conditions
 231     \param blist List of bonds stored on the GPU
 232     \param pitch Pitch of 2D bond list
 233     \param n_bonds_list List of numbers of bonds stored on the GPU
 234     \param n_bond_type number of bond types
 235     \param d_tables Tables of the potential and force
 236     \param d_params Parameters for each table associated with a type pair
 237     \param table_width Number of entries in the table
 238     \param table_value indexer helper
 239     \param d_flags flags on the device - a 1 will be written if evaluation
 240                    of forces failed for any bond
 241     \param block_size Block size at which to run the kernel
 242     \param compute_capability Compute capability of the execution device (200, 3000, 350, ...)
 243
 244     \note This is just a kernel driver. See gpu_compute_bondtable_forces_kernel for full documentation.
 245 */
 246 cudaError_t gpu_compute_bondtable_forces(Scalar4* d_force,
 247                                      Scalar* d_virial,
 248                                      const unsigned int virial_pitch,
 249                                      const unsigned int N,
 250                                      const Scalar4 *d_pos,
 251                                      const BoxDim &box,
 252                                      const group_storage<2> *blist,
 253                                      const unsigned int pitch,
 254                                      const unsigned int *n_bonds_list,
 255                                      const unsigned int n_bond_type,
 256                                      const Scalar2 *d_tables,
 257                                      const Scalar4 *d_params,
 258                                      const unsigned int table_width,
 259                                      const Index2D &table_value,
 260                                      unsigned int *d_flags,
 261                                      const unsigned int block_size,
 262                                      const unsigned int compute_capability)
 263     {
 264     assert(d_params);
 265     assert(d_tables);
 266     assert(n_bond_type > 0);
 267     assert(table_width > 1);
 268
 269     static unsigned int max_block_size = UINT_MAX;
 270     if (max_block_size == UINT_MAX)
 271         {
 272         cudaFuncAttributes attr;
 273         cudaFuncGetAttributes(&attr, (const void *)gpu_compute_bondtable_forces_kernel);
 274         max_block_size = attr.maxThreadsPerBlock;
 275         }
 276
 277     unsigned int run_block_size = min(block_size, max_block_size);
 278
 279     // setup the grid to run the kernel
 280     dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
 281     dim3 threads(run_block_size, 1, 1);
 282
 283     // bind the tables texture only on pre sm 35 arches
 284     if (compute_capability < 350)
 285         {
 286         tables_tex.normalized = false;
 287         tables_tex.filterMode = cudaFilterModePoint;
 288         cudaError_t error = cudaBindTexture(0, tables_tex, d_tables, sizeof(Scalar2) * table_value.getNumElements());
 289         if (error != cudaSuccess)
 290             return error;
 291         }
 292
 293     gpu_compute_bondtable_forces_kernel<<< grid, threads, sizeof(Scalar4)*n_bond_type >>>
 294             (d_force,
 295              d_virial,
 296              virial_pitch,
 297              N,
 298              d_pos,
 299              box,
 300              blist,
 301              pitch,
 302              n_bonds_list,
 303              n_bond_type,
 304              d_tables,
 305              d_params,
 306              table_value,
 307              d_flags);
 308
 309     return cudaSuccess;
 310     }
 311
 312 // vim:syntax=cpp