2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: joaander
52 #include "BondTablePotentialGPU.cuh"
53 #include "TextureTools.h"
62 /*! \file BondTablePotentialGPU.cu
63 \brief Defines GPU kernel code for calculating the table bond forces. Used by BondTablePotentialGPU.
67 //! Texture for reading table values
68 scalar2_tex_t tables_tex;
70 /*! This kernel is called to calculate the table pair forces on all N particles
72 \param d_force Device memory to write computed forces
73 \param d_virial Device memory to write computed virials
74 \param virial_pitch Pitch of 2D virial array
75 \param N number of particles in system
76 \param d_pos device array of particle positions
77 \param box Box dimensions used to implement periodic boundary conditions
78 \param blist List of bonds stored on the GPU
79 \param pitch Pitch of 2D bond list
80 \param n_bonds_list List of numbers of bonds stored on the GPU
81 \param n_bond_type number of bond types
82 \param d_params Parameters for each table associated with a type pair
83 \param table_value index helper function
84 \param d_flags Flag allocated on the device for use in checking for bonds that cannot be evaluated
86 See BondTablePotential for information on the memory layout.
89 * Table entries are read from tables_tex. Note that currently this is bound to a 1D memory region. Performance tests
90 at a later date may result in this changing.
92 __global__ void gpu_compute_bondtable_forces_kernel(Scalar4* d_force,
94 const unsigned int virial_pitch,
98 const group_storage<2> *blist,
99 const unsigned int pitch,
100 const unsigned int *n_bonds_list,
101 const unsigned int n_bond_type,
102 const Scalar2 *d_tables,
103 const Scalar4 *d_params,
104 const Index2D table_value,
105 unsigned int *d_flags)
109 // read in params for easy and fast access in the kernel
110 extern __shared__ Scalar4 s_params[];
111 for (unsigned int cur_offset = 0; cur_offset < n_bond_type; cur_offset += blockDim.x)
113 if (cur_offset + threadIdx.x < n_bond_type)
114 s_params[cur_offset + threadIdx.x] = d_params[cur_offset + threadIdx.x];
119 // start by identifying which particle we are to handle
120 unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
125 // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
126 int n_bonds =n_bonds_list[idx];
128 // read in the position of our particle.
129 Scalar4 postype = d_pos[idx];
130 Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
132 // initialize the force to 0
133 Scalar4 force = make_scalar4(0.0f, 0.0f, 0.0f, 0.0f);
134 // initialize the virial tensor to 0
136 for (unsigned int i = 0; i < 6; i++)
139 // loop over neighbors
140 for (int bond_idx = 0; bond_idx < n_bonds; bond_idx++)
142 // MEM TRANSFER: 8 bytes
143 group_storage<2> cur_bond = blist[pitch*bond_idx + idx];
145 int cur_bond_idx = cur_bond.idx[0];
146 int cur_bond_type = cur_bond.idx[1];
148 // get the bonded particle's position (MEM_TRANSFER: 16 bytes)
149 Scalar4 neigh_postype = d_pos[cur_bond_idx];
150 Scalar3 neigh_pos = make_scalar3(neigh_postype.x, neigh_postype.y, neigh_postype.z);
152 // calculate dr (FLOPS: 3)
153 Scalar3 dx = pos - neigh_pos;
155 // apply periodic boundary conditions (FLOPS: 12)
156 dx = box.minImage(dx);
158 // access needed parameters
159 Scalar4 params = s_params[cur_bond_type];
160 Scalar rmin = params.x;
161 Scalar rmax = params.y;
162 Scalar delta_r = params.z;
165 Scalar rsq = dot(dx, dx);
166 Scalar r = sqrtf(rsq);
168 if (r < rmax && r >= rmin)
171 Scalar value_f = (r - rmin) / delta_r;
173 // compute index into the table and read in values
174 unsigned int value_i = floor(value_f);
176 Scalar2 VF0 = texFetchScalar2(d_tables, tables_tex, table_value(value_i, cur_bond_type));
177 Scalar2 VF1 = texFetchScalar2(d_tables, tables_tex, table_value(value_i+1, cur_bond_type));
184 // compute the linear interpolation coefficient
185 Scalar f = value_f - Scalar(value_i);
187 // interpolate to get V and F;
188 Scalar V = V0 + f * (V1 - V0);
189 Scalar F = F0 + f * (F1 - F0);
191 // convert to standard variables used by the other pair computes in HOOMD-blue
192 Scalar forcemag_divr = 0.0f;
194 forcemag_divr = F / r;
196 // calculate the virial
197 Scalar force_div2r = Scalar(0.5) * forcemag_divr;
198 virial[0] += dx.x * dx.x * force_div2r; // xx
199 virial[1] += dx.x * dx.y * force_div2r; // xy
200 virial[2] += dx.x * dx.z * force_div2r; // xz
201 virial[3] += dx.y * dx.y * force_div2r; // yy
202 virial[4] += dx.y * dx.z * force_div2r; // yz
203 virial[5] += dx.z * dx.z * force_div2r; // zz
205 // add up the force vector components (FLOPS: 7)
206 force.x += dx.x * forcemag_divr;
207 force.y += dx.y * forcemag_divr;
208 force.z += dx.z * forcemag_divr;
209 force.w += bond_eng * 0.5f;
218 // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes);
219 d_force[idx] = force;
220 for (unsigned int i = 0; i < 6 ; i++)
221 d_virial[i*virial_pitch + idx] = virial[i];
225 /*! \param d_force Device memory to write computed forces
226 \param d_virial Device memory to write computed virials
227 \param virial_pitch pitch of 2D virial array
228 \param N number of particles
229 \param d_pos particle positions on the device
230 \param box Box dimensions used to implement periodic boundary conditions
231 \param blist List of bonds stored on the GPU
232 \param pitch Pitch of 2D bond list
233 \param n_bonds_list List of numbers of bonds stored on the GPU
234 \param n_bond_type number of bond types
235 \param d_tables Tables of the potential and force
236 \param d_params Parameters for each table associated with a type pair
237 \param table_width Number of entries in the table
238 \param table_value indexer helper
239 \param d_flags flags on the device - a 1 will be written if evaluation
240 of forces failed for any bond
241 \param block_size Block size at which to run the kernel
242 \param compute_capability Compute capability of the execution device (200, 3000, 350, ...)
244 \note This is just a kernel driver. See gpu_compute_bondtable_forces_kernel for full documentation.
246 cudaError_t gpu_compute_bondtable_forces(Scalar4* d_force,
248 const unsigned int virial_pitch,
249 const unsigned int N,
250 const Scalar4 *d_pos,
252 const group_storage<2> *blist,
253 const unsigned int pitch,
254 const unsigned int *n_bonds_list,
255 const unsigned int n_bond_type,
256 const Scalar2 *d_tables,
257 const Scalar4 *d_params,
258 const unsigned int table_width,
259 const Index2D &table_value,
260 unsigned int *d_flags,
261 const unsigned int block_size,
262 const unsigned int compute_capability)
266 assert(n_bond_type > 0);
267 assert(table_width > 1);
269 static unsigned int max_block_size = UINT_MAX;
270 if (max_block_size == UINT_MAX)
272 cudaFuncAttributes attr;
273 cudaFuncGetAttributes(&attr, (const void *)gpu_compute_bondtable_forces_kernel);
274 max_block_size = attr.maxThreadsPerBlock;
277 unsigned int run_block_size = min(block_size, max_block_size);
279 // setup the grid to run the kernel
280 dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
281 dim3 threads(run_block_size, 1, 1);
283 // bind the tables texture only on pre sm 35 arches
284 if (compute_capability < 350)
286 tables_tex.normalized = false;
287 tables_tex.filterMode = cudaFilterModePoint;
288 cudaError_t error = cudaBindTexture(0, tables_tex, d_tables, sizeof(Scalar2) * table_value.getNumElements());
289 if (error != cudaSuccess)
293 gpu_compute_bondtable_forces_kernel<<< grid, threads, sizeof(Scalar4)*n_bond_type >>>