2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: phillicl
52 #include "TableDihedralForceGPU.cuh"
53 #include "TextureTools.h"
55 #include "VectorMath.h"
63 // SMALL a relatively small number
66 /*! \file TableDihedralForceGPU.cu
67 \brief Defines GPU kernel code for calculating the table dihedral forces. Used by TableDihedralForceComputeGPU.
71 //! Texture for reading table values
72 scalar2_tex_t tables_tex;
74 /*! This kernel is called to calculate the table dihedral forces on all triples this is defined or
76 \param d_force Device memory to write computed forces
77 \param d_virial Device memory to write computed virials
78 \param virial_pitch Pitch of 2D virial array
79 \param N number of particles in system
80 \param device_pos device array of particle positions
81 \param box Box dimensions used to implement periodic boundary conditions
82 \param dlist List of dihedrals stored on the GPU
83 \param pitch Pitch of 2D dihedral list
84 \param n_dihedrals_list List of numbers of dihedrals stored on the GPU
85 \param n_dihedral_type number of dihedral types
86 \param d_tables Tables of the potential and force
87 \param table_value index helper function
88 \param delta_phi dihedral delta of the table
90 See TableDihedralForceCompute for information on the memory layout.
93 * Table entries are read from tables_tex. Note that currently this is bound to a 1D memory region. Performance tests
94 at a later date may result in this changing.
96 __global__ void gpu_compute_table_dihedral_forces_kernel(Scalar4* d_force,
98 const unsigned int virial_pitch,
100 const Scalar4 *device_pos,
102 const group_storage<4> *dlist,
103 const unsigned int *dihedral_ABCD,
104 const unsigned int pitch,
105 const unsigned int *n_dihedrals_list,
106 const Scalar2 *d_tables,
107 const Index2D table_value,
108 const Scalar delta_phi)
112 // start by identifying which particle we are to handle
113 unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
118 // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
119 int n_dihedrals =n_dihedrals_list[idx];
121 // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
122 Scalar4 idx_postype = device_pos[idx]; // we can be either a, b, or c in the a-b-c triplet
123 Scalar3 idx_pos = make_scalar3(idx_postype.x, idx_postype.y, idx_postype.z);
124 Scalar3 pos_a,pos_b,pos_c, pos_d; // allocate space for the a,b,c, and d atom in the a-b-c-d set
127 // initialize the force to 0
128 Scalar4 force_idx = make_scalar4(0.0f, 0.0f, 0.0f, 0.0f);
130 // initialize the virial tensor to 0
131 Scalar virial_idx[6];
132 for (unsigned int i = 0; i < 6; i++)
135 for (int dihedral_idx = 0; dihedral_idx < n_dihedrals; dihedral_idx++)
137 group_storage<4> cur_dihedral = dlist[pitch*dihedral_idx + idx];
138 unsigned int cur_ABCD = dihedral_ABCD[pitch*dihedral_idx + idx];
140 int cur_dihedral_x_idx = cur_dihedral.idx[0];
141 int cur_dihedral_y_idx = cur_dihedral.idx[1];
142 int cur_dihedral_z_idx = cur_dihedral.idx[2];
143 int cur_dihedral_type = cur_dihedral.idx[3];
144 int cur_dihedral_abcd = cur_ABCD;
146 // get the a-particle's position (MEM TRANSFER: 16 bytes)
147 Scalar4 x_postype = device_pos[cur_dihedral_x_idx];
148 Scalar3 x_pos = make_scalar3(x_postype.x, x_postype.y, x_postype.z);
149 // get the c-particle's position (MEM TRANSFER: 16 bytes)
150 Scalar4 y_postype = device_pos[cur_dihedral_y_idx];
151 Scalar3 y_pos = make_scalar3(y_postype.x, y_postype.y, y_postype.z);
152 // get the d-particle's position (MEM TRANSFER: 16 bytes)
153 Scalar4 z_postype = device_pos[cur_dihedral_z_idx];
154 Scalar3 z_pos = make_scalar3(z_postype.x, z_postype.y, z_postype.z);
156 if (cur_dihedral_abcd == 0)
163 if (cur_dihedral_abcd == 1)
170 if (cur_dihedral_abcd == 2)
177 if (cur_dihedral_abcd == 3)
185 // calculate dr for a-b,c-b,and a-c
186 Scalar3 dab = pos_a - pos_b;
187 Scalar3 dcb = pos_c - pos_b;
188 Scalar3 ddc = pos_d - pos_c;
190 dab = box.minImage(dab);
191 dcb = box.minImage(dcb);
192 ddc = box.minImage(ddc);
195 dcbm = box.minImage(dcbm);
198 Scalar sb1 = Scalar(1.0) / (dab.x*dab.x + dab.y*dab.y + dab.z*dab.z);
199 Scalar sb3 = Scalar(1.0) / (ddc.x*ddc.x + ddc.y*ddc.y + ddc.z*ddc.z);
201 Scalar rb1 = fast::sqrt(sb1);
202 Scalar rb3 = fast::sqrt(sb3);
204 Scalar c0 = (dab.x*ddc.x + dab.y*ddc.y + dab.z*ddc.z) * rb1*rb3;
208 Scalar b1mag2 = dab.x*dab.x + dab.y*dab.y + dab.z*dab.z;
209 Scalar b1mag = fast::sqrt(b1mag2);
210 Scalar b2mag2 = dcb.x*dcb.x + dcb.y*dcb.y + dcb.z*dcb.z;
211 Scalar b2mag = fast::sqrt(b2mag2);
212 Scalar b3mag2 = ddc.x*ddc.x + ddc.y*ddc.y + ddc.z*ddc.z;
213 Scalar b3mag = fast::sqrt(b3mag2);
215 Scalar ctmp = dab.x*dcb.x + dab.y*dcb.y + dab.z*dcb.z;
216 Scalar r12c1 = Scalar(1.0) / (b1mag*b2mag);
217 Scalar c1mag = ctmp * r12c1;
219 ctmp = dcbm.x*ddc.x + dcbm.y*ddc.y + dcbm.z*ddc.z;
220 Scalar r12c2 = Scalar(1.0) / (b2mag*b3mag);
221 Scalar c2mag = ctmp * r12c2;
223 // cos and sin of 2 angles and final c
225 Scalar sin2 = Scalar(1.0) - c1mag*c1mag;
226 if (sin2 < 0.0f) sin2 = 0.0f;
227 Scalar sc1 = fast::sqrt(sin2);
228 if (sc1 < SMALL) sc1 = SMALL;
229 sc1 = Scalar(1.0)/sc1;
231 sin2 = Scalar(1.0) - c2mag*c2mag;
232 if (sin2 < 0.0f) sin2 = 0.0f;
233 Scalar sc2 = fast::sqrt(sin2);
234 if (sc2 < SMALL) sc2 = SMALL;
235 sc2 = Scalar(1.0)/sc2;
237 Scalar s12 = sc1 * sc2;
238 Scalar c = (c0 + c1mag*c2mag) * s12;
240 if (c > Scalar(1.0)) c = Scalar(1.0);
241 if (c < -Scalar(1.0)) c = -Scalar(1.0);
244 Scalar det = dot(dab,make_scalar3(ddc.y*dcb.z-ddc.z*dcb.y,
245 ddc.z*dcb.x-ddc.x*dcb.z,
246 ddc.x*dcb.y-ddc.y*dcb.x));
249 Scalar phi = acosf(c);
251 if (det < 0) phi = -phi;
254 Scalar value_f = (Scalar(M_PI)+phi) / delta_phi;
256 // compute index into the table and read in values
257 unsigned int value_i = value_f;
258 Scalar2 VT0 = texFetchScalar2(d_tables, tables_tex, table_value(value_i, cur_dihedral_type));
259 Scalar2 VT1 = texFetchScalar2(d_tables, tables_tex, table_value(value_i+1, cur_dihedral_type));
266 // compute the linear interpolation coefficient
267 Scalar f = value_f - Scalar(value_i);
269 // interpolate to get V and T;
270 Scalar V = V0 + f * (V1 - V0);
271 Scalar T = T0 + f * (T1 - T0);
273 // from Blondel and Karplus 1995
274 vec3<Scalar> A = cross(vec3<Scalar>(dab),vec3<Scalar>(dcbm));
275 Scalar Asq = dot(A,A);
277 vec3<Scalar> B = cross(vec3<Scalar>(ddc),vec3<Scalar>(dcbm));
278 Scalar Bsq = dot(B,B);
280 Scalar3 f_a = -T*vec_to_scalar3(b2mag/Asq*A);
281 Scalar3 f_b = -f_a + T/b2mag*vec_to_scalar3(dot(dab,dcbm)/Asq*A-dot(ddc,dcbm)/Bsq*B);
282 Scalar3 f_c = T*vec_to_scalar3(dot(ddc,dcbm)/Bsq/b2mag*B-dot(dab,dcbm)/Asq/b2mag*A-b2mag/Bsq*B);
283 Scalar3 f_d = T*b2mag/Bsq*vec_to_scalar3(B);
285 // Now, apply the force to each individual atom a,b,c,d
286 // and accumlate the energy/virial
288 // compute 1/4 of the energy, 1/4 for each atom in the dihedral
289 Scalar dihedral_eng = V*Scalar(1.0/4.0);
291 // compute 1/4 of the virial, 1/4 for each atom in the dihedral
292 // upper triangular version of virial tensor
293 Scalar dihedral_virial[6];
295 dihedral_virial[0] = (1./4.)*(dab.x*f_a.x + dcb.x*f_c.x + (ddc.x+dcb.x)*f_d.x);
296 dihedral_virial[1] = (1./4.)*(dab.y*f_a.x + dcb.y*f_c.x + (ddc.y+dcb.y)*f_d.x);
297 dihedral_virial[2] = (1./4.)*(dab.z*f_a.x + dcb.z*f_c.x + (ddc.z+dcb.z)*f_d.x);
298 dihedral_virial[3] = (1./4.)*(dab.y*f_a.y + dcb.y*f_c.y + (ddc.y+dcb.y)*f_d.y);
299 dihedral_virial[4] = (1./4.)*(dab.z*f_a.y + dcb.z*f_c.y + (ddc.z+dcb.z)*f_d.y);
300 dihedral_virial[5] = (1./4.)*(dab.z*f_a.z + dcb.z*f_c.z + (ddc.z+dcb.z)*f_d.z);
302 if (cur_dihedral_abcd == 0)
304 force_idx.x += f_a.x;
305 force_idx.y += f_a.y;
306 force_idx.z += f_a.z;
308 if (cur_dihedral_abcd == 1)
310 force_idx.x += f_b.x;
311 force_idx.y += f_b.y;
312 force_idx.z += f_b.z;
314 if (cur_dihedral_abcd == 2)
316 force_idx.x += f_c.x;
317 force_idx.y += f_c.y;
318 force_idx.z += f_c.z;
320 if (cur_dihedral_abcd == 3)
322 force_idx.x += f_d.x;
323 force_idx.y += f_d.y;
324 force_idx.z += f_d.z;
327 force_idx.w += dihedral_eng;
328 for (int k = 0; k < 6; k++)
329 virial_idx[k] += dihedral_virial[k];
332 // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
333 d_force[idx] = force_idx;
334 for (int k = 0; k < 6; k++)
335 d_virial[k*virial_pitch+idx] = virial_idx[k];
339 /*! \param d_force Device memory to write computed forces
340 \param d_virial Device memory to write computed virials
341 \param virial_pitch pitch of 2D virial array
342 \param N number of particles
343 \param device_pos particle positions on the device
344 \param box Box dimensions used to implement periodic boundary conditions
345 \param dlist List of dihedrals stored on the GPU
346 \param pitch Pitch of 2D dihedral list
347 \param n_dihedrals_list List of numbers of dihedrals stored on the GPU
348 \param n_dihedral_type number of dihedral types
349 \param d_tables Tables of the potential and force
350 \param table_width Number of points in each table
351 \param table_value indexer helper
352 \param block_size Block size at which to run the kernel
353 \param compute_capability Compute capability of the device (200, 300, 350, ...)
355 \note This is just a kernel driver. See gpu_compute_table_dihedral_forces_kernel for full documentation.
357 cudaError_t gpu_compute_table_dihedral_forces(Scalar4* d_force,
359 const unsigned int virial_pitch,
360 const unsigned int N,
361 const Scalar4 *device_pos,
363 const group_storage<4> *dlist,
364 const unsigned int *dihedral_ABCD,
365 const unsigned int pitch,
366 const unsigned int *n_dihedrals_list,
367 const Scalar2 *d_tables,
368 const unsigned int table_width,
369 const Index2D &table_value,
370 const unsigned int block_size,
371 const unsigned int compute_capability)
374 assert(table_width > 1);
376 static unsigned int max_block_size = UINT_MAX;
377 if (max_block_size == UINT_MAX)
379 cudaFuncAttributes attr;
380 cudaFuncGetAttributes(&attr, (const void *)gpu_compute_table_dihedral_forces_kernel);
381 max_block_size = attr.maxThreadsPerBlock;
384 unsigned int run_block_size = min(block_size, max_block_size);
386 // setup the grid to run the kernel
387 dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
388 dim3 threads(run_block_size, 1, 1);
390 // bind the tables texture on pre sm35 devices
391 if (compute_capability < 350)
393 tables_tex.normalized = false;
394 tables_tex.filterMode = cudaFilterModePoint;
395 cudaError_t error = cudaBindTexture(0, tables_tex, d_tables, sizeof(Scalar2) * table_value.getNumElements());
396 if (error != cudaSuccess)
400 Scalar delta_phi = Scalar(2.0*M_PI)/(Scalar)(table_width - 1);
402 gpu_compute_table_dihedral_forces_kernel<<< grid, threads>>>