2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: dnlebard
52 #include "CGCMMAngleForceGPU.cuh"
53 #include "TextureTools.h"
61 // small number. cutoff for igoring the angle as being ill defined.
62 #define SMALL Scalar(0.001)
64 /*! \file CGCMMAngleForceGPU.cu
65 \brief Defines GPU kernel code for calculating the CGCMM angle forces. Used by CGCMMAngleForceComputeGPU.
68 //! Texture for reading angle parameters
69 scalar2_tex_t angle_params_tex;
71 //! Texture for reading angle CGCMM S-R parameters
72 scalar2_tex_t angle_CGCMMsr_tex; // MISSING EPSILON!!! sigma=.x, rcut=.y
74 //! Texture for reading angle CGCMM Epsilon-pow/pref parameters
75 scalar4_tex_t angle_CGCMMepow_tex; // now with EPSILON=.x, pow1=.y, pow2=.z, pref=.w
77 //! Kernel for caculating CGCMM angle forces on the GPU
78 /*! \param d_force Device memory to write computed forces
79 \param d_virial Device memory to write computed virials
80 \param virial_pitch pitch of 2D virial array
81 \param N number of particles
82 \param d_pos particle positions on the device
83 \param box Box dimensions for periodic boundary condition handling
84 \param alist Angle data to use in calculating the forces
85 \param pitch Pitch of 2D angles list
86 \param n_angles_list List of numbers of angles stored on the GPU
88 extern "C" __global__ void gpu_compute_CGCMM_angle_forces_kernel(Scalar4* d_force,
90 const unsigned int virial_pitch,
94 const group_storage<3> *alist,
95 const unsigned int *apos_list,
96 const unsigned int pitch,
97 const unsigned int *n_angles_list,
100 Scalar4 *d_CGCMMepow)
102 // start by identifying which particle we are to handle
103 int idx = blockIdx.x * blockDim.x + threadIdx.x;
108 // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
109 int n_angles =n_angles_list[idx];
111 // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
112 Scalar4 idx_postype = d_pos[idx]; // we can be either a, b, or c in the a-b-c triplet
113 Scalar3 idx_pos = make_scalar3(idx_postype.x, idx_postype.y, idx_postype.z);
114 Scalar3 a_pos,b_pos,c_pos; // allocate space for the a,b, and c atom in the a-b-c triplet
116 // initialize the force to 0
117 Scalar4 force_idx = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
119 Scalar fab[3], fcb[3];
120 Scalar fac, eac, vac[6];
122 // initialize the virial to 0
123 Scalar virial_idx[6];
124 for (int i = 0; i < 6; i++)
125 virial_idx[i] = Scalar(0.0);
127 // loop over all angles
128 for (int angle_idx = 0; angle_idx < n_angles; angle_idx++)
130 group_storage<3> cur_angle = alist[pitch*angle_idx + idx];
132 int cur_angle_x_idx = cur_angle.idx[0];
133 int cur_angle_y_idx = cur_angle.idx[1];
135 // store the a and c positions to accumlate their forces
136 int cur_angle_type = cur_angle.idx[2];
137 int cur_angle_abc = apos_list[pitch*angle_idx + idx];
139 // get the a-particle's position (MEM TRANSFER: 16 bytes)
140 Scalar4 x_postype = d_pos[cur_angle_x_idx];
141 Scalar3 x_pos = make_scalar3(x_postype.x, x_postype.y, x_postype.z);
142 // get the c-particle's position (MEM TRANSFER: 16 bytes)
143 Scalar4 y_postype = d_pos[cur_angle_y_idx];
144 Scalar3 y_pos = make_scalar3(y_postype.x, y_postype.y, y_postype.z);
146 if (cur_angle_abc == 0)
152 if (cur_angle_abc == 1)
158 if (cur_angle_abc == 2)
165 // calculate dr for a-b,c-b,and a-c
166 Scalar3 dab = a_pos - b_pos;
167 Scalar3 dcb = c_pos - b_pos;
168 Scalar3 dac = a_pos - c_pos;
170 // apply periodic boundary conditions
171 dab = box.minImage(dab);
172 dcb = box.minImage(dcb);
173 dac = box.minImage(dac);
175 // get the angle parameters (MEM TRANSFER: 8 bytes)
176 Scalar2 params = texFetchScalar2(d_params, angle_params_tex, cur_angle_type);
178 Scalar t_0 = params.y;
180 Scalar rsqab = dot(dab, dab);
181 Scalar rab = sqrtf(rsqab);
182 Scalar rsqcb = dot(dcb, dcb);;
183 Scalar rcb = sqrtf(rsqcb);
184 Scalar rsqac = dot(dac, dac);
185 Scalar rac = sqrtf(rsqac);
187 Scalar c_abbc = dot(dab, dcb);
190 if (c_abbc > Scalar(1.0)) c_abbc = Scalar(1.0);
191 if (c_abbc < -Scalar(1.0)) c_abbc = -Scalar(1.0);
193 Scalar s_abbc = sqrtf(Scalar(1.0) - c_abbc*c_abbc);
194 if (s_abbc < SMALL) s_abbc = SMALL;
195 s_abbc = Scalar(1.0)/s_abbc;
197 //////////////////////////////////////////
198 // THIS CODE DOES THE 1-3 LJ repulsions //
199 //////////////////////////////////////////////////////////////////////////////
202 for (int i=0; i < 6; i++)
203 vac[i] = Scalar(0.0);
205 // get the angle E-S-R parameters (MEM TRANSFER: 12 bytes)
206 const Scalar2 cgSR = texFetchScalar2(d_CGCMMsr, angle_CGCMMsr_tex, cur_angle_type);
208 Scalar cgsigma = cgSR.x;
209 Scalar cgrcut = cgSR.y;
213 const Scalar4 cgEPOW = texFetchScalar4(d_CGCMMepow, angle_CGCMMepow_tex, cur_angle_type);
215 // get the angle pow/pref parameters (MEM TRANSFER: 12 bytes)
216 Scalar cgeps = cgEPOW.x;
217 Scalar cgpow1 = cgEPOW.y;
218 Scalar cgpow2 = cgEPOW.z;
219 Scalar cgpref = cgEPOW.w;
221 Scalar cgratio = cgsigma/rac;
222 // INTERESTING NOTE: POW has weird behavior depending
223 // on the inputted parameters. Try sigma=2.05, versus sigma=0.05
224 // in cgcmm_angle_force_test.cc 4 particle test
225 fac = cgpref*cgeps / rsqac * (cgpow1*fast::pow(cgratio,cgpow1) - cgpow2*fast::pow(cgratio,cgpow2));
226 eac = cgeps + cgpref*cgeps * (fast::pow(cgratio,cgpow1) - fast::pow(cgratio,cgpow2));
228 vac[0] = fac * dac.x*dac.x;
229 vac[1] = fac * dac.x*dac.y;
230 vac[2] = fac * dac.x*dac.z;
231 vac[3] = fac * dac.y*dac.y;
232 vac[4] = fac * dac.y*dac.z;
233 vac[5] = fac * dac.z*dac.z;
235 //////////////////////////////////////////////////////////////////////////////
237 // actually calculate the force
238 Scalar dth = fast::acos(c_abbc) - t_0;
241 Scalar a = -Scalar(1.0) * tk * s_abbc;
242 Scalar a11 = a*c_abbc/rsqab;
243 Scalar a12 = -a / (rab*rcb);
244 Scalar a22 = a*c_abbc / rsqcb;
246 fab[0] = a11*dab.x + a12*dcb.x;
247 fab[1] = a11*dab.y + a12*dcb.y;
248 fab[2] = a11*dab.z + a12*dcb.z;
250 fcb[0] = a22*dcb.x + a12*dab.x;
251 fcb[1] = a22*dcb.y + a12*dab.y;
252 fcb[2] = a22*dcb.z + a12*dab.z;
254 // compute 1/3 of the energy, 1/3 for each atom in the angle
255 Scalar angle_eng = (Scalar(0.5)*tk*dth + eac)*Scalar(Scalar(1.0)/Scalar(3.0));
257 Scalar angle_virial[6];
258 angle_virial[0] = (1.f/3.f) * ( dab.x*fab[0] + dcb.x*fcb[0] );
259 angle_virial[1] = (1.f/3.f) * ( dab.y*fab[0] + dcb.y*fcb[0] );
260 angle_virial[2] = (1.f/3.f) * ( dab.z*fab[0] + dcb.z*fcb[0] );
261 angle_virial[3] = (1.f/3.f) * ( dab.y*fab[1] + dcb.y*fcb[1] );
262 angle_virial[4] = (1.f/3.f) * ( dab.z*fab[1] + dcb.z*fcb[1] );
263 angle_virial[5] = (1.f/3.f) * ( dab.z*fab[2] + dcb.z*fcb[2] );
265 for (int i = 0; i < 6; i++)
266 angle_virial[i] += (1.f/3.f)*vac[i];
268 if (cur_angle_abc == 0)
270 force_idx.x += fab[0] + fac*dac.x;
271 force_idx.y += fab[1] + fac*dac.y;
272 force_idx.z += fab[2] + fac*dac.z;
274 if (cur_angle_abc == 1)
276 force_idx.x -= fab[0] + fcb[0];
277 force_idx.y -= fab[1] + fcb[1];
278 force_idx.z -= fab[2] + fcb[2];
280 if (cur_angle_abc == 2)
282 force_idx.x += fcb[0] - fac*dac.x;
283 force_idx.y += fcb[1] - fac*dac.y;
284 force_idx.z += fcb[2] - fac*dac.z;
287 force_idx.w += angle_eng;
288 for (int i = 0; i < 6; i++)
289 virial_idx[i] += angle_virial[i];
292 // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
293 d_force[idx] = force_idx;
294 for (int i = 0; i < 6; i++)
295 d_virial[i*virial_pitch+idx] = virial_idx[i];
298 /*! \param d_force Device memory to write computed forces
299 \param d_virial Device memory to write computed virials
300 \param virial_pitch pitch of 2D virial array
301 \param N number of particles
302 \param d_pos particle positions on the device
303 \param box Box dimensions (in GPU format) to use for periodic boundary conditions
304 \param atable List of angles stored on the GPU
305 \param pitch Pitch of 2D angles list
306 \param n_angles_list List of numbers of angles stored on the GPU
307 \param d_params K and t_0 params packed as Scalar2 variables
308 \param d_CGCMMsr sigma, and rcut packed as a Scalar2
309 \param d_CGCMMepow epsilon, pow1, pow2, and prefactor packed as a Scalar4
310 \param n_angle_types Number of angle types in d_params
311 \param block_size Block size to use when performing calculations
312 \param compute_capability Compute capability of the device (200, 300, 350, ...)
314 \returns Any error code resulting from the kernel launch
315 \note Always returns cudaSuccess in release builds to avoid the cudaThreadSynchronize()
317 \a d_params should include one Scalar2 element per angle type. The x component contains K the spring constant
318 and the y component contains t_0 the equilibrium angle.
320 cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
322 const unsigned int virial_pitch,
323 const unsigned int N,
324 const Scalar4 *d_pos,
326 const group_storage<3> *atable,
327 const unsigned int *apos_list,
328 const unsigned int pitch,
329 const unsigned int *n_angles_list,
332 Scalar4 *d_CGCMMepow,
333 unsigned int n_angle_types,
335 const unsigned int compute_capability)
341 static unsigned int max_block_size = UINT_MAX;
342 if (max_block_size == UINT_MAX)
344 cudaFuncAttributes attr;
345 cudaFuncGetAttributes(&attr, (const void *)gpu_compute_CGCMM_angle_forces_kernel);
346 max_block_size = attr.maxThreadsPerBlock;
349 unsigned int run_block_size = min(block_size, max_block_size);
351 // setup the grid to run the kernel
352 dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
353 dim3 threads(run_block_size, 1, 1);
355 // bind the textures on pre sm 35 arches
356 if (compute_capability < 350)
358 cudaError_t error = cudaBindTexture(0, angle_params_tex, d_params, sizeof(Scalar2) * n_angle_types);
359 if (error != cudaSuccess)
362 error = cudaBindTexture(0, angle_CGCMMsr_tex, d_CGCMMsr, sizeof(Scalar2) * n_angle_types);
363 if (error != cudaSuccess)
366 error = cudaBindTexture(0, angle_CGCMMepow_tex, d_CGCMMepow, sizeof(Scalar4) * n_angle_types);
367 if (error != cudaSuccess)
372 gpu_compute_CGCMM_angle_forces_kernel<<< grid, threads>>>(d_force,