2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: dnlebard
52 #include "HarmonicAngleForceGPU.cuh"
53 #include "TextureTools.h"
61 // SMALL a relatively small number
62 #define SMALL Scalar(0.001)
64 /*! \file HarmonicAngleForceGPU.cu
65 \brief Defines GPU kernel code for calculating the harmonic angle forces. Used by HarmonicAngleForceComputeGPU.
68 //! Texture for reading angle parameters
69 scalar2_tex_t angle_params_tex;
71 //! Kernel for caculating harmonic angle forces on the GPU
72 /*! \param d_force Device memory to write computed forces
73 \param d_virial Device memory to write computed virials
74 \param virial_pitch Pitch of 2D virial array
75 \param N number of particles
76 \param d_pos device array of particle positions
77 \param d_params Parameters for the angle force
78 \param box Box dimensions for periodic boundary condition handling
79 \param alist Angle data to use in calculating the forces
80 \param pitch Pitch of 2D angles list
81 \param n_angles_list List of numbers of angles stored on the GPU
83 extern "C" __global__ void gpu_compute_harmonic_angle_forces_kernel(Scalar4* d_force,
85 const unsigned int virial_pitch,
88 const Scalar2 *d_params,
90 const group_storage<3> *alist,
91 const unsigned int *apos_list,
92 const unsigned int pitch,
93 const unsigned int *n_angles_list)
95 // start by identifying which particle we are to handle
96 int idx = blockIdx.x * blockDim.x + threadIdx.x;
101 // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
102 int n_angles = n_angles_list[idx];
104 // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
105 Scalar4 idx_postype = d_pos[idx]; // we can be either a, b, or c in the a-b-c triplet
106 Scalar3 idx_pos = make_scalar3(idx_postype.x, idx_postype.y, idx_postype.z);
107 Scalar3 a_pos,b_pos,c_pos; // allocate space for the a,b, and c atom in the a-b-c triplet
109 // initialize the force to 0
110 Scalar4 force_idx = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
112 Scalar fab[3], fcb[3];
114 // initialize the virial to 0
116 for (int i = 0; i < 6; i++)
117 virial[i] = Scalar(0.0);
119 // loop over all angles
120 for (int angle_idx = 0; angle_idx < n_angles; angle_idx++)
122 group_storage<3> cur_angle = alist[pitch*angle_idx + idx];
124 int cur_angle_x_idx = cur_angle.idx[0];
125 int cur_angle_y_idx = cur_angle.idx[1];
126 int cur_angle_type = cur_angle.idx[2];
128 int cur_angle_abc = apos_list[pitch*angle_idx + idx];
130 // get the a-particle's position (MEM TRANSFER: 16 bytes)
131 Scalar4 x_postype = d_pos[cur_angle_x_idx];
132 Scalar3 x_pos = make_scalar3(x_postype.x, x_postype.y, x_postype.z);
133 // get the c-particle's position (MEM TRANSFER: 16 bytes)
134 Scalar4 y_postype = d_pos[cur_angle_y_idx];
135 Scalar3 y_pos = make_scalar3(y_postype.x, y_postype.y, y_postype.z);
137 if (cur_angle_abc == 0)
143 if (cur_angle_abc == 1)
149 if (cur_angle_abc == 2)
156 // calculate dr for a-b,c-b,and a-c
157 Scalar3 dab = a_pos - b_pos;
158 Scalar3 dcb = c_pos - b_pos;
159 Scalar3 dac = a_pos - c_pos;
161 // apply periodic boundary conditions
162 dab = box.minImage(dab);
163 dcb = box.minImage(dcb);
164 dac = box.minImage(dac);
166 // get the angle parameters (MEM TRANSFER: 8 bytes)
167 Scalar2 params = texFetchScalar2(d_params, angle_params_tex, cur_angle_type);
169 Scalar t_0 = params.y;
171 Scalar rsqab = dot(dab, dab);
172 Scalar rab = sqrtf(rsqab);
173 Scalar rsqcb = dot(dcb, dcb);
174 Scalar rcb = sqrtf(rsqcb);
176 Scalar c_abbc = dot(dab, dcb);
179 if (c_abbc > Scalar(1.0)) c_abbc = Scalar(1.0);
180 if (c_abbc < -Scalar(1.0)) c_abbc = -Scalar(1.0);
182 Scalar s_abbc = sqrtf(Scalar(1.0) - c_abbc*c_abbc);
183 if (s_abbc < SMALL) s_abbc = SMALL;
184 s_abbc = Scalar(1.0)/s_abbc;
186 // actually calculate the force
187 Scalar dth = fast::acos(c_abbc) - t_0;
190 Scalar a = -Scalar(1.0) * tk * s_abbc;
191 Scalar a11 = a*c_abbc/rsqab;
192 Scalar a12 = -a / (rab*rcb);
193 Scalar a22 = a*c_abbc / rsqcb;
195 fab[0] = a11*dab.x + a12*dcb.x;
196 fab[1] = a11*dab.y + a12*dcb.y;
197 fab[2] = a11*dab.z + a12*dcb.z;
199 fcb[0] = a22*dcb.x + a12*dab.x;
200 fcb[1] = a22*dcb.y + a12*dab.y;
201 fcb[2] = a22*dcb.z + a12*dab.z;
203 // compute 1/3 of the energy, 1/3 for each atom in the angle
204 Scalar angle_eng = tk*dth*Scalar(Scalar(1.0)/Scalar(6.0));
206 // upper triangular version of virial tensor
207 Scalar angle_virial[6];
208 angle_virial[0] = Scalar(1./3.)*(dab.x*fab[0] + dcb.x*fcb[0]);
209 angle_virial[1] = Scalar(1./3.)*(dab.y*fab[0] + dcb.y*fcb[0]);
210 angle_virial[2] = Scalar(1./3.)*(dab.z*fab[0] + dcb.z*fcb[0]);
211 angle_virial[3] = Scalar(1./3.)*(dab.y*fab[1] + dcb.y*fcb[1]);
212 angle_virial[4] = Scalar(1./3.)*(dab.z*fab[1] + dcb.z*fcb[1]);
213 angle_virial[5] = Scalar(1./3.)*(dab.z*fab[2] + dcb.z*fcb[2]);
216 if (cur_angle_abc == 0)
218 force_idx.x += fab[0];
219 force_idx.y += fab[1];
220 force_idx.z += fab[2];
222 if (cur_angle_abc == 1)
224 force_idx.x -= fab[0] + fcb[0];
225 force_idx.y -= fab[1] + fcb[1];
226 force_idx.z -= fab[2] + fcb[2];
228 if (cur_angle_abc == 2)
230 force_idx.x += fcb[0];
231 force_idx.y += fcb[1];
232 force_idx.z += fcb[2];
235 force_idx.w += angle_eng;
237 for (int i = 0; i < 6; i++)
238 virial[i] += angle_virial[i];
241 // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
242 d_force[idx] = force_idx;
243 for (int i = 0; i < 6; i++)
244 d_virial[i*virial_pitch+idx] = virial[i];
247 /*! \param d_force Device memory to write computed forces
248 \param d_virial Device memory to write computed virials
249 \param virial_pitch pitch of 2D virial arary
250 \param N number of particles
251 \param d_pos device array of particle positions
252 \param box Box dimensions (in GPU format) to use for periodic boundary conditions
253 \param atable List of angles stored on the GPU
254 \param pitch Pitch of 2D angles list
255 \param n_angles_list List of numbers of angles stored on the GPU
256 \param d_params K and t_0 params packed as Scalar2 variables
257 \param n_angle_types Number of angle types in d_params
258 \param block_size Block size to use when performing calculations
259 \param compute_capability Device compute capability (200, 300, 350, ...)
261 \returns Any error code resulting from the kernel launch
262 \note Always returns cudaSuccess in release builds to avoid the cudaThreadSynchronize()
264 \a d_params should include one Scalar2 element per angle type. The x component contains K the spring constant
265 and the y component contains t_0 the equilibrium angle.
267 cudaError_t gpu_compute_harmonic_angle_forces(Scalar4* d_force,
269 const unsigned int virial_pitch,
270 const unsigned int N,
271 const Scalar4 *d_pos,
273 const group_storage<3> *atable,
274 const unsigned int *apos_list,
275 const unsigned int pitch,
276 const unsigned int *n_angles_list,
278 unsigned int n_angle_types,
280 const unsigned int compute_capability)
284 static unsigned int max_block_size = UINT_MAX;
285 if (max_block_size == UINT_MAX)
287 cudaFuncAttributes attr;
288 cudaFuncGetAttributes(&attr, (const void *)gpu_compute_harmonic_angle_forces_kernel);
289 max_block_size = attr.maxThreadsPerBlock;
292 unsigned int run_block_size = min(block_size, max_block_size);
294 // setup the grid to run the kernel
295 dim3 grid( N / run_block_size + 1, 1, 1);
296 dim3 threads(run_block_size, 1, 1);
298 // bind the texture on pre sm 35 arches
299 if (compute_capability < 350)
301 cudaError_t error = cudaBindTexture(0, angle_params_tex, d_params, sizeof(Scalar2) * n_angle_types);
302 if (error != cudaSuccess)
307 gpu_compute_harmonic_angle_forces_kernel<<< grid, threads>>>(d_force, d_virial, virial_pitch, N, d_pos, d_params, box,
308 atable, apos_list, pitch, n_angles_list);