/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivative works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Maintainer: joaander

#include "ComputeThermoGPU.cuh"

#include <assert.h>

//! Shared memory used in reducing the sums
extern __shared__ Scalar3 compute_thermo_sdata[];
//! Shared memory used in reducing the sums of the pressure tensor
extern __shared__ Scalar compute_pressure_tensor_sdata[];

/*! \file ComputeThermoGPU.cu
    \brief Defines GPU kernel code for computing thermodynamic properties on the GPU. Used by ComputeThermoGPU.
*/
//! Perform partial sums of the thermo properties on the GPU
/*! \param d_scratch Scratch space to hold partial sums. One element is written per block
    \param d_net_force Net force / pe array from ParticleData
    \param d_net_virial Net virial array from ParticleData
    \param virial_pitch Pitch of the 2D virial array
    \param d_velocity Particle velocity and mass array from ParticleData
    \param d_group_members List of group members for which to sum properties
    \param group_size Number of particles in the group

    All partial sums are packaged up in a Scalar4 to keep pointer management down.
    - 2*Kinetic energy is summed in .x
    - Potential energy is summed in .y
    - The isotropic virial, W = (1/3) Tr(virial), is summed in .z

    One thread is executed per group member. That thread reads the values for its member into shared memory
    and then the block performs a reduction in parallel to produce a partial sum output for the block. These
    partial sums are written to d_scratch[blockIdx.x]. sizeof(Scalar3)*block_size bytes of dynamic shared
    memory are needed for this kernel to run.
*/
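// A minimal launch sketch for this kernel (illustration only; the actual launch is issued by
// gpu_compute_thermo() at the bottom of this file, and n_blocks/block_size here stand in for the
// values the caller supplies through compute_thermo_args):
//
//     dim3 grid(n_blocks, 1, 1);
//     dim3 threads(block_size, 1, 1);
//     unsigned int shared_bytes = sizeof(Scalar3) * block_size; // dynamic shared memory for the reduction
//     gpu_compute_thermo_partial_sums<<<grid, threads, shared_bytes>>>(d_scratch, d_net_force, d_net_virial,
//                                                                      virial_pitch, d_velocity,
//                                                                      d_group_members, group_size);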
__global__ void gpu_compute_thermo_partial_sums(Scalar4 *d_scratch,
                                                Scalar4 *d_net_force,
                                                Scalar *d_net_virial,
                                                const unsigned int virial_pitch,
                                                Scalar4 *d_velocity,
                                                unsigned int *d_group_members,
                                                unsigned int group_size)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    Scalar3 my_element; // element of scratch space read in
    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // read the net force on this particle; the .w component holds its potential energy
        Scalar4 net_force = d_net_force[idx];
        Scalar net_isotropic_virial;
        // (1/3)*trace of the virial tensor
        net_isotropic_virial = Scalar(1.0/3.0)*
                               (d_net_virial[0*virial_pitch+idx]   // xx
                               +d_net_virial[3*virial_pitch+idx]   // yy
                               +d_net_virial[5*virial_pitch+idx]); // zz
        Scalar4 vel = d_velocity[idx];
        Scalar mass = vel.w;

        // compute our contribution to the sum
        my_element.x = mass * (vel.x*vel.x + vel.y*vel.y + vel.z*vel.z); // 2*kinetic energy
        my_element.y = net_force.w;                                      // potential energy
        my_element.z = net_isotropic_virial;                             // isotropic virial
        }
    else
        {
        // non-participating thread: contribute 0 to the sum
        my_element = make_scalar3(0, 0, 0);
        }

    compute_thermo_sdata[threadIdx.x] = my_element;
    __syncthreads();

    // reduce the sum in parallel
    int offs = blockDim.x >> 1;
    while (offs > 0)
        {
        if (threadIdx.x < offs)
            {
            compute_thermo_sdata[threadIdx.x].x += compute_thermo_sdata[threadIdx.x + offs].x;
            compute_thermo_sdata[threadIdx.x].y += compute_thermo_sdata[threadIdx.x + offs].y;
            compute_thermo_sdata[threadIdx.x].z += compute_thermo_sdata[threadIdx.x + offs].z;
            }
        offs >>= 1;
        __syncthreads();
        }

    // write out our partial sum
    if (threadIdx.x == 0)
        {
        Scalar3 res = compute_thermo_sdata[0];
        d_scratch[blockIdx.x] = make_scalar4(res.x, res.y, res.z, 0);
        }
    }
//! Perform partial sums of the pressure tensor on the GPU
/*! \param d_scratch Scratch space to hold partial sums. One element is written per block
    \param d_net_force Net force / pe array from ParticleData
    \param d_net_virial Net virial array from ParticleData
    \param virial_pitch Pitch of the 2D virial array
    \param d_velocity Particle velocity and mass array from ParticleData
    \param d_group_members List of group members for which to sum properties
    \param group_size Number of particles in the group

    One thread is executed per group member. That thread reads the six components of the pressure tensor
    for its member into shared memory, and then the block performs a reduction in parallel to produce a
    partial sum output for the block. These partial sums are written to d_scratch[i*gridDim.x + blockIdx.x],
    where i=0..5 is the index of the component. For this kernel to run, 6*sizeof(Scalar)*block_size bytes
    of dynamic shared memory are needed.
*/
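// Launch sketch (illustration only; gpu_compute_thermo() below performs the real launch). The only
// difference from the thermo partial-sum kernel is the shared memory request, which must hold six
// Scalars per thread:
//
//     unsigned int shared_bytes = 6 * sizeof(Scalar) * block_size;
//     gpu_compute_pressure_tensor_partial_sums<<<grid, threads, shared_bytes>>>(d_scratch_pressure_tensor,
//                                                                               d_net_force, d_net_virial,
//                                                                               virial_pitch, d_velocity,
//                                                                               d_group_members, group_size);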
__global__ void gpu_compute_pressure_tensor_partial_sums(Scalar *d_scratch,
                                                         Scalar4 *d_net_force,
                                                         Scalar *d_net_virial,
                                                         const unsigned int virial_pitch,
                                                         Scalar4 *d_velocity,
                                                         unsigned int *d_group_members,
                                                         unsigned int group_size)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    Scalar my_element[6]; // element of scratch space read in
    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // compute contribution to pressure tensor and store it in my_element
        Scalar4 vel = d_velocity[idx];
        Scalar mass = vel.w;

        my_element[0] = mass*vel.x*vel.x + d_net_virial[0*virial_pitch+idx]; // xx
        my_element[1] = mass*vel.x*vel.y + d_net_virial[1*virial_pitch+idx]; // xy
        my_element[2] = mass*vel.x*vel.z + d_net_virial[2*virial_pitch+idx]; // xz
        my_element[3] = mass*vel.y*vel.y + d_net_virial[3*virial_pitch+idx]; // yy
        my_element[4] = mass*vel.y*vel.z + d_net_virial[4*virial_pitch+idx]; // yz
        my_element[5] = mass*vel.z*vel.z + d_net_virial[5*virial_pitch+idx]; // zz
        }
    else
        {
        // non-participating thread: contribute 0 to the sum
        for (unsigned int i = 0; i < 6; i++)
            my_element[i] = Scalar(0.0);
        }

    for (unsigned int i = 0; i < 6; i++)
        compute_pressure_tensor_sdata[i*blockDim.x+threadIdx.x] = my_element[i];
    __syncthreads();

    // reduce the sum in parallel
    int offs = blockDim.x >> 1;
    while (offs > 0)
        {
        if (threadIdx.x < offs)
            {
            for (unsigned int i = 0; i < 6; i++)
                compute_pressure_tensor_sdata[i*blockDim.x+threadIdx.x] += compute_pressure_tensor_sdata[i*blockDim.x + threadIdx.x + offs];
            }
        offs >>= 1;
        __syncthreads();
        }

    // write out our partial sum
    if (threadIdx.x == 0)
        {
        for (unsigned int i = 0; i < 6; i++)
            d_scratch[gridDim.x * i + blockIdx.x] = compute_pressure_tensor_sdata[i*blockDim.x];
        }
    }
//! Complete partial sums and compute final thermodynamic quantities (for pressure, only the isotropic contribution)
/*! \param d_properties Property array to write final values
    \param d_scratch Partial sums
    \param ndof Number of degrees of freedom this group possesses
    \param box Box the particles are in
    \param D Dimensionality of the system
    \param group_size Number of particles in the group
    \param num_partial_sums Number of partial sums in \a d_scratch
    \param external_virial External contribution to virial (1/3 trace)

    Only one block is executed. In that block, the partial sums are read in and reduced to final values.
    From the final sums, the thermodynamic properties are computed and written to d_properties.

    sizeof(Scalar3)*block_size bytes of shared memory are needed for this kernel to run.
*/
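// Launch sketch (illustration only; gpu_compute_thermo() below performs the real launch). A single
// block sweeps over all of the partial sums, so the grid is 1x1x1 and num_partial_sums equals the
// number of blocks used in the partial-sum pass:
//
//     dim3 final_grid(1, 1, 1);
//     dim3 final_threads(final_block_size, 1, 1);
//     gpu_compute_thermo_final_sums<<<final_grid, final_threads, sizeof(Scalar3)*final_block_size>>>(
//         d_properties, d_scratch, ndof, box, D, group_size, num_partial_sums, external_virial);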
__global__ void gpu_compute_thermo_final_sums(Scalar *d_properties,
                                              Scalar4 *d_scratch,
                                              unsigned int ndof,
                                              BoxDim box,
                                              unsigned int D,
                                              unsigned int group_size,
                                              unsigned int num_partial_sums,
                                              Scalar external_virial)
    {
    Scalar3 final_sum = make_scalar3(Scalar(0.0), Scalar(0.0), Scalar(0.0));

    // sum up the values in the partial sum via a sliding window
    for (int start = 0; start < num_partial_sums; start += blockDim.x)
        {
        if (start + threadIdx.x < num_partial_sums)
            {
            Scalar4 scratch = d_scratch[start + threadIdx.x];
            compute_thermo_sdata[threadIdx.x] = make_scalar3(scratch.x, scratch.y, scratch.z);
            }
        else
            compute_thermo_sdata[threadIdx.x] = make_scalar3(Scalar(0.0), Scalar(0.0), Scalar(0.0));
        __syncthreads();

        // reduce the sum in parallel
        int offs = blockDim.x >> 1;
        while (offs > 0)
            {
            if (threadIdx.x < offs)
                {
                compute_thermo_sdata[threadIdx.x].x += compute_thermo_sdata[threadIdx.x + offs].x;
                compute_thermo_sdata[threadIdx.x].y += compute_thermo_sdata[threadIdx.x + offs].y;
                compute_thermo_sdata[threadIdx.x].z += compute_thermo_sdata[threadIdx.x + offs].z;
                }
            offs >>= 1;
            __syncthreads();
            }

        // accumulate the block's result into the running total
        if (threadIdx.x == 0)
            {
            final_sum.x += compute_thermo_sdata[0].x;
            final_sum.y += compute_thermo_sdata[0].y;
            final_sum.z += compute_thermo_sdata[0].z;
            }
        __syncthreads();
        }

    if (threadIdx.x == 0)
        {
        // compute final quantities
        Scalar ke_total = final_sum.x * Scalar(0.5);
        Scalar pe_total = final_sum.y;
        Scalar W = final_sum.z + external_virial;

        // compute the temperature
        Scalar temperature = Scalar(2.0) * Scalar(ke_total) / Scalar(ndof);

        // compute the pressure
        // volume/area & other 2D stuff needed
        Scalar volume;
        Scalar3 L = box.getL();

        if (D == 2)
            {
            // "volume" is area in 2D
            volume = L.x * L.y;
            // W needs to be corrected since the 1/3 factor is built in:
            // in 2D the virial trace should be divided by 2, hence the 3/2 rescale
            W *= Scalar(3.0)/Scalar(2.0);
            }
        else
            {
            volume = L.x * L.y * L.z;
            }

        // pressure: P = (N * K_B * T + W)/V
        Scalar pressure = (Scalar(2.0) * ke_total / Scalar(D) + W) / volume;

        // fill out the GPUArray
        d_properties[thermo_index::temperature] = temperature;
        d_properties[thermo_index::pressure] = pressure;
        d_properties[thermo_index::kinetic_energy] = Scalar(ke_total);
        d_properties[thermo_index::potential_energy] = Scalar(pe_total);
        }
    }
//! Complete partial sums and compute the final pressure tensor
/*! \param d_properties Property array to write final values
    \param d_scratch Partial sums
    \param box Box the particles are in
    \param group_size Number of particles in the group
    \param num_partial_sums Number of partial sums in \a d_scratch
    \param external_virial_xx External contribution to virial (xx component)
    \param external_virial_xy External contribution to virial (xy component)
    \param external_virial_xz External contribution to virial (xz component)
    \param external_virial_yy External contribution to virial (yy component)
    \param external_virial_yz External contribution to virial (yz component)
    \param external_virial_zz External contribution to virial (zz component)
    \param twod If true, the system is two-dimensional

    Only one block is executed. In that block, the partial sums are read in and reduced to final values.
    From the final sums, the components of the pressure tensor are computed and written to d_properties.

    6*sizeof(Scalar)*block_size bytes of shared memory are needed for this kernel to run.
*/
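// For reference, the quantity this kernel assembles is the usual pressure tensor estimator
// (a worked restatement of what the partial-sum kernel above accumulates, not extra code):
//
//     P_ab = ( sum_i m_i v_ia v_ib  +  sum_i W_i,ab  +  W_ext,ab ) / V,   a,b in {x,y,z}
//
// i.e. the per-particle kinetic and virial contributions summed by
// gpu_compute_pressure_tensor_partial_sums, plus the external virial, divided by the box volume
// (area in 2D).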
__global__ void gpu_compute_pressure_tensor_final_sums(Scalar *d_properties,
                                                       Scalar *d_scratch,
                                                       BoxDim box,
                                                       unsigned int group_size,
                                                       unsigned int num_partial_sums,
                                                       Scalar external_virial_xx,
                                                       Scalar external_virial_xy,
                                                       Scalar external_virial_xz,
                                                       Scalar external_virial_yy,
                                                       Scalar external_virial_yz,
                                                       Scalar external_virial_zz,
                                                       bool twod)
    {
    Scalar final_sum[6];
    final_sum[0] = external_virial_xx;
    final_sum[1] = external_virial_xy;
    final_sum[2] = external_virial_xz;
    final_sum[3] = external_virial_yy;
    final_sum[4] = external_virial_yz;
    final_sum[5] = external_virial_zz;

    // sum up the values in the partial sum via a sliding window
    for (int start = 0; start < num_partial_sums; start += blockDim.x)
        {
        if (start + threadIdx.x < num_partial_sums)
            {
            for (unsigned int i = 0; i < 6; i++)
                compute_pressure_tensor_sdata[i * blockDim.x + threadIdx.x] = d_scratch[i*num_partial_sums + start + threadIdx.x];
            }
        else
            {
            for (unsigned int i = 0; i < 6; i++)
                compute_pressure_tensor_sdata[i * blockDim.x + threadIdx.x] = Scalar(0.0);
            }
        __syncthreads();

        // reduce the sum in parallel
        int offs = blockDim.x >> 1;
        while (offs > 0)
            {
            if (threadIdx.x < offs)
                {
                for (unsigned int i = 0; i < 6; i++)
                    compute_pressure_tensor_sdata[i*blockDim.x + threadIdx.x] += compute_pressure_tensor_sdata[i*blockDim.x + threadIdx.x + offs];
                }
            offs >>= 1;
            __syncthreads();
            }

        if (threadIdx.x == 0)
            {
            for (unsigned int i = 0; i < 6; i++)
                final_sum[i] += compute_pressure_tensor_sdata[i*blockDim.x];
            }
        __syncthreads();
        }

    if (threadIdx.x == 0)
        {
        // fill out the GPUArray
        // we have thus far calculated the sum of the kinetic part of the pressure tensor
        // and the virial part; the definition includes an inverse factor of the box volume
        Scalar V = box.getVolume(twod);

        d_properties[thermo_index::pressure_xx] = final_sum[0]/V;
        d_properties[thermo_index::pressure_xy] = final_sum[1]/V;
        d_properties[thermo_index::pressure_xz] = final_sum[2]/V;
        d_properties[thermo_index::pressure_yy] = final_sum[3]/V;
        d_properties[thermo_index::pressure_yz] = final_sum[4]/V;
        d_properties[thermo_index::pressure_zz] = final_sum[5]/V;
        }
    }
//! Compute thermodynamic properties of a group on the GPU
/*! \param d_properties Array to write computed properties
    \param d_vel Particle velocities and masses on the GPU
    \param d_group_members List of group members
    \param group_size Number of group members
    \param box Box the particles are in
    \param args Additional arguments
    \param compute_pressure_tensor Whether to compute the full pressure tensor

    This function drives gpu_compute_thermo_partial_sums and gpu_compute_thermo_final_sums; see them for details.
    When \a compute_pressure_tensor is true, it also drives gpu_compute_pressure_tensor_partial_sums and
    gpu_compute_pressure_tensor_final_sums.
*/
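// Rough host-side usage sketch (illustration only; d_properties, d_vel, d_group_members and the
// compute_thermo_args fields are placeholders for buffers the caller, normally the ComputeThermoGPU
// class, already manages):
//
//     compute_thermo_args args;   // caller fills in d_net_force, d_net_virial, d_scratch,
//                                 // block/grid sizes, external virial components, ...
//     cudaError_t err = gpu_compute_thermo(d_properties, d_vel, d_group_members, group_size,
//                                          box, args, true); // true: also compute the full pressure tensor
//     // afterwards, d_properties holds temperature, pressure, KE, PE (and the pressure tensor)
//     // at the thermo_index:: offsets used above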
cudaError_t gpu_compute_thermo(Scalar *d_properties,
                               Scalar4 *d_vel,
                               unsigned int *d_group_members,
                               unsigned int group_size,
                               const BoxDim& box,
                               const compute_thermo_args& args,
                               const bool compute_pressure_tensor)
    {
    assert(d_properties);
    assert(d_group_members);
    assert(args.d_net_force);
    assert(args.d_net_virial);
    assert(args.d_scratch);

    dim3 grid(args.n_blocks, 1, 1);
    dim3 threads(args.block_size, 1, 1);
    unsigned int shared_bytes = sizeof(Scalar3)*args.block_size;

    Scalar external_virial = Scalar(1.0/3.0)*(args.external_virial_xx
                                              + args.external_virial_yy
                                              + args.external_virial_zz);

    gpu_compute_thermo_partial_sums<<<grid, threads, shared_bytes>>>(args.d_scratch,
                                                                     args.d_net_force,
                                                                     args.d_net_virial,
                                                                     args.virial_pitch,
                                                                     d_vel,
                                                                     d_group_members,
                                                                     group_size);

    if (compute_pressure_tensor)
        {
        assert(args.d_scratch_pressure_tensor);
        shared_bytes = 6 * sizeof(Scalar) * args.block_size;

        gpu_compute_pressure_tensor_partial_sums<<<grid, threads, shared_bytes>>>(args.d_scratch_pressure_tensor,
                                                                                  args.d_net_force,
                                                                                  args.d_net_virial,
                                                                                  args.virial_pitch,
                                                                                  d_vel,
                                                                                  d_group_members,
                                                                                  group_size);
        }

    // setup the grid to run the final kernel
    int final_block_size = 512;
    grid = dim3(1, 1, 1);
    threads = dim3(final_block_size, 1, 1);
    shared_bytes = sizeof(Scalar3)*final_block_size;

    gpu_compute_thermo_final_sums<<<grid, threads, shared_bytes>>>(d_properties,
                                                                   args.d_scratch,
                                                                   args.ndof,
                                                                   box,
                                                                   args.D,
                                                                   group_size,
                                                                   args.n_blocks,
                                                                   external_virial);

    if (compute_pressure_tensor)
        {
        shared_bytes = 6 * sizeof(Scalar) * final_block_size;

        gpu_compute_pressure_tensor_final_sums<<<grid, threads, shared_bytes>>>(d_properties,
                                                                                args.d_scratch_pressure_tensor,
                                                                                box,
                                                                                group_size,
                                                                                args.n_blocks,
                                                                                args.external_virial_xx,
                                                                                args.external_virial_xy,
                                                                                args.external_virial_xz,
                                                                                args.external_virial_yy,
                                                                                args.external_virial_yz,
                                                                                args.external_virial_zz,