2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: jglaser
52 #include "ParticleData.cuh"
54 /*! \file ParticleData.cu
55 \brief ImplementsGPU kernel code and data structure functions used by ParticleData
62 #include <thrust/iterator/zip_iterator.h>
63 #include <thrust/iterator/counting_iterator.h>
64 #include <thrust/scatter.h>
65 #include <thrust/device_ptr.h>
67 #include "kernels/scan.cuh"
69 //! A tuple of pdata pointers
70 typedef thrust::tuple <
71 thrust::device_ptr<unsigned int>, // tag
72 thrust::device_ptr<Scalar4>, // pos
73 thrust::device_ptr<Scalar4>, // vel
74 thrust::device_ptr<Scalar3>, // accel
75 thrust::device_ptr<Scalar>, // charge
76 thrust::device_ptr<Scalar>, // diameter
77 thrust::device_ptr<int3>, // image
78 thrust::device_ptr<unsigned int>, // body
79 thrust::device_ptr<Scalar4>, // orientation
80 thrust::device_ptr<unsigned int> // communication flags
83 //! A zip iterator for filtering particle data
84 typedef thrust::zip_iterator<pdata_it_tuple_gpu> pdata_zip_gpu;
86 //! A tuple of pdata fields
87 typedef thrust::tuple <
96 Scalar4, // orientation
97 unsigned int // communication flags
100 //! Kernel to partition particle data
101 __global__ void gpu_scatter_particle_data_kernel(
102 const unsigned int N,
103 const Scalar4 *d_pos,
104 const Scalar4 *d_vel,
105 const Scalar3 *d_accel,
106 const Scalar *d_charge,
107 const Scalar *d_diameter,
109 const unsigned int *d_body,
110 const Scalar4 *d_orientation,
111 const unsigned int *d_tag,
112 unsigned int *d_rtag,
115 Scalar3 *d_accel_alt,
116 Scalar *d_charge_alt,
117 Scalar *d_diameter_alt,
119 unsigned int *d_body_alt,
120 Scalar4 *d_orientation_alt,
121 unsigned int *d_tag_alt,
122 pdata_element *d_out,
123 unsigned int *d_comm_flags,
124 unsigned int *d_comm_flags_out,
125 const unsigned int *d_scan)
127 unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
129 if (idx >= N) return;
131 bool remove = d_comm_flags[idx];
133 unsigned int scan_remove = d_scan[idx];
134 unsigned int scan_keep = idx - scan_remove;
141 p.accel = d_accel[idx];
142 p.charge = d_charge[idx];
143 p.diameter = d_diameter[idx];
144 p.image = d_image[idx];
145 p.body = d_body[idx];
146 p.orientation = d_orientation[idx];
148 d_out[scan_remove] = p;
149 d_comm_flags_out[scan_remove] = d_comm_flags[idx];
151 // reset communication flags
152 d_comm_flags[idx] = 0;
155 d_rtag[p.tag] = NOT_LOCAL;
159 d_pos_alt[scan_keep] = d_pos[idx];
160 d_vel_alt[scan_keep] = d_vel[idx];
161 d_accel_alt[scan_keep] = d_accel[idx];
162 d_charge_alt[scan_keep] = d_charge[idx];
163 d_diameter_alt[scan_keep] = d_diameter[idx];
164 d_image_alt[scan_keep] = d_image[idx];
165 d_body_alt[scan_keep] = d_body[idx];
166 d_orientation_alt[scan_keep] = d_orientation[idx];
167 unsigned int tag = d_tag[idx];
168 d_tag_alt[scan_keep] = tag;
171 d_rtag[tag] = scan_keep;
176 __global__ void gpu_select_sent_particles(
178 unsigned int *d_comm_flags,
181 unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
183 if (idx >= N) return;
184 d_tmp[idx] = d_comm_flags[idx] ? 1 : 0;
187 /*! \param N Number of local particles
188 \param d_pos Device array of particle positions
189 \param d_vel Device array of particle velocities
190 \param d_accel Device array of particle accelerations
191 \param d_charge Device array of particle charges
192 \param d_diameter Device array of particle diameters
193 \param d_image Device array of particle images
194 \param d_body Device array of particle body tags
195 \param d_orientation Device array of particle orientations
196 \param d_tag Device array of particle tags
197 \param d_rtag Device array for reverse-lookup table
198 \param d_pos_alt Device array of particle positions (output)
199 \param d_vel_alt Device array of particle velocities (output)
200 \param d_accel_alt Device array of particle accelerations (output)
201 \param d_charge_alt Device array of particle charges (output)
202 \param d_diameter_alt Device array of particle diameters (output)
203 \param d_image_alt Device array of particle images (output)
204 \param d_body_alt Device array of particle body tags (output)
205 \param d_orientation_alt Device array of particle orientations (output)
206 \param d_out Output array for packed particle data
207 \param max_n_out Maximum number of elements to write to output array
209 \returns Number of elements marked for removal
211 unsigned int gpu_pdata_remove(const unsigned int N,
212 const Scalar4 *d_pos,
213 const Scalar4 *d_vel,
214 const Scalar3 *d_accel,
215 const Scalar *d_charge,
216 const Scalar *d_diameter,
218 const unsigned int *d_body,
219 const Scalar4 *d_orientation,
220 const unsigned int *d_tag,
221 unsigned int *d_rtag,
224 Scalar3 *d_accel_alt,
225 Scalar *d_charge_alt,
226 Scalar *d_diameter_alt,
228 unsigned int *d_body_alt,
229 Scalar4 *d_orientation_alt,
230 unsigned int *d_tag_alt,
231 pdata_element *d_out,
232 unsigned int *d_comm_flags,
233 unsigned int *d_comm_flags_out,
234 unsigned int max_n_out,
236 mgpu::ContextPtr mgpu_context)
240 // partition particle data into local and removed particles
241 unsigned int block_size =512;
242 unsigned int n_blocks = N/block_size+1;
244 // select nonzero communication flags
245 gpu_select_sent_particles<<<n_blocks, block_size>>>(
250 // perform a scan over the array of ones and zeroes
251 mgpu::Scan<mgpu::MgpuScanTypeExc>(d_tmp,
252 N, (unsigned int) 0, mgpu::plus<unsigned int>(),
253 (unsigned int *)NULL, &n_out, d_tmp, *mgpu_context);
255 // Don't write past end of buffer
256 if (n_out <= max_n_out)
258 // partition particle data into local and removed particles
259 unsigned int block_size =512;
260 unsigned int n_blocks = N/block_size+1;
262 gpu_scatter_particle_data_kernel<<<n_blocks, block_size>>>(
289 // return elements written to output stream
293 //! A converter from pdata_element to a tuple of tag and a tuple of pdata entries
294 /*! Writes the tag into the rtag table at the same time
296 struct to_pdata_tuple_gpu : public thrust::unary_function<const pdata_element, pdata_tuple_gpu>
298 __device__ const pdata_tuple_gpu operator() (const pdata_element p)
301 return thrust::make_tuple(
311 0 // communication flags
317 /*! \param old_nparticles old local particle count
318 \param num_add_ptls Number of particles in input array
319 \param d_pos Device array of particle positions
320 \param d_vel Device iarray of particle velocities
321 \param d_accel Device array of particle accelerations
322 \param d_charge Device array of particle charges
323 \param d_diameter Device array of particle diameters
324 \param d_image Device array of particle images
325 \param d_body Device array of particle body tags
326 \param d_orientation Device array of particle orientations
327 \param d_tag Device array of particle tags
328 \param d_rtag Device array for reverse-lookup table
329 \param d_in Device array of packed input particle data
330 \param d_comm_flags Device array of communication flags (pdata)
332 void gpu_pdata_add_particles(const unsigned int old_nparticles,
333 const unsigned int num_add_ptls,
340 unsigned int *d_body,
341 Scalar4 *d_orientation,
343 unsigned int *d_rtag,
344 const pdata_element *d_in,
345 unsigned int *d_comm_flags)
347 // wrap device arrays into thrust ptr
348 thrust::device_ptr<Scalar4> pos_ptr(d_pos);
349 thrust::device_ptr<Scalar4> vel_ptr(d_vel);
350 thrust::device_ptr<Scalar3> accel_ptr(d_accel);
351 thrust::device_ptr<Scalar> charge_ptr(d_charge);
352 thrust::device_ptr<Scalar> diameter_ptr(d_diameter);
353 thrust::device_ptr<int3> image_ptr(d_image);
354 thrust::device_ptr<unsigned int> body_ptr(d_body);
355 thrust::device_ptr<Scalar4> orientation_ptr(d_orientation);
356 thrust::device_ptr<unsigned int> tag_ptr(d_tag);
357 thrust::device_ptr<unsigned int> comm_flags_ptr(d_comm_flags);
360 thrust::device_ptr<const pdata_element> in_ptr(d_in);
362 // Construct zip iterator
363 pdata_zip_gpu pdata_begin(
377 pdata_zip_gpu pdata_end = pdata_begin + old_nparticles;
379 // wrap reverse-lookup table
380 thrust::device_ptr<unsigned int> rtag_ptr(d_rtag);
382 typedef thrust::counting_iterator<unsigned int> count_it;
384 // add new particles at the end
385 thrust::transform(in_ptr, in_ptr + num_add_ptls, pdata_end, to_pdata_tuple_gpu());
388 thrust::counting_iterator<unsigned int> idx(old_nparticles);
389 thrust::scatter(idx, idx + num_add_ptls, tag_ptr+old_nparticles, rtag_ptr);