Fix potential problem in Messenger related to MPI window
[hoomd-blue.git] / libhoomd / cuda / ParticleData.cu
blob6d369626fa5e2997a5f0c0d3955b3b519e9c4d37
1 /*
2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
32 permission.
34 Disclaimer
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: jglaser
52 #include "ParticleData.cuh"
54 /*! \file ParticleData.cu
55     \brief ImplementsGPU kernel code and data structure functions used by ParticleData
58 #ifdef ENABLE_MPI
60 #include <iterator>
62 #include <thrust/iterator/zip_iterator.h>
63 #include <thrust/iterator/counting_iterator.h>
64 #include <thrust/scatter.h>
65 #include <thrust/device_ptr.h>
67 #include "kernels/scan.cuh"
69 //! A tuple of pdata pointers
70 typedef thrust::tuple <
71     thrust::device_ptr<unsigned int>,  // tag
72     thrust::device_ptr<Scalar4>,       // pos
73     thrust::device_ptr<Scalar4>,       // vel
74     thrust::device_ptr<Scalar3>,       // accel
75     thrust::device_ptr<Scalar>,        // charge
76     thrust::device_ptr<Scalar>,        // diameter
77     thrust::device_ptr<int3>,          // image
78     thrust::device_ptr<unsigned int>,  // body
79     thrust::device_ptr<Scalar4>,       // orientation
80     thrust::device_ptr<unsigned int>   // communication flags
81     > pdata_it_tuple_gpu;
83 //! A zip iterator for filtering particle data
84 typedef thrust::zip_iterator<pdata_it_tuple_gpu> pdata_zip_gpu;
86 //! A tuple of pdata fields
87 typedef thrust::tuple <
88     unsigned int,  // tag
89     Scalar4,       // pos
90     Scalar4,       // vel
91     Scalar3,       // accel
92     Scalar,        // charge
93     Scalar,        // diameter
94     int3,          // image
95     unsigned int,  // body
96     Scalar4,       // orientation
97     unsigned int   // communication flags
98     > pdata_tuple_gpu;
100 //! Kernel to partition particle data
101 __global__ void gpu_scatter_particle_data_kernel(
102     const unsigned int N,
103     const Scalar4 *d_pos,
104     const Scalar4 *d_vel,
105     const Scalar3 *d_accel,
106     const Scalar *d_charge,
107     const Scalar *d_diameter,
108     const int3 *d_image,
109     const unsigned int *d_body,
110     const Scalar4 *d_orientation,
111     const unsigned int *d_tag,
112     unsigned int *d_rtag,
113     Scalar4 *d_pos_alt,
114     Scalar4 *d_vel_alt,
115     Scalar3 *d_accel_alt,
116     Scalar *d_charge_alt,
117     Scalar *d_diameter_alt,
118     int3 *d_image_alt,
119     unsigned int *d_body_alt,
120     Scalar4 *d_orientation_alt,
121     unsigned int *d_tag_alt,
122     pdata_element *d_out,
123     unsigned int *d_comm_flags,
124     unsigned int *d_comm_flags_out,
125     const unsigned int *d_scan)
126     {
127     unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
129     if (idx >= N) return;
131     bool remove = d_comm_flags[idx];
133     unsigned int scan_remove = d_scan[idx];
134     unsigned int scan_keep = idx - scan_remove;
136     if (remove)
137         {
138         pdata_element p;
139         p.pos = d_pos[idx];
140         p.vel = d_vel[idx];
141         p.accel = d_accel[idx];
142         p.charge = d_charge[idx];
143         p.diameter = d_diameter[idx];
144         p.image = d_image[idx];
145         p.body = d_body[idx];
146         p.orientation = d_orientation[idx];
147         p.tag = d_tag[idx];
148         d_out[scan_remove] = p;
149         d_comm_flags_out[scan_remove] = d_comm_flags[idx];
151         // reset communication flags
152         d_comm_flags[idx] = 0;
154         // reset rtag
155         d_rtag[p.tag] = NOT_LOCAL;
156         }
157     else
158         {
159         d_pos_alt[scan_keep] = d_pos[idx];
160         d_vel_alt[scan_keep] = d_vel[idx];
161         d_accel_alt[scan_keep] = d_accel[idx];
162         d_charge_alt[scan_keep] = d_charge[idx];
163         d_diameter_alt[scan_keep] = d_diameter[idx];
164         d_image_alt[scan_keep] = d_image[idx];
165         d_body_alt[scan_keep] = d_body[idx];
166         d_orientation_alt[scan_keep] = d_orientation[idx];
167         unsigned int tag = d_tag[idx];
168         d_tag_alt[scan_keep] = tag;
170         // update rtag
171         d_rtag[tag] = scan_keep;
172         }
174     }
176 __global__ void gpu_select_sent_particles(
177     unsigned int N,
178     unsigned int *d_comm_flags,
179     unsigned int *d_tmp)
180     {
181     unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
183     if (idx >= N) return;
184     d_tmp[idx] = d_comm_flags[idx] ? 1 : 0;
185     }
187 /*! \param N Number of local particles
188     \param d_pos Device array of particle positions
189     \param d_vel Device array of particle velocities
190     \param d_accel Device array of particle accelerations
191     \param d_charge Device array of particle charges
192     \param d_diameter Device array of particle diameters
193     \param d_image Device array of particle images
194     \param d_body Device array of particle body tags
195     \param d_orientation Device array of particle orientations
196     \param d_tag Device array of particle tags
197     \param d_rtag Device array for reverse-lookup table
198     \param d_pos_alt Device array of particle positions (output)
199     \param d_vel_alt Device array of particle velocities (output)
200     \param d_accel_alt Device array of particle accelerations (output)
201     \param d_charge_alt Device array of particle charges (output)
202     \param d_diameter_alt Device array of particle diameters (output)
203     \param d_image_alt Device array of particle images (output)
204     \param d_body_alt Device array of particle body tags (output)
205     \param d_orientation_alt Device array of particle orientations (output)
206     \param d_out Output array for packed particle data
207     \param max_n_out Maximum number of elements to write to output array
209     \returns Number of elements marked for removal
210  */
211 unsigned int gpu_pdata_remove(const unsigned int N,
212                     const Scalar4 *d_pos,
213                     const Scalar4 *d_vel,
214                     const Scalar3 *d_accel,
215                     const Scalar *d_charge,
216                     const Scalar *d_diameter,
217                     const int3 *d_image,
218                     const unsigned int *d_body,
219                     const Scalar4 *d_orientation,
220                     const unsigned int *d_tag,
221                     unsigned int *d_rtag,
222                     Scalar4 *d_pos_alt,
223                     Scalar4 *d_vel_alt,
224                     Scalar3 *d_accel_alt,
225                     Scalar *d_charge_alt,
226                     Scalar *d_diameter_alt,
227                     int3 *d_image_alt,
228                     unsigned int *d_body_alt,
229                     Scalar4 *d_orientation_alt,
230                     unsigned int *d_tag_alt,
231                     pdata_element *d_out,
232                     unsigned int *d_comm_flags,
233                     unsigned int *d_comm_flags_out,
234                     unsigned int max_n_out,
235                     unsigned int *d_tmp,
236                     mgpu::ContextPtr mgpu_context)
237     {
238     unsigned int n_out;
240     // partition particle data into local and removed particles
241     unsigned int block_size =512;
242     unsigned int n_blocks = N/block_size+1;
244     // select nonzero communication flags
245     gpu_select_sent_particles<<<n_blocks, block_size>>>(
246         N,
247         d_comm_flags,
248         d_tmp);
250     // perform a scan over the array of ones and zeroes
251     mgpu::Scan<mgpu::MgpuScanTypeExc>(d_tmp,
252         N, (unsigned int) 0, mgpu::plus<unsigned int>(),
253         (unsigned int *)NULL, &n_out, d_tmp, *mgpu_context);
255     // Don't write past end of buffer
256     if (n_out <= max_n_out)
257         {
258         // partition particle data into local and removed particles
259         unsigned int block_size =512;
260         unsigned int n_blocks = N/block_size+1;
262         gpu_scatter_particle_data_kernel<<<n_blocks, block_size>>>(
263             N,
264             d_pos,
265             d_vel,
266             d_accel,
267             d_charge,
268             d_diameter,
269             d_image,
270             d_body,
271             d_orientation,
272             d_tag,
273             d_rtag,
274             d_pos_alt,
275             d_vel_alt,
276             d_accel_alt,
277             d_charge_alt,
278             d_diameter_alt,
279             d_image_alt,
280             d_body_alt,
281             d_orientation_alt,
282             d_tag_alt,
283             d_out,
284             d_comm_flags,
285             d_comm_flags_out,
286             d_tmp);
287         }
289     // return elements written to output stream
290     return n_out;
291     }
293 //! A converter from pdata_element to a tuple of tag and a tuple of pdata entries
294 /*! Writes the tag into the rtag table at the same time
295  */
296 struct to_pdata_tuple_gpu : public thrust::unary_function<const pdata_element, pdata_tuple_gpu>
297     {
298     __device__ const pdata_tuple_gpu operator() (const pdata_element p)
299         {
300         // make tuple
301         return thrust::make_tuple(
302             p.tag,
303             p.pos,
304             p.vel,
305             p.accel,
306             p.charge,
307             p.diameter,
308             p.image,
309             p.body,
310             p.orientation,
311             0 // communication flags
312             );
313         }
314     };
317 /*! \param old_nparticles old local particle count
318     \param num_add_ptls Number of particles in input array
319     \param d_pos Device array of particle positions
320     \param d_vel Device iarray of particle velocities
321     \param d_accel Device array of particle accelerations
322     \param d_charge Device array of particle charges
323     \param d_diameter Device array of particle diameters
324     \param d_image Device array of particle images
325     \param d_body Device array of particle body tags
326     \param d_orientation Device array of particle orientations
327     \param d_tag Device array of particle tags
328     \param d_rtag Device array for reverse-lookup table
329     \param d_in Device array of packed input particle data
330     \param d_comm_flags Device array of communication flags (pdata)
332 void gpu_pdata_add_particles(const unsigned int old_nparticles,
333                     const unsigned int num_add_ptls,
334                     Scalar4 *d_pos,
335                     Scalar4 *d_vel,
336                     Scalar3 *d_accel,
337                     Scalar *d_charge,
338                     Scalar *d_diameter,
339                     int3 *d_image,
340                     unsigned int *d_body,
341                     Scalar4 *d_orientation,
342                     unsigned int *d_tag,
343                     unsigned int *d_rtag,
344                     const pdata_element *d_in,
345                     unsigned int *d_comm_flags)
346     {
347     // wrap device arrays into thrust ptr
348     thrust::device_ptr<Scalar4> pos_ptr(d_pos);
349     thrust::device_ptr<Scalar4> vel_ptr(d_vel);
350     thrust::device_ptr<Scalar3> accel_ptr(d_accel);
351     thrust::device_ptr<Scalar> charge_ptr(d_charge);
352     thrust::device_ptr<Scalar> diameter_ptr(d_diameter);
353     thrust::device_ptr<int3> image_ptr(d_image);
354     thrust::device_ptr<unsigned int> body_ptr(d_body);
355     thrust::device_ptr<Scalar4> orientation_ptr(d_orientation);
356     thrust::device_ptr<unsigned int> tag_ptr(d_tag);
357     thrust::device_ptr<unsigned int> comm_flags_ptr(d_comm_flags);
359     // wrap input array
360     thrust::device_ptr<const pdata_element> in_ptr(d_in);
362     // Construct zip iterator
363     pdata_zip_gpu pdata_begin(
364        thrust::make_tuple(
365             tag_ptr,
366             pos_ptr,
367             vel_ptr,
368             accel_ptr,
369             charge_ptr,
370             diameter_ptr,
371             image_ptr,
372             body_ptr,
373             orientation_ptr,
374             comm_flags_ptr
375             )
376         );
377     pdata_zip_gpu pdata_end = pdata_begin + old_nparticles;
379     // wrap reverse-lookup table
380     thrust::device_ptr<unsigned int> rtag_ptr(d_rtag);
382     typedef thrust::counting_iterator<unsigned int> count_it;
384     // add new particles at the end
385     thrust::transform(in_ptr, in_ptr + num_add_ptls, pdata_end, to_pdata_tuple_gpu());
387     // update rtags
388     thrust::counting_iterator<unsigned int> idx(old_nparticles);
389     thrust::scatter(idx, idx + num_add_ptls, tag_ptr+old_nparticles, rtag_ptr);
390     }
392 #endif // ENABLE_MPI