/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivative works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Maintainer: sbarr

/*! \file PPPMForceComputeGPU.cc
    \brief Defines the PPPMForceComputeGPU class
*/

#ifdef SINGLE_PRECISION
#define CUFFT_TRANSFORM_TYPE CUFFT_C2C
#else
#define CUFFT_TRANSFORM_TYPE CUFFT_Z2Z
#endif
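
// CUFFT_TRANSFORM_TYPE picks the complex-to-complex transform matching the build's Scalar type:
// CUFFT_C2C for single precision, CUFFT_Z2Z for double precision.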

#include "PotentialPair.h"
#include "PPPMForceComputeGPU.h"

#include <boost/python.hpp>
using namespace boost::python;

#include <boost/bind.hpp>
using namespace boost;

using namespace std;

/*! \param sysdef System to compute forces on
    \param nlist Neighbor list
    \param group Particle group
*/
PPPMForceComputeGPU::PPPMForceComputeGPU(boost::shared_ptr<SystemDefinition> sysdef,
                                         boost::shared_ptr<NeighborList> nlist,
                                         boost::shared_ptr<ParticleGroup> group)
    : PPPMForceCompute(sysdef, nlist, group), m_block_size(256), m_first_run(true)
    {
    // can't run on the GPU if there aren't any GPUs in the execution configuration
    if (!exec_conf->isCUDAEnabled())
        {
        m_exec_conf->msg->error() << "Creating a PPPMForceComputeGPU with no GPU in the execution configuration" << endl;
        throw std::runtime_error("Error initializing PPPMForceComputeGPU");
        }
    CHECK_CUDA_ERROR();
    }

PPPMForceComputeGPU::~PPPMForceComputeGPU()
    {
    cufftDestroy(plan);
    }

/*!
    \param Nx Number of grid points in x direction
    \param Ny Number of grid points in y direction
    \param Nz Number of grid points in z direction
    \param order Number of grid points in each direction to assign charges to
    \param kappa Screening parameter in erfc
    \param rcut Short-ranged cutoff, used for computing the relative force error

    Sets parameters for the long-ranged part of the electrostatics calculation and updates the
    parameters on the GPU.
*/
void PPPMForceComputeGPU::setParams(int Nx, int Ny, int Nz, int order, Scalar kappa, Scalar rcut)
    {
    PPPMForceCompute::setParams(Nx, Ny, Nz, order, kappa, rcut);
    cufftPlan3d(&plan, Nx, Ny, Nz, CUFFT_TRANSFORM_TYPE);
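
    // The cuFFT plan is used for the forward and inverse 3D transforms of the Nx x Ny x Nz
    // charge mesh. The GPUArrays allocated below hold one value per mesh point and serve as
    // scratch space for the GPU reductions of the reciprocal-space energy and the six virial
    // components (see gpu_compute_pppm_thermo in computeForces()).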

    GPUArray<Scalar> n_energy_sum(Nx*Ny*Nz, exec_conf);
    m_energy_sum.swap(n_energy_sum);

    GPUArray<Scalar> n_v_xx_sum(Nx*Ny*Nz, exec_conf);
    m_v_xx_sum.swap(n_v_xx_sum);

    GPUArray<Scalar> n_v_xy_sum(Nx*Ny*Nz, exec_conf);
    m_v_xy_sum.swap(n_v_xy_sum);

    GPUArray<Scalar> n_v_xz_sum(Nx*Ny*Nz, exec_conf);
    m_v_xz_sum.swap(n_v_xz_sum);

    GPUArray<Scalar> n_v_yy_sum(Nx*Ny*Nz, exec_conf);
    m_v_yy_sum.swap(n_v_yy_sum);

    GPUArray<Scalar> n_v_yz_sum(Nx*Ny*Nz, exec_conf);
    m_v_yz_sum.swap(n_v_yz_sum);

    GPUArray<Scalar> n_v_zz_sum(Nx*Ny*Nz, exec_conf);
    m_v_zz_sum.swap(n_v_zz_sum);

    GPUArray<Scalar> n_o_data(Nx*Ny*Nz, exec_conf);
    o_data.swap(n_o_data);
    }

/*! Internal method for computing the forces on the GPU.
    \post The force data on the GPU is written with the calculated forces

    \param timestep Current time step of the simulation

    Calls gpu_compute_pppm_forces to do the dirty work.
*/
void PPPMForceComputeGPU::computeForces(unsigned int timestep)
    {
    if (!m_params_set)
        {
        m_exec_conf->msg->error() << "charge.pppm: setParams must be called prior to computeForces()" << endl;
        throw std::runtime_error("Error computing forces in PPPMForceComputeGPU");
        }

    unsigned int group_size = m_group->getNumMembers();
    // just drop out if the group is an empty group
    if (group_size == 0)
        return;

    // start the profile
    if (m_prof) m_prof->push(exec_conf, "PPPM");

    assert(m_pdata);

    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
    ArrayHandle<Scalar> d_charge(m_pdata->getCharges(), access_location::device, access_mode::read);

    BoxDim box = m_pdata->getBox();
    ArrayHandle<CUFFTCOMPLEX> d_rho_real_space(m_rho_real_space, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ex(m_Ex, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ey(m_Ey, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ez(m_Ez, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar3> d_kvec(m_kvec, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar> d_green_hat(m_green_hat, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::readwrite);
    ArrayHandle<Scalar3> d_field(m_field, access_location::device, access_mode::readwrite);

    { //begin ArrayHandle scope
    ArrayHandle<Scalar4> d_force(m_force,access_location::device,access_mode::overwrite);
    ArrayHandle<Scalar> d_virial(m_virial,access_location::device,access_mode::overwrite);

    // reset virial
    cudaMemset(d_virial.data, 0, sizeof(Scalar)*m_virial.getNumElements());

    //Zero pppm forces on ALL particles, including neutral ones that aren't taken
    //care of in calculate_forces_kernel
    cudaMemset(d_force.data, 0, sizeof(Scalar4)*m_force.getNumElements());

    // access the group
    ArrayHandle< unsigned int > d_index_array(m_group->getIndexArray(), access_location::device, access_mode::read);
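
    // Forces are computed only for particles in the supplied group; d_index_array maps a
    // group-member index used by the kernels back to that particle's index in the particle data.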

    if(m_box_changed || m_first_run)
        {
        Scalar3 L = box.getL();
        Scalar temp = floor(((m_kappa*L.x/(M_PI*m_Nx)) * pow(-log(EPS_HOC),0.25)));
        int nbx = (int)temp;
        temp = floor(((m_kappa*L.y/(M_PI*m_Ny)) * pow(-log(EPS_HOC),0.25)));
        int nby = (int)temp;
        temp = floor(((m_kappa*L.z/(M_PI*m_Nz)) * pow(-log(EPS_HOC),0.25)));
        int nbz = (int)temp;
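
        // nbx, nby and nbz bound the number of aliasing images summed per dimension when the
        // optimal influence function (green_hat) is tabulated; EPS_HOC controls where that sum
        // is truncated. This mirrors the standard PPPM estimate used in LAMMPS.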

        ArrayHandle<Scalar> d_vg(m_vg, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_gf_b(m_gf_b, access_location::device, access_mode::readwrite);
        reset_kvec_green_hat(box,
                             m_Nx,
                             m_Ny,
                             m_Nz,
                             nbx,
                             nby,
                             nbz,
                             m_order,
                             m_kappa,
                             d_kvec.data,
                             d_green_hat.data,
                             d_vg.data,
                             d_gf_b.data,
                             m_block_size);

        Scalar scale = Scalar(1.0)/((Scalar)(m_Nx * m_Ny * m_Nz));
        m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
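
        // scale*scale compensates for the two unnormalized cuFFT transforms entering each
        // quadratic term of the energy/virial sums, and 0.5*L.x*L.y*L.z is the usual volume
        // prefactor of the Ewald reciprocal-space sum.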

        m_box_changed = false;
        m_first_run = false;
        }

    // run the kernel in parallel on all GPUs
    gpu_compute_pppm_forces(d_force.data,
                            m_pdata->getN(),
                            d_pos.data,
                            d_charge.data,
                            box,
                            m_Nx,
                            m_Ny,
                            m_Nz,
                            m_order,
                            h_rho_coeff.data,
                            d_rho_real_space.data,
                            plan,
                            d_Ex.data,
                            d_Ey.data,
                            d_Ez.data,
                            d_kvec.data,
                            d_green_hat.data,
                            d_field.data,
                            d_index_array.data,
                            group_size,
                            m_block_size,
                            m_exec_conf->getComputeCapability());
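
    // The call above drives the full PPPM pipeline on the device: assign particle charges to the
    // mesh, forward FFT the mesh, multiply by the influence function and k-vectors to obtain the
    // field in reciprocal space, inverse FFT, and interpolate the field back onto the particles
    // to obtain forces. (Summary of the standard PPPM algorithm.)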

    if (exec_conf->isCUDAErrorCheckingEnabled())
        CHECK_CUDA_ERROR();

    // If there are exclusions, correct for the long-range part of the potential
    if( m_nlist->getExclusionsSet())
        {
        ArrayHandle<unsigned int> d_exlist(m_nlist->getExListArray(), access_location::device, access_mode::read);
        ArrayHandle<unsigned int> d_n_ex(m_nlist->getNExArray(), access_location::device, access_mode::read);
        Index2D nex = m_nlist->getExListIndexer();
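
        // fix_exclusions (below) subtracts the reciprocal-space interaction between excluded
        // pairs, which the mesh calculation above necessarily includes for every pair of charges.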

        fix_exclusions(d_force.data,
                       d_virial.data,
                       m_virial.getPitch(),
                       m_pdata->getN(),
                       d_pos.data,
                       d_charge.data,
                       box,
                       d_n_ex.data,
                       d_exlist.data,
                       nex,
                       m_kappa,
                       d_index_array.data,
                       group_size,
                       m_block_size,
                       m_exec_conf->getComputeCapability());
        if (exec_conf->isCUDAErrorCheckingEnabled())
            CHECK_CUDA_ERROR();
        }
    } // end ArrayHandle scope

    PDataFlags flags = m_pdata->getFlags();

    if(flags[pdata_flag::potential_energy] || flags[pdata_flag::pressure_tensor] || flags[pdata_flag::isotropic_virial])
        {
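
        // The energy and virial reductions are performed only on timesteps where they are
        // requested through the particle data flags, avoiding the extra mesh reductions otherwise.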
        ArrayHandle<Scalar> d_vg(m_vg, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_energy_sum(m_energy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xx_sum(m_v_xx_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xy_sum(m_v_xy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xz_sum(m_v_xz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_yy_sum(m_v_yy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_yz_sum(m_v_yz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_zz_sum(m_v_zz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_o_data(o_data, access_location::device, access_mode::readwrite);
        Scalar pppm_virial_energy[7];
        gpu_compute_pppm_thermo(m_Nx,
                                m_Ny,
                                m_Nz,
                                d_rho_real_space.data,
                                d_vg.data,
                                d_green_hat.data,
                                d_o_data.data,
                                d_energy_sum.data,
                                d_v_xx_sum.data,
                                d_v_xy_sum.data,
                                d_v_xz_sum.data,
                                d_v_yy_sum.data,
                                d_v_yz_sum.data,
                                d_v_zz_sum.data,
                                pppm_virial_energy,
                                m_block_size);

        Scalar3 L = box.getL();

        pppm_virial_energy[0] *= m_energy_virial_factor;
        pppm_virial_energy[0] -= m_q2 * m_kappa / Scalar(1.772453850905516027298168);
        pppm_virial_energy[0] -= 0.5*M_PI*m_q*m_q / (m_kappa*m_kappa* L.x * L.y * L.z);
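
        // The two subtractions above are the standard Ewald corrections to the mesh energy:
        // the self-energy kappa*sum(q_i^2)/sqrt(pi) (1.7724... = sqrt(pi)) and the uniform
        // neutralizing-background term pi*Q^2/(2*kappa^2*V), which vanishes for a neutral system.
        // (This assumes m_q2 holds sum(q_i^2) and m_q the total charge Q.)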

        for(int i = 1; i < 7; i++)
            {
            pppm_virial_energy[i] *= m_energy_virial_factor;
            }

        // apply the correction to particle 0
        ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
        ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
        unsigned int virial_pitch = m_virial.getPitch();

        h_force.data[0].w += pppm_virial_energy[0];
        h_virial.data[0*virial_pitch+0] += pppm_virial_energy[1]; // xx
        h_virial.data[1*virial_pitch+0] += pppm_virial_energy[2]; // xy
        h_virial.data[2*virial_pitch+0] += pppm_virial_energy[3]; // xz
        h_virial.data[3*virial_pitch+0] += pppm_virial_energy[4]; // yy
        h_virial.data[4*virial_pitch+0] += pppm_virial_energy[5]; // yz
        h_virial.data[5*virial_pitch+0] += pppm_virial_energy[6]; // zz
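
        // The total k-space energy and the six independent components of the symmetric virial
        // tensor are deposited on particle 0 only; individual per-particle values are not
        // meaningful here, but the global sums used for logging and the pressure are correct.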
        }

    if (m_prof) m_prof->pop(exec_conf);
    }

void export_PPPMForceComputeGPU()
    {
    class_<PPPMForceComputeGPU, boost::shared_ptr<PPPMForceComputeGPU>, bases<PPPMForceCompute>, boost::noncopyable >
        ("PPPMForceComputeGPU", init< boost::shared_ptr<SystemDefinition>,
                                      boost::shared_ptr<NeighborList>,
                                      boost::shared_ptr<ParticleGroup> >())
        .def("setBlockSize", &PPPMForceComputeGPU::setBlockSize)
        ;
    }