Merge branch 'maint'
[hoomd-blue.git] / libhoomd / computes / PPPMForceCompute.cc
blob7684c0b2210f1e695e3d6d6a4f3524737c827aa5
1 /*
2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
32 permission.
34 Disclaimer
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: sbarr
52 #ifdef WIN32
53 #pragma warning( push )
54 #pragma warning( disable : 4244 )
55 #endif
57 #include <boost/python.hpp>
58 #include <boost/bind.hpp>
60 #include "PPPMForceCompute.h"
62 #include <iostream>
63 #include <sstream>
64 #include <stdexcept>
65 #include <math.h>
67 using namespace boost;
68 using namespace boost::python;
69 using namespace std;
71 //! Coefficients of a power expansion of sin(x)/x
// Taylor coefficients 1/(2k+1)! with alternating sign, truncated after six
// terms; consumed by sinc() below for arguments with x*x <= 1.
72 const Scalar cpu_sinc_coeff[] = {Scalar(1.0), Scalar(-1.0/6.0), Scalar(1.0/120.0),
73 Scalar(-1.0/5040.0),Scalar(1.0/362880.0),
74 Scalar(-1.0/39916800.0)};
77 /*! \file PPPMForceCompute.cc
78 \brief Contains code for the PPPMForceCompute class
81 /*! \param sysdef System to compute forces on
82 \param nlist Neighbor list
83 \param group Particle group
84 \post Memory is allocated, and forces are zeroed.
86 PPPMForceCompute::PPPMForceCompute(boost::shared_ptr<SystemDefinition> sysdef,
87 boost::shared_ptr<NeighborList> nlist,
88 boost::shared_ptr<ParticleGroup> group)
// NOTE(review): the FFT scratch buffers are NULL-initialized here, but the
// kiss FFT plan members (fft_forward/fft_inverse, allocated lazily in
// computeForces) are not -- confirm they are never read before allocation.
89 : ForceCompute(sysdef), m_params_set(false), m_nlist(nlist), m_group(group),
90 fft_in(NULL), fft_ex(NULL), fft_ey(NULL), fft_ez(NULL)
92 m_exec_conf->msg->notice(5) << "Constructing PPPMForceCompute" << endl;
94 assert(m_pdata);
95 assert(m_nlist);
// Subscribe to box-size changes so the k-vectors and Green's function can be
// rebuilt lazily on the next computeForces() call (see m_box_changed there).
97 m_box_changed = false;
98 m_boxchange_connection = m_pdata->connectBoxChange(bind(&PPPMForceCompute::slotBoxChanged, this));
101 PPPMForceCompute::~PPPMForceCompute()
103 m_exec_conf->msg->notice(5) << "Destroying PPPMForceCompute" << endl;
// Release the kiss FFT scratch buffers allocated lazily in computeForces().
105 if (fft_in)
106 free(fft_in);
107 if (fft_ex)
108 free(fft_ex);
109 if (fft_ey)
110 free(fft_ey);
111 if (fft_ez)
112 free(fft_ez);
// NOTE(review): the fft_forward/fft_inverse plans (kiss_fftnd_alloc'd in
// computeForces) are never freed here -- looks like a leak. Cannot be fixed
// safely without also NULL-initializing them in the constructor; TODO confirm.
114 m_boxchange_connection.disconnect();
117 /*! \param Nx Number of grid points in x direction
118 \param Ny Number of grid points in y direction
119 \param Nz Number of grid points in z direction
120 \param order Number of grid points in each direction to assign charges to
121 \param kappa Screening parameter in erfc
122 \param rcut Short-ranged cutoff, used for computing the relative force error
124 Sets parameters for the long-ranged part of the electrostatics calculation
126 void PPPMForceCompute::setParams(int Nx, int Ny, int Nz, int order, Scalar kappa, Scalar rcut)
// Record the user parameters; computeForces() throws until this has been called.
128 m_params_set = true;
129 m_Nx = Nx;
130 m_Ny = Ny;
131 m_Nz = Nz;
132 m_order = order;
133 m_kappa = kappa;
134 m_rcut = rcut;
// Force computeForces() to (re)allocate the FFT buffers/plans on its next call.
135 first_run = 0;
// Warn (but do not fail) when a grid dimension is not a power of two in
// [2, 1024]; the FFT still works, just with degraded performance.
137 if(!(m_Nx == 2)&& !(m_Nx == 4)&& !(m_Nx == 8)&& !(m_Nx == 16)&& !(m_Nx == 32)&& !(m_Nx == 64)&& !(m_Nx == 128)&& !(m_Nx == 256)&& !(m_Nx == 512)&& !(m_Nx == 1024))
139 m_exec_conf->msg->warning() << "charge.pppm: PPPM X gridsize should be a power of 2 for the best performance" << endl;
141 if(!(m_Ny == 2)&& !(m_Ny == 4)&& !(m_Ny == 8)&& !(m_Ny == 16)&& !(m_Ny == 32)&& !(m_Ny == 64)&& !(m_Ny == 128)&& !(m_Ny == 256)&& !(m_Ny == 512)&& !(m_Ny == 1024))
143 m_exec_conf->msg->warning() << "charge.pppm: PPPM Y gridsize should be a power of 2 for the best performance" << endl;
145 if(!(m_Nz == 2)&& !(m_Nz == 4)&& !(m_Nz == 8)&& !(m_Nz == 16)&& !(m_Nz == 32)&& !(m_Nz == 64)&& !(m_Nz == 128)&& !(m_Nz == 256)&& !(m_Nz == 512)&& !(m_Nz == 1024))
147 m_exec_conf->msg->warning() << "charge.pppm: PPPM Z gridsize should be a power of 2 for the best performance" << endl;
// The charge-assignment coefficient table holds order*(2*order+1) entries and
// must fit in the constant-memory array of CONSTANT_SIZE slots.
149 if (m_order * (2*m_order +1) > CONSTANT_SIZE)
151 m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, doesn't fit into constant array" << endl;
152 throw std::runtime_error("Error initializing PPPMForceCompute");
154 if (m_order > MaxOrder)
156 m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, max is " << MaxOrder << endl;
157 throw std::runtime_error("Error initializing PPPMForceCompute");
// Allocate grid-sized work arrays: charge density, Green's function, virial
// coefficients (6 per grid point), k-vectors, the three E-field components,
// the Green's-function denominator coefficients, the charge-assignment
// coefficients, and the interpolated field.
160 GPUArray<CUFFTCOMPLEX> n_rho_real_space(Nx*Ny*Nz, exec_conf);
161 m_rho_real_space.swap(n_rho_real_space);
162 GPUArray<Scalar> n_green_hat(Nx*Ny*Nz, exec_conf);
163 m_green_hat.swap(n_green_hat);
165 GPUArray<Scalar> n_vg(6*Nx*Ny*Nz, exec_conf);
166 m_vg.swap(n_vg);
169 GPUArray<Scalar3> n_kvec(Nx*Ny*Nz, exec_conf);
170 m_kvec.swap(n_kvec);
171 GPUArray<CUFFTCOMPLEX> n_Ex(Nx*Ny*Nz, exec_conf);
172 m_Ex.swap(n_Ex);
173 GPUArray<CUFFTCOMPLEX> n_Ey(Nx*Ny*Nz, exec_conf);
174 m_Ey.swap(n_Ey);
175 GPUArray<CUFFTCOMPLEX> n_Ez(Nx*Ny*Nz, exec_conf);
176 m_Ez.swap(n_Ez);
177 GPUArray<Scalar> n_gf_b(order, exec_conf);
178 m_gf_b.swap(n_gf_b);
179 GPUArray<Scalar> n_rho_coeff(order*(2*order+1), exec_conf);
180 m_rho_coeff.swap(n_rho_coeff);
181 GPUArray<Scalar3> n_field(Nx*Ny*Nz, exec_conf);
182 m_field.swap(n_field);
183 const BoxDim& box = m_pdata->getBox();
184 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
186 // get system charge
// Accumulate the net charge (m_q) and sum of squared charges (m_q2); a
// non-neutral system only triggers a warning, not an error.
187 m_q = 0.f;
188 m_q2 = 0.0;
189 for(int i = 0; i < (int)m_pdata->getN(); i++) {
190 m_q += h_charge.data[i];
191 m_q2 += h_charge.data[i]*h_charge.data[i];
193 if(fabs(m_q) > 0.0)
194 m_exec_conf->msg->warning() << "charge.pppm: system in not neutral, the net charge is " << m_q << endl;
196 // compute RMS force error
// Estimate the RMS force error of the k-space part per dimension (via rms())
// and of the real-space erfc part (spr), and report if it looks too large.
197 Scalar3 L = box.getL();
198 Scalar hx = L.x/(Scalar)Nx;
199 Scalar hy = L.y/(Scalar)Ny;
200 Scalar hz = L.z/(Scalar)Nz;
201 Scalar lprx = PPPMForceCompute::rms(hx, L.x, (int)m_pdata->getN());
202 Scalar lpry = PPPMForceCompute::rms(hy, L.y, (int)m_pdata->getN());
203 Scalar lprz = PPPMForceCompute::rms(hz, L.z, (int)m_pdata->getN());
204 Scalar lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
205 Scalar spr = 2.0*m_q2*exp(-m_kappa*m_kappa*m_rcut*m_rcut) / sqrt((int)m_pdata->getN()*m_rcut*L.x*L.y*L.z);
207 double RMS_error = MAX(lpr,spr);
208 if(RMS_error > 0.1) {
209 printf("!!!!!!!\n!!!!!!!\n!!!!!!!\nWARNING RMS error of %g is probably too high %f %f\n!!!!!!!\n!!!!!!!\n!!!!!!!\n", RMS_error, lpr, spr);
211 else{
212 printf("Notice: PPPM RMS error: %g\n", RMS_error);
// Precompute the charge-assignment polynomial coefficients and all k-space
// tables (k-vectors, virial coefficients, Green's function).
215 PPPMForceCompute::compute_rho_coeff();
217 // compute k vectors, virial contribution and Green's function
218 PPPMForceCompute::reset_kvec_green_hat_cpu();
// The unscaled forward+inverse FFT pair introduces a factor Nx*Ny*Nz per
// transform; the energy/virial prefactor absorbs the normalization squared.
220 Scalar scale = 1.0f/((Scalar)(Nx * Ny * Nz));
221 m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
224 std::vector< std::string > PPPMForceCompute::getProvidedLogQuantities()
226 vector<string> list;
227 list.push_back("pppm_energy");
228 return list;
231 /*! \param quantity Name of the quantity to get the log value of
232 \param timestep Current time step of the simulation
234 Scalar PPPMForceCompute::getLogValue(const std::string& quantity, unsigned int timestep)
236 if (quantity == string("pppm_energy"))
238 compute(timestep);
239 Scalar energy = calcEnergySum();
240 return energy;
242 else
244 m_exec_conf->msg->error() << "charge.pppm: " << quantity << " is not a valid log quantity" << endl;
245 throw runtime_error("Error getting log value");
249 /*! Actually perform the force computation
250 \param timestep Current time step
253 void PPPMForceCompute::computeForces(unsigned int timestep)
// Guard against use before setParams(): all grid arrays are sized there.
255 if (!m_params_set)
257 m_exec_conf->msg->error() << "charge.pppm: setParams must be called prior to computeForces()" << endl;
258 throw std::runtime_error("Error computing forces in PPPMForceCompute");
261 // start by updating the neighborlist
262 m_nlist->compute(timestep);
264 // start the profile for this compute
265 if (m_prof) m_prof->push("PPPM force");
266 int dim[3];
267 dim[0] = m_Nx;
268 dim[1] = m_Ny;
269 dim[2] = m_Nz;
// Lazily allocate the kiss FFT scratch buffers and the 3D forward/inverse
// plans the first time forces are computed after setParams().
271 if(first_run == 0)
273 first_run = 1;
274 fft_in = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
275 fft_ex = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
276 fft_ey = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
277 fft_ez = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
279 fft_forward = kiss_fftnd_alloc(dim, 3, 0, NULL, NULL);
280 fft_inverse = kiss_fftnd_alloc(dim, 3, 1, NULL, NULL);
// If the box changed since the last call, rebuild the k-space tables and the
// box-volume-dependent energy/virial prefactor.
283 if(m_box_changed)
285 const BoxDim& box = m_pdata->getBox();
286 Scalar3 L = box.getL();
287 PPPMForceCompute::reset_kvec_green_hat_cpu();
288 Scalar scale = Scalar(1.0)/((Scalar)(m_Nx * m_Ny * m_Nz));
289 m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
290 m_box_changed = false;
// Spread the point charges onto the mesh (real-space charge density).
293 PPPMForceCompute::assign_charges_to_grid();
295 //FFTs go next
// Forward-transform the charge density to k-space (in place via fft_in).
297 { // scoping array handles
298 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
299 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++) {
300 fft_in[i].r = (Scalar) h_rho_real_space.data[i].x;
301 fft_in[i].i = (Scalar)0.0;
304 kiss_fftnd(fft_forward, &fft_in[0], &fft_in[0]);
306 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++) {
307 h_rho_real_space.data[i].x = fft_in[i].r;
308 h_rho_real_space.data[i].y = fft_in[i].i;
// Multiply rho(k) by the Green's function and k-vectors to get E(k).
313 PPPMForceCompute::combined_green_e();
315 //More FFTs
// Inverse-transform the three field components back to the real-space grid.
317 { // scoping array handles
318 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
319 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
320 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
322 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++)
324 fft_ex[i].r = (Scalar) h_Ex.data[i].x;
325 fft_ex[i].i = (Scalar) h_Ex.data[i].y;
327 fft_ey[i].r = (Scalar) h_Ey.data[i].x;
328 fft_ey[i].i = (Scalar) h_Ey.data[i].y;
330 fft_ez[i].r = (Scalar) h_Ez.data[i].x;
331 fft_ez[i].i = (Scalar) h_Ez.data[i].y;
335 kiss_fftnd(fft_inverse, &fft_ex[0], &fft_ex[0]);
336 kiss_fftnd(fft_inverse, &fft_ey[0], &fft_ey[0]);
337 kiss_fftnd(fft_inverse, &fft_ez[0], &fft_ez[0]);
339 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++)
341 h_Ex.data[i].x = fft_ex[i].r;
342 h_Ex.data[i].y = fft_ex[i].i;
344 h_Ey.data[i].x = fft_ey[i].r;
345 h_Ey.data[i].y = fft_ey[i].i;
347 h_Ez.data[i].x = fft_ez[i].r;
348 h_Ez.data[i].y = fft_ez[i].i;
// Interpolate the grid field back to the particles: F = qE.
352 PPPMForceCompute::calculate_forces();
354 // If there are exclusions, correct for the long-range part of the potential
355 if( m_nlist->getExclusionsSet())
357 PPPMForceCompute::fix_exclusions_cpu();
360 // access flags and correct energy and virial if needed
361 PDataFlags flags = this->m_pdata->getFlags();
362 if (flags[pdata_flag::potential_energy] || flags[pdata_flag::pressure_tensor] || flags[pdata_flag::isotropic_virial])
364 fix_thermo_quantities();
367 if (m_prof) m_prof->pop();
// Estimate the RMS force error of the k-space part for one dimension.
// \param h grid spacing in that dimension
// \param prd box length in that dimension
// \param natoms number of particles
// \return estimated RMS force error contribution
370 Scalar PPPMForceCompute::rms(Scalar h, Scalar prd, Scalar natoms)
372 int m;
373 Scalar sum = 0.0;
// Interpolation-order-dependent error coefficients. Only rows 1..7 are
// initialized; the loop below reads acons[m_order][0..m_order-1], so m_order
// must be in [1,7] (presumably enforced by the MaxOrder check in setParams --
// TODO confirm MaxOrder <= 7).
374 Scalar acons[8][7];
376 acons[1][0] = 2.0 / 3.0;
377 acons[2][0] = 1.0 / 50.0;
378 acons[2][1] = 5.0 / 294.0;
379 acons[3][0] = 1.0 / 588.0;
380 acons[3][1] = 7.0 / 1440.0;
381 acons[3][2] = 21.0 / 3872.0;
382 acons[4][0] = 1.0 / 4320.0;
383 acons[4][1] = 3.0 / 1936.0;
384 acons[4][2] = 7601.0 / 2271360.0;
385 acons[4][3] = 143.0 / 28800.0;
386 acons[5][0] = 1.0 / 23232.0;
387 acons[5][1] = 7601.0 / 13628160.0;
388 acons[5][2] = 143.0 / 69120.0;
389 acons[5][3] = 517231.0 / 106536960.0;
390 acons[5][4] = 106640677.0 / 11737571328.0;
391 acons[6][0] = 691.0 / 68140800.0;
392 acons[6][1] = 13.0 / 57600.0;
393 acons[6][2] = 47021.0 / 35512320.0;
394 acons[6][3] = 9694607.0 / 2095994880.0;
395 acons[6][4] = 733191589.0 / 59609088000.0;
396 acons[6][5] = 326190917.0 / 11700633600.0;
397 acons[7][0] = 1.0 / 345600.0;
398 acons[7][1] = 3617.0 / 35512320.0;
399 acons[7][2] = 745739.0 / 838397952.0;
400 acons[7][3] = 56399353.0 / 12773376000.0;
401 acons[7][4] = 25091609.0 / 1560084480.0;
402 acons[7][5] = 1755948832039.0 / 36229939200000.0;
403 acons[7][6] = 4887769399.0 / 37838389248.0;
// Sum the power series in (h*kappa)^2 for the chosen interpolation order.
405 for (m = 0; m < m_order; m++)
406 sum += acons[m_order][m] * pow(h*m_kappa,Scalar(2.0)*(Scalar)m);
407 Scalar value = m_q2 * pow(h*m_kappa,(Scalar)m_order) *
408 sqrt(m_kappa*prd*sqrt(2.0*M_PI)*sum/natoms) / (prd*prd);
409 return value;
// Compute the polynomial coefficients of the charge-assignment (interpolation)
// function for the current m_order and store them in m_rho_coeff. The
// coefficients are built by a recurrence over increasing order in the scratch
// array a[], then the odd-offset rows are packed into h_rho_coeff.
413 void PPPMForceCompute::compute_rho_coeff()
415 int j, k, l, m;
416 Scalar s;
// Flat scratch array; indexed as a[y + x*(2*m_order+1)]. Size 136 bounds
// the maximum order*(2*order+1) footprint (checked against limits in setParams).
417 Scalar a[136];
418 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::readwrite);
420 // usage: a[x][y] = a[y + x*(2*m_order+1)]
// Zero the scratch table before building the recurrence.
422 for(l=0; l<m_order; l++)
424 for(m=0; m<(2*m_order+1); m++)
426 a[m + l*(2*m_order +1)] = Scalar(0.0);
430 for (k = -m_order; k <= m_order; k++)
431 for (l = 0; l < m_order; l++) {
432 a[(k+m_order) + l * (2*m_order+1)] = Scalar(0.0);
// Order-1 assignment function is the constant 1; higher orders follow from
// the recurrence relating coefficients at order j to those at order j-1.
435 a[m_order + 0 * (2*m_order+1)] = Scalar(1.0);
436 for (j = 1; j < m_order; j++) {
437 for (k = -j; k <= j; k += 2) {
438 s = 0.0;
439 for (l = 0; l < j; l++) {
440 a[(k + m_order) + (l+1)*(2*m_order+1)] = (a[(k+1+m_order) + l * (2*m_order + 1)] - a[(k-1+m_order) + l * (2*m_order + 1)]) / (l+1);
441 s += pow(0.5,(double) (l+1)) * (a[(k-1+m_order) + l * (2*m_order + 1)] + pow(-1.0,(double) l) * a[(k+1+m_order) + l * (2*m_order + 1)] ) / (double)(l+1);
443 a[k+m_order + 0 * (2*m_order+1)] = s;
// Pack the coefficients at the m_order stencil offsets (step 2 in k) into the
// output array consumed by assign_charges_to_grid()/calculate_forces().
447 m = 0;
448 for (k = -(m_order-1); k < m_order; k += 2) {
449 for (l = 0; l < m_order; l++) {
450 h_rho_coeff.data[m + l*(2*m_order +1)] = a[k+m_order + l * (2*m_order + 1)];
452 m++;
// Compute the m_order coefficients of the Green's-function denominator
// polynomial and store them in m_gf_b (used by gf_denom()).
456 void PPPMForceCompute::compute_gf_denom()
458 int k,l,m;
459 ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
// Start from the order-1 polynomial {1, 0, ...} and apply the recurrence
// once per additional order.
460 for (l = 1; l < m_order; l++) h_gf_b.data[l] = 0.0;
461 h_gf_b.data[0] = 1.0;
463 for (m = 1; m < m_order; m++) {
464 for (l = m; l > 0; l--) {
465 h_gf_b.data[l] = 4.0 * (h_gf_b.data[l]*(l-m)*(l-m-0.5)-h_gf_b.data[l-1]*(l-m-1)*(l-m-1));
// After the inner loop l == 0; update the constant term.
467 h_gf_b.data[0] = 4.0 * (h_gf_b.data[0]*(l-m)*(l-m-0.5));
// Normalize all coefficients by 1/(2*m_order - 1)!.
470 int ifact = 1;
471 for (k = 1; k < 2*m_order; k++) ifact *= k;
472 Scalar gaminv = 1.0/ifact;
473 for (l = 0; l < m_order; l++) h_gf_b.data[l] *= gaminv;
476 Scalar PPPMForceCompute::gf_denom(Scalar x, Scalar y, Scalar z)
478 int l ;
479 Scalar sx,sy,sz;
480 ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
481 sz = sy = sx = 0.0;
482 for (l = m_order-1; l >= 0; l--) {
483 sx = h_gf_b.data[l] + sx*x;
484 sy = h_gf_b.data[l] + sy*y;
485 sz = h_gf_b.data[l] + sz*z;
487 Scalar s = sx*sy*sz;
488 return s*s;
492 //! CPU implementation of sinc(x)==sin(x)/x
493 inline Scalar sinc(Scalar x)
495 Scalar sinc = 0;
497 if (x*x <= Scalar(1.0))
499 Scalar term = Scalar(1.0);
500 for (unsigned int i = 0; i < 6; ++i)
502 sinc += cpu_sinc_coeff[i] * term;
503 term *= x*x;
506 else
508 sinc = sin(x)/x;
511 return sinc;
// Rebuild every box-dependent k-space table: the k-vectors (m_kvec), the
// per-grid-point virial coefficients (m_vg), and the influence ("Green's")
// function (m_green_hat). Called from setParams() and whenever the box changes.
514 void PPPMForceCompute::reset_kvec_green_hat_cpu()
516 ArrayHandle<Scalar3> h_kvec(m_kvec, access_location::host, access_mode::readwrite);
517 const BoxDim& box = m_pdata->getBox();
518 Scalar3 L = box.getL();
520 // compute reciprocal lattice vectors
// b_i = 2*pi * (a_j x a_k) / V, the standard reciprocal-lattice construction.
521 Scalar3 a1 = box.getLatticeVector(0);
522 Scalar3 a2 = box.getLatticeVector(1);
523 Scalar3 a3 = box.getLatticeVector(2);
525 Scalar V_box = box.getVolume();
526 Scalar3 b1 = Scalar(2.0*M_PI)*make_scalar3(a2.y*a3.z-a2.z*a3.y, a2.z*a3.x-a2.x*a3.z, a2.x*a3.y-a2.y*a3.x)/V_box;
527 Scalar3 b2 = Scalar(2.0*M_PI)*make_scalar3(a3.y*a1.z-a3.z*a1.y, a3.z*a1.x-a3.x*a1.z, a3.x*a1.y-a3.y*a1.x)/V_box;
528 Scalar3 b3 = Scalar(2.0*M_PI)*make_scalar3(a1.y*a2.z-a1.z*a2.y, a1.z*a2.x-a1.x*a2.z, a1.x*a2.y-a1.y*a2.x)/V_box;
530 // Set up the k-vectors
// Indices above N/2 map to negative frequencies (FFT wrap-around convention).
531 int ix, iy, iz, kper, lper, mper, k, l, m;
532 for (ix = 0; ix < m_Nx; ix++) {
533 Scalar3 j;
534 j.x = ix > m_Nx/2 ? ix - m_Nx : ix;
535 for (iy = 0; iy < m_Ny; iy++) {
536 j.y = iy > m_Ny/2 ? iy - m_Ny : iy;
537 for (iz = 0; iz < m_Nz; iz++) {
538 j.z = iz > m_Nz/2 ? iz - m_Nz : iz;
539 h_kvec.data[iz + m_Nz * (iy + m_Ny * ix)] = j.x*b1+j.y*b2+j.z*b3;
544 // Set up constants for virial calculation
// Six coefficients per grid point (xx, xy, xz, yy, yz, zz ordering); the
// k == 0 point carries no virial contribution.
545 ArrayHandle<Scalar> h_vg(m_vg, access_location::host, access_mode::readwrite);;
546 for(int x = 0; x < m_Nx; x++)
548 for(int y = 0; y < m_Ny; y++)
550 for(int z = 0; z < m_Nz; z++)
552 Scalar3 kvec = h_kvec.data[z + m_Nz * (y + m_Ny * x)];
553 Scalar sqk = kvec.x*kvec.x;
554 sqk += kvec.y*kvec.y;
555 sqk += kvec.z*kvec.z;
557 int grid_point = z + m_Nz * (y + m_Ny * x);
558 if (sqk == 0.0)
560 h_vg.data[0 + 6*grid_point] = Scalar(0.0);
561 h_vg.data[1 + 6*grid_point] = Scalar(0.0);
562 h_vg.data[2 + 6*grid_point] = Scalar(0.0);
563 h_vg.data[3 + 6*grid_point] = Scalar(0.0);
564 h_vg.data[4 + 6*grid_point] = Scalar(0.0);
565 h_vg.data[5 + 6*grid_point] = Scalar(0.0);
567 else
569 Scalar vterm = -2.0 * (1.0/sqk + 0.25/(m_kappa*m_kappa));
570 h_vg.data[0 + 6*grid_point] = 1.0 + vterm*kvec.x*kvec.x;
571 h_vg.data[1 + 6*grid_point] = vterm*kvec.x*kvec.y;
572 h_vg.data[2 + 6*grid_point] = vterm*kvec.x*kvec.z;
573 h_vg.data[3 + 6*grid_point] = 1.0 + vterm*kvec.y*kvec.y;
574 h_vg.data[4 + 6*grid_point] = vterm*kvec.y*kvec.z;
575 h_vg.data[5 + 6*grid_point] = 1.0 + vterm*kvec.z*kvec.z;
582 // Set up the grid based Green's function
583 ArrayHandle<Scalar> h_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
584 Scalar snx, sny, snz, snx2, sny2, snz2;
585 Scalar argx, argy, argz, wx, wy, wz, qx, qy, qz;
586 Scalar sum1, dot1, dot2;
587 Scalar numerator, denominator, sqk;
// kH = 2*pi/N per dimension: the grid spacing in normalized k units.
589 Scalar3 kH = Scalar(2.0*M_PI)*make_scalar3(Scalar(1.0)/(Scalar)m_Nx,
590 Scalar(1.0)/(Scalar)m_Ny,
591 Scalar(1.0)/(Scalar)m_Nz);
592 Scalar xprd = L.x;
593 Scalar yprd = L.y;
594 Scalar zprd_slab = L.z;
596 Scalar form = 1.0;
598 PPPMForceCompute::compute_gf_denom();
// nbx/nby/nbz bound how many aliased (periodic-image) k-space terms are
// summed per dimension, chosen so neglected terms are below EPS_HOC.
600 Scalar temp = floor(((m_kappa*xprd/(M_PI*m_Nx)) *
601 pow(-log(EPS_HOC),0.25)));
602 int nbx = (int)temp;
604 temp = floor(((m_kappa*yprd/(M_PI*m_Ny)) *
605 pow(-log(EPS_HOC),0.25)));
606 int nby = (int)temp;
608 temp = floor(((m_kappa*zprd_slab/(M_PI*m_Nz)) *
609 pow(-log(EPS_HOC),0.25)));
610 int nbz = (int)temp;
612 Scalar3 kvec,kn, kn1, kn2, kn3;
613 Scalar arg_gauss, gauss;
615 for (m = 0; m < m_Nz; m++) {
616 mper = m - m_Nz*(2*m/m_Nz);
617 snz = sin(0.5*kH.z*mper);
618 snz2 = snz*snz;
620 for (l = 0; l < m_Ny; l++) {
621 lper = l - m_Ny*(2*l/m_Ny);
622 sny = sin(0.5*kH.y*lper);
623 sny2 = sny*sny;
625 for (k = 0; k < m_Nx; k++) {
626 kper = k - m_Nx*(2*k/m_Nx);
627 snx = sin(0.5*kH.x*kper);
628 snx2 = snx*snx;
630 kvec = kper*b1 + lper*b2 + mper*b3;
631 sqk = dot(kvec, kvec);
633 if (sqk != 0.0) {
// 12.5663706 == 4*pi; Coulomb Green's function prefactor.
634 numerator = form*12.5663706/sqk;
635 denominator = gf_denom(snx2,sny2,snz2);
// Sum over aliased images: each term weights the Gaussian-screened
// Coulomb kernel by the squared charge-assignment window w^2.
637 sum1 = 0.0;
638 for (ix = -nbx; ix <= nbx; ix++) {
639 qx = (kper+(Scalar)(m_Nx*ix));
640 kn1 = b1*qx;
641 argx = 0.5*qx*kH.x;
642 wx = pow(sinc(argx),m_order);
643 for (iy = -nby; iy <= nby; iy++) {
644 qy = (lper+(Scalar)(m_Ny*iy));
645 kn2 = b2*qy;
646 argy = 0.5*qy*kH.y;
647 wy = pow(sinc(argy),m_order);
648 for (iz = -nbz; iz <= nbz; iz++) {
649 qz = (mper+(Scalar)(m_Nz*iz));
650 kn3 = b3*qz;
651 argz = 0.5*qz*kH.z;
652 wz = pow(sinc(argz),m_order);
654 kn = kn1 + kn2 + kn3;
656 dot1 = dot(kn,kvec);
657 dot2 = dot(kn,kn);
659 arg_gauss = Scalar(0.25)*dot2/m_kappa/m_kappa;
660 gauss = exp(-arg_gauss);
662 sum1 += (dot1/dot2) * gauss* pow(Scalar(wx*wy*wz),Scalar(2.0));
666 h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = numerator*sum1/denominator;
667 } else h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = 0.0;
// Spread each particle's charge onto the m_order^3 nearest grid points using
// the precomputed assignment polynomial (m_rho_coeff), producing the
// real-space charge density m_rho_real_space (divided by the cell volume).
673 void PPPMForceCompute::assign_charges_to_grid()
676 const BoxDim& box = m_pdata->getBox();
678 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
679 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
681 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
682 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
// Clear the density grid before accumulating.
684 memset(h_rho_real_space.data, 0, sizeof(CUFFTCOMPLEX)*m_Nx*m_Ny*m_Nz);
686 Scalar V_cell = box.getVolume()/(Scalar)(m_Nx*m_Ny*m_Nz);
688 for(int i = 0; i < (int)m_pdata->getN(); i++)
690 Scalar qi = h_charge.data[i];
691 Scalar3 posi;
692 posi.x = h_pos.data[i].x;
693 posi.y = h_pos.data[i].y;
694 posi.z = h_pos.data[i].z;
696 //normalize position to gridsize:
697 Scalar3 pos_frac = box.makeFraction(posi);
698 pos_frac.x *= (Scalar)m_Nx;
699 pos_frac.y *= (Scalar)m_Ny;
700 pos_frac.z *= (Scalar)m_Nz;
702 Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
703 int nlower, nupper, mx, my, mz, nxi, nyi, nzi;
// Stencil extends from nlower to nupper cells around the particle.
705 nlower = -(m_order-1)/2;
706 nupper = m_order/2;
// Odd orders center the stencil on a grid point, even orders on a cell edge.
708 if (m_order % 2)
710 shift =0.5;
711 shiftone = 0.0;
713 else
715 shift = 0.0;
716 shiftone = 0.5;
719 nxi = (int)(pos_frac.x + shift);
720 nyi = (int)(pos_frac.y + shift);
721 nzi = (int)(pos_frac.z + shift);
// dx/dy/dz: fractional distance from the stencil reference point.
723 dx = shiftone+(Scalar)nxi-pos_frac.x;
724 dy = shiftone+(Scalar)nyi-pos_frac.y;
725 dz = shiftone+(Scalar)nzi-pos_frac.z;
727 int n,m,l,k;
728 Scalar result;
729 int mult_fact = 2*m_order+1;
// Accumulate q/V_cell * Wx(dx) * Wy(dy) * Wz(dz) at each stencil point;
// each W is the assignment polynomial evaluated by Horner's rule, and the
// grid indices are wrapped periodically.
731 x0 = qi / V_cell;
732 for (n = nlower; n <= nupper; n++) {
733 mx = n+nxi;
734 if(mx >= m_Nx) mx -= m_Nx;
735 if(mx < 0) mx += m_Nx;
736 result = Scalar(0.0);
737 for (k = m_order-1; k >= 0; k--) {
738 result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
740 y0 = x0*result;
741 for (m = nlower; m <= nupper; m++) {
742 my = m+nyi;
743 if(my >= m_Ny) my -= m_Ny;
744 if(my < 0) my += m_Ny;
745 result = Scalar(0.0);
746 for (k = m_order-1; k >= 0; k--) {
747 result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
749 z0 = y0*result;
750 for (l = nlower; l <= nupper; l++) {
751 mz = l+nzi;
752 if(mz >= m_Nz) mz -= m_Nz;
753 if(mz < 0) mz += m_Nz;
754 result = Scalar(0.0);
755 for (k = m_order-1; k >= 0; k--) {
756 result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
758 h_rho_real_space.data[mz + m_Nz * (my + m_Ny * mx)].x += z0*result;
// In k-space, multiply the transformed charge density by the influence
// function (with the 1/N FFT normalization folded in) and by each component
// of k with a real/imaginary swap and sign flip -- i.e. multiplication by
// -i*k -- to produce the three electric-field components E(k).
766 void PPPMForceCompute::combined_green_e()
769 ArrayHandle<Scalar3> h_kvec(m_kvec, access_location::host, access_mode::readwrite);
770 ArrayHandle<Scalar> h_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
771 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
772 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
773 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
774 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
776 unsigned int NNN = m_Nx*m_Ny*m_Nz;
777 for(unsigned int i = 0; i < NNN; i++)
780 CUFFTCOMPLEX rho_local = h_rho_real_space.data[i];
// Scale by G(k)/N; the remaining 1/N of the forward+inverse pair is part of
// m_energy_virial_factor (see setParams).
781 Scalar scale_times_green = h_green_hat.data[i] / ((Scalar)(NNN));
782 rho_local.x *= scale_times_green;
783 rho_local.y *= scale_times_green;
785 h_Ex.data[i].x = h_kvec.data[i].x * rho_local.y;
786 h_Ex.data[i].y = -h_kvec.data[i].x * rho_local.x;
788 h_Ey.data[i].x = h_kvec.data[i].y * rho_local.y;
789 h_Ey.data[i].y = -h_kvec.data[i].y * rho_local.x;
791 h_Ez.data[i].x = h_kvec.data[i].z * rho_local.y;
792 h_Ez.data[i].y = -h_kvec.data[i].z * rho_local.x;
// Interpolate the real-space grid field back to each particle with the same
// m_order^3 assignment stencil used in assign_charges_to_grid(), and
// accumulate F = q*E into m_force. Forces/virial are zeroed first.
796 void PPPMForceCompute::calculate_forces()
798 const BoxDim& box = m_pdata->getBox();
800 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
801 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
803 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::overwrite);
804 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::overwrite);
806 // there are enough other checks on the input data: but it doesn't hurt to be safe
807 assert(h_force.data);
808 assert(h_virial.data);
810 // Zero data for force calculation.
811 memset((void*)h_force.data,0,sizeof(Scalar4)*m_force.getNumElements());
812 memset((void*)h_virial.data,0,sizeof(Scalar)*m_virial.getNumElements());
814 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
815 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
816 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
817 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
819 for(int i = 0; i < (int)m_pdata->getN(); i++)
821 Scalar qi = h_charge.data[i];
822 Scalar3 posi;
823 posi.x = h_pos.data[i].x;
824 posi.y = h_pos.data[i].y;
825 posi.z = h_pos.data[i].z;
827 //normalize position to gridsize:
828 Scalar3 pos_frac = box.makeFraction(posi);
829 pos_frac.x *= (Scalar)m_Nx;
830 pos_frac.y *= (Scalar)m_Ny;
831 pos_frac.z *= (Scalar)m_Nz;
833 Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
834 int nlower, nupper, mx, my, mz, nxi, nyi, nzi;
// Same stencil geometry as assign_charges_to_grid(); see comments there.
836 nlower = -(m_order-1)/2;
837 nupper = m_order/2;
839 if (m_order % 2)
841 shift =0.5;
842 shiftone = 0.0;
844 else
846 shift = 0.0;
847 shiftone = 0.5;
850 nxi = (int)(pos_frac.x + shift);
851 nyi = (int)(pos_frac.y + shift);
852 nzi = (int)(pos_frac.z + shift);
854 dx = shiftone+(Scalar)nxi-pos_frac.x;
855 dy = shiftone+(Scalar)nyi-pos_frac.y;
856 dz = shiftone+(Scalar)nzi-pos_frac.z;
858 int n,m,l,k;
859 Scalar result;
860 int mult_fact = 2*m_order+1;
// Weight each stencil point's field (real part only; the inverse FFT output)
// by Wx*Wy*Wz and accumulate q*W*E into the particle's force.
861 for (n = nlower; n <= nupper; n++) {
862 mx = n+nxi;
863 if(mx >= m_Nx) mx -= m_Nx;
864 if(mx < 0) mx += m_Nx;
865 result = Scalar(0.0);
866 for (k = m_order-1; k >= 0; k--) {
867 result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
869 x0 = result;
870 for (m = nlower; m <= nupper; m++) {
871 my = m+nyi;
872 if(my >= m_Ny) my -= m_Ny;
873 if(my < 0) my += m_Ny;
874 result = Scalar(0.0);
875 for (k = m_order-1; k >= 0; k--) {
876 result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
878 y0 = x0*result;
879 for (l = nlower; l <= nupper; l++) {
880 mz = l+nzi;
881 if(mz >= m_Nz) mz -= m_Nz;
882 if(mz < 0) mz += m_Nz;
883 result = Scalar(0.0);
884 for (k = m_order-1; k >= 0; k--) {
885 result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
887 z0 = y0*result;
888 Scalar local_field_x = h_Ex.data[mz + m_Nz * (my + m_Ny * mx)].x;
889 Scalar local_field_y = h_Ey.data[mz + m_Nz * (my + m_Ny * mx)].x;
890 Scalar local_field_z = h_Ez.data[mz + m_Nz * (my + m_Ny * mx)].x;
891 h_force.data[i].x += qi*z0*local_field_x;
892 h_force.data[i].y += qi*z0*local_field_y;
893 h_force.data[i].z += qi*z0*local_field_z;
// Subtract the long-range (erf-screened) interaction for every excluded pair
// of each particle in m_group, since the k-space sum implicitly included it.
901 void PPPMForceCompute::fix_exclusions_cpu()
903 unsigned int group_size = m_group->getNumMembers();
904 // just drop out if the group is an empty group
905 if (group_size == 0)
906 return;
908 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
909 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
910 unsigned int virial_pitch = m_virial.getPitch();
912 // there are enough other checks on the input data: but it doesn't hurt to be safe
913 assert(h_force.data);
915 ArrayHandle< unsigned int > d_group_members(m_group->getIndexArray(), access_location::host, access_mode::read);
916 const BoxDim& box = m_pdata->getBox();
917 ArrayHandle<unsigned int> d_exlist(m_nlist->getExListArray(), access_location::host, access_mode::read);
918 ArrayHandle<unsigned int> d_n_ex(m_nlist->getNExArray(), access_location::host, access_mode::read);
919 Index2D nex = m_nlist->getExListIndexer();
921 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
922 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
924 for(unsigned int i = 0; i < group_size; i++)
// Per-particle accumulators for the excluded-pair force, energy and virial.
926 Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
927 Scalar virial[6];
928 for (unsigned int k = 0; k < 6; k++)
929 virial[k] = Scalar(0.0);
930 unsigned int idx = d_group_members.data[i];
931 Scalar3 posi;
932 posi.x = h_pos.data[idx].x;
933 posi.y = h_pos.data[idx].y;
934 posi.z = h_pos.data[idx].z;
935 Scalar qi = h_charge.data[idx];
937 unsigned int n_neigh = d_n_ex.data[idx];
938 const Scalar sqrtpi = sqrtf(M_PI);
939 unsigned int cur_j = 0;
941 for (unsigned int neigh_idx = 0; neigh_idx < n_neigh; neigh_idx++)
943 cur_j = d_exlist.data[nex(idx, neigh_idx)];
944 // get the neighbor's position
945 Scalar3 posj;
946 posj.x = h_pos.data[cur_j].x;
947 posj.y = h_pos.data[cur_j].y;
948 posj.z = h_pos.data[cur_j].z;
949 Scalar qj = h_charge.data[cur_j];
950 Scalar3 dx = posi - posj;
952 // apply periodic boundary conditions:
953 dx = box.minImage(dx);
955 Scalar rsq = dot(dx, dx);
956 Scalar r = sqrtf(rsq);
957 Scalar qiqj = qi * qj;
// erf (not erfc) kernel: this is the long-range part being removed.
958 Scalar erffac = erf(m_kappa * r) / r;
959 Scalar force_divr = qiqj * (-Scalar(2.0) * exp(-rsq * m_kappa * m_kappa) * m_kappa / (sqrtpi * rsq) + erffac / rsq);
960 Scalar pair_eng = qiqj * erffac;
961 virial[0]+= Scalar(0.5) * dx.x * dx.x * force_divr;
962 virial[1]+= Scalar(0.5) * dx.y * dx.x * force_divr;
963 virial[2]+= Scalar(0.5) * dx.z * dx.x * force_divr;
964 virial[3]+= Scalar(0.5) * dx.y * dx.y * force_divr;
965 virial[4]+= Scalar(0.5) * dx.z * dx.y * force_divr;
966 virial[5]+= Scalar(0.5) * dx.z * dx.z * force_divr;
967 force.x += dx.x * force_divr;
968 force.y += dx.y * force_divr;
969 force.z += dx.z * force_divr;
970 force.w += pair_eng;
// Halve the energy so each pair is counted once across both members.
972 force.w *= Scalar(0.5);
973 h_force.data[idx].x -= force.x;
974 h_force.data[idx].y -= force.y;
975 h_force.data[idx].z -= force.z;
// NOTE(review): .w and the virial use plain '=' while .x/.y/.z use '-=' --
// this overwrites any previously accumulated energy/virial for idx instead
// of subtracting from it; confirm whether that is intended.
976 h_force.data[idx].w = -force.w;
977 for (unsigned int k = 0; k < 6; k++)
978 h_virial.data[k*virial_pitch+idx] = -virial[k];
983 /*! Computes the additional energy and virial contributed by PPPM
984 \note The additional terms are simply added onto particle 0 so that they will be accounted for by
985 ComputeThermo
987 void PPPMForceCompute::fix_thermo_quantities()
989 // access data arrays
990 BoxDim box = m_pdata->getBox();
991 Scalar3 L = box.getL();
993 ArrayHandle<CUFFTCOMPLEX> d_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
994 ArrayHandle<Scalar> d_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
995 ArrayHandle<Scalar> d_vg(m_vg, access_location::host, access_mode::readwrite);
996 Scalar2 pppm_virial_energy = make_scalar2(0.0, 0.0);
998 Scalar v_xx=0.0, v_xy=0.0, v_xz=0.0, v_yy=0.0, v_yz=0.0, v_zz=0.0;
1002 // compute the correction
// Sum G(k)*|rho(k)|^2 over the grid for the energy, and weight it by the
// per-point virial coefficients (m_vg) for the six virial components.
1003 for (int i = 0; i < m_Nx*m_Ny*m_Nz; i++)
1005 Scalar energy = d_green_hat.data[i]*(d_rho_real_space.data[i].x*d_rho_real_space.data[i].x +
1006 d_rho_real_space.data[i].y*d_rho_real_space.data[i].y);
1007 Scalar pressure = energy*(d_vg.data[0+6*i] + d_vg.data[3+6*i] + d_vg.data[5+6*i]);
1008 v_xx += d_vg.data[0+6*i]*energy;
1009 v_xy += d_vg.data[1+6*i]*energy;
1010 v_xz += d_vg.data[2+6*i]*energy;
1011 v_yy += d_vg.data[3+6*i]*energy;
1012 v_yz += d_vg.data[4+6*i]*energy;
1013 v_zz += d_vg.data[5+6*i]*energy;
1014 pppm_virial_energy.x += pressure;
1015 pppm_virial_energy.y += energy;
// NOTE(review): pppm_virial_energy.x (the scalar pressure) is computed and
// scaled here but never stored anywhere visible below -- appears to be dead.
1018 pppm_virial_energy.x *= m_energy_virial_factor/ (Scalar(3.0) * L.x * L.y * L.z);
1019 pppm_virial_energy.y *= m_energy_virial_factor;
// Subtract the self-energy (kappa/sqrt(pi) * sum q_i^2; the literal is
// sqrt(pi)) and the neutralizing-background term for non-neutral systems.
1020 pppm_virial_energy.y -= m_q2 * m_kappa / Scalar(1.772453850905516027298168);
1021 pppm_virial_energy.y -= 0.5*M_PI*m_q*m_q / (m_kappa*m_kappa* L.x * L.y * L.z);
1023 // apply the correction to particle 0
1024 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
1025 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
1026 h_force.data[0].w += pppm_virial_energy.y;
1029 // Compute full virial tensor
1030 unsigned int virial_pitch = m_virial.getPitch();
1031 h_virial.data[0*virial_pitch+0] += v_xx*m_energy_virial_factor;
1032 h_virial.data[1*virial_pitch+0] += v_xy*m_energy_virial_factor;
1033 h_virial.data[2*virial_pitch+0] += v_xz*m_energy_virial_factor;
1034 h_virial.data[3*virial_pitch+0] += v_yy*m_energy_virial_factor;
1035 h_virial.data[4*virial_pitch+0] += v_yz*m_energy_virial_factor;
1036 h_virial.data[5*virial_pitch+0] += v_zz*m_energy_virial_factor;
1039 void export_PPPMForceCompute()
1041 class_<PPPMForceCompute, boost::shared_ptr<PPPMForceCompute>, bases<ForceCompute>, boost::noncopyable >
1042 ("PPPMForceCompute", init< boost::shared_ptr<SystemDefinition>,
1043 boost::shared_ptr<NeighborList>,
1044 boost::shared_ptr<ParticleGroup> >())
1045 .def("setParams", &PPPMForceCompute::setParams)