Merge branch 'maint'
[hoomd-blue.git] / libhoomd / computes / PPPMForceCompute.cc
blob7684c0b2210f1e695e3d6d6a4f3524737c827aa5
1 /*
2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
32 permission.
34 Disclaimer
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: sbarr
52 #ifdef WIN32
53 #pragma warning( push )
54 #pragma warning( disable : 4244 )
55 #endif
57 #include <boost/python.hpp>
58 #include <boost/bind.hpp>
60 #include "PPPMForceCompute.h"
62 #include <iostream>
63 #include <sstream>
64 #include <stdexcept>
65 #include <math.h>
67 using namespace boost;
68 using namespace boost::python;
69 using namespace std;
71 //! Coefficients of a power expansion of sin(x)/x
// Taylor coefficients 1/(2k+1)! with alternating sign, truncated after six
// terms; consumed by sinc() below for arguments with x*x <= 1.
72 const Scalar cpu_sinc_coeff[] = {Scalar(1.0), Scalar(-1.0/6.0), Scalar(1.0/120.0),
73 Scalar(-1.0/5040.0),Scalar(1.0/362880.0),
74 Scalar(-1.0/39916800.0)};
77 /*! \file PPPMForceCompute.cc
78 \brief Contains code for the PPPMForceCompute class
81 /*! \param sysdef System to compute forces on
82 \param nlist Neighbor list
83 \param group Particle group
84 \post Memory is allocated, and forces are zeroed.
86 PPPMForceCompute::PPPMForceCompute(boost::shared_ptr<SystemDefinition> sysdef,
87 boost::shared_ptr<NeighborList> nlist,
88 boost::shared_ptr<ParticleGroup> group)
// NOTE(review): the FFT scratch buffers are NULL-initialized here, but the
// kiss FFT plan members (fft_forward/fft_inverse, allocated lazily in
// computeForces) are not -- confirm they are never read before allocation.
89 : ForceCompute(sysdef), m_params_set(false), m_nlist(nlist), m_group(group),
90 fft_in(NULL), fft_ex(NULL), fft_ey(NULL), fft_ez(NULL)
92 m_exec_conf->msg->notice(5) << "Constructing PPPMForceCompute" << endl;
94 assert(m_pdata);
95 assert(m_nlist);
// Subscribe to box-size changes so the k-vectors and Green's function can be
// rebuilt lazily on the next computeForces() call (see m_box_changed there).
97 m_box_changed = false;
98 m_boxchange_connection = m_pdata->connectBoxChange(bind(&PPPMForceCompute::slotBoxChanged, this));
101 PPPMForceCompute::~PPPMForceCompute()
103 m_exec_conf->msg->notice(5) << "Destroying PPPMForceCompute" << endl;
// Release the kiss FFT scratch buffers allocated lazily in computeForces().
105 if (fft_in)
106 free(fft_in);
107 if (fft_ex)
108 free(fft_ex);
109 if (fft_ey)
110 free(fft_ey);
111 if (fft_ez)
112 free(fft_ez);
// NOTE(review): the fft_forward/fft_inverse plans (kiss_fftnd_alloc'd in
// computeForces) are never freed here -- looks like a leak. Cannot be fixed
// safely without also NULL-initializing them in the constructor; TODO confirm.
114 m_boxchange_connection.disconnect();
117 /*! \param Nx Number of grid points in x direction
118 \param Ny Number of grid points in y direction
119 \param Nz Number of grid points in z direction
120 \param order Number of grid points in each direction to assign charges to
121 \param kappa Screening parameter in erfc
122 \param rcut Short-ranged cutoff, used for computing the relative force error
124 Sets parameters for the long-ranged part of the electrostatics calculation
126 void PPPMForceCompute::setParams(int Nx, int Ny, int Nz, int order, Scalar kappa, Scalar rcut)
// Record the user parameters; computeForces() throws until this has been called.
128 m_params_set = true;
129 m_Nx = Nx;
130 m_Ny = Ny;
131 m_Nz = Nz;
132 m_order = order;
133 m_kappa = kappa;
134 m_rcut = rcut;
// Force computeForces() to (re)allocate the FFT buffers/plans on its next call.
135 first_run = 0;
// Warn (but do not fail) when a grid dimension is not a power of two in
// [2, 1024]; the FFT still works, just with degraded performance.
137 if(!(m_Nx == 2)&& !(m_Nx == 4)&& !(m_Nx == 8)&& !(m_Nx == 16)&& !(m_Nx == 32)&& !(m_Nx == 64)&& !(m_Nx == 128)&& !(m_Nx == 256)&& !(m_Nx == 512)&& !(m_Nx == 1024))
139 m_exec_conf->msg->warning() << "charge.pppm: PPPM X gridsize should be a power of 2 for the best performance" << endl;
141 if(!(m_Ny == 2)&& !(m_Ny == 4)&& !(m_Ny == 8)&& !(m_Ny == 16)&& !(m_Ny == 32)&& !(m_Ny == 64)&& !(m_Ny == 128)&& !(m_Ny == 256)&& !(m_Ny == 512)&& !(m_Ny == 1024))
143 m_exec_conf->msg->warning() << "charge.pppm: PPPM Y gridsize should be a power of 2 for the best performance" << endl;
145 if(!(m_Nz == 2)&& !(m_Nz == 4)&& !(m_Nz == 8)&& !(m_Nz == 16)&& !(m_Nz == 32)&& !(m_Nz == 64)&& !(m_Nz == 128)&& !(m_Nz == 256)&& !(m_Nz == 512)&& !(m_Nz == 1024))
147 m_exec_conf->msg->warning() << "charge.pppm: PPPM Z gridsize should be a power of 2 for the best performance" << endl;
// The charge-assignment coefficient table holds order*(2*order+1) entries and
// must fit in the constant-memory array of CONSTANT_SIZE slots.
149 if (m_order * (2*m_order +1) > CONSTANT_SIZE)
151 m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, doesn't fit into constant array" << endl;
152 throw std::runtime_error("Error initializing PPPMForceCompute");
154 if (m_order > MaxOrder)
156 m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, max is " << MaxOrder << endl;
157 throw std::runtime_error("Error initializing PPPMForceCompute");
// Allocate grid-sized work arrays: charge density, Green's function, virial
// coefficients (6 per grid point), k-vectors, the three E-field components,
// the Green's-function denominator coefficients, the charge-assignment
// coefficients, and the interpolated field.
160 GPUArray<CUFFTCOMPLEX> n_rho_real_space(Nx*Ny*Nz, exec_conf);
161 m_rho_real_space.swap(n_rho_real_space);
162 GPUArray<Scalar> n_green_hat(Nx*Ny*Nz, exec_conf);
163 m_green_hat.swap(n_green_hat);
165 GPUArray<Scalar> n_vg(6*Nx*Ny*Nz, exec_conf);
166 m_vg.swap(n_vg);
169 GPUArray<Scalar3> n_kvec(Nx*Ny*Nz, exec_conf);
170 m_kvec.swap(n_kvec);
171 GPUArray<CUFFTCOMPLEX> n_Ex(Nx*Ny*Nz, exec_conf);
172 m_Ex.swap(n_Ex);
173 GPUArray<CUFFTCOMPLEX> n_Ey(Nx*Ny*Nz, exec_conf);
174 m_Ey.swap(n_Ey);
175 GPUArray<CUFFTCOMPLEX> n_Ez(Nx*Ny*Nz, exec_conf);
176 m_Ez.swap(n_Ez);
177 GPUArray<Scalar> n_gf_b(order, exec_conf);
178 m_gf_b.swap(n_gf_b);
179 GPUArray<Scalar> n_rho_coeff(order*(2*order+1), exec_conf);
180 m_rho_coeff.swap(n_rho_coeff);
181 GPUArray<Scalar3> n_field(Nx*Ny*Nz, exec_conf);
182 m_field.swap(n_field);
183 const BoxDim& box = m_pdata->getBox();
184 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
186 // get system charge
// Accumulate the net charge (m_q) and sum of squared charges (m_q2); a
// non-neutral system only triggers a warning, not an error.
187 m_q = 0.f;
188 m_q2 = 0.0;
189 for(int i = 0; i < (int)m_pdata->getN(); i++) {
190 m_q += h_charge.data[i];
191 m_q2 += h_charge.data[i]*h_charge.data[i];
193 if(fabs(m_q) > 0.0)
194 m_exec_conf->msg->warning() << "charge.pppm: system in not neutral, the net charge is " << m_q << endl;
196 // compute RMS force error
// Estimate the RMS force error of the k-space part per dimension (via rms())
// and of the real-space erfc part (spr), and report if it looks too large.
197 Scalar3 L = box.getL();
198 Scalar hx = L.x/(Scalar)Nx;
199 Scalar hy = L.y/(Scalar)Ny;
200 Scalar hz = L.z/(Scalar)Nz;
201 Scalar lprx = PPPMForceCompute::rms(hx, L.x, (int)m_pdata->getN());
202 Scalar lpry = PPPMForceCompute::rms(hy, L.y, (int)m_pdata->getN());
203 Scalar lprz = PPPMForceCompute::rms(hz, L.z, (int)m_pdata->getN());
204 Scalar lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
205 Scalar spr = 2.0*m_q2*exp(-m_kappa*m_kappa*m_rcut*m_rcut) / sqrt((int)m_pdata->getN()*m_rcut*L.x*L.y*L.z);
207 double RMS_error = MAX(lpr,spr);
208 if(RMS_error > 0.1) {
209 printf("!!!!!!!\n!!!!!!!\n!!!!!!!\nWARNING RMS error of %g is probably too high %f %f\n!!!!!!!\n!!!!!!!\n!!!!!!!\n", RMS_error, lpr, spr);
211 else{
212 printf("Notice: PPPM RMS error: %g\n", RMS_error);
// Precompute the charge-assignment polynomial coefficients and all k-space
// tables (k-vectors, virial coefficients, Green's function).
215 PPPMForceCompute::compute_rho_coeff();
217 // compute k vectors, virial contribution and Green's function
218 PPPMForceCompute::reset_kvec_green_hat_cpu();
// The unscaled forward+inverse FFT pair introduces a factor Nx*Ny*Nz per
// transform; the energy/virial prefactor absorbs the normalization squared.
220 Scalar scale = 1.0f/((Scalar)(Nx * Ny * Nz));
221 m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
224 std::vector< std::string > PPPMForceCompute::getProvidedLogQuantities()
226 vector<string> list;
227 list.push_back("pppm_energy");
228 return list;
231 /*! \param quantity Name of the quantity to get the log value of
232 \param timestep Current time step of the simulation
234 Scalar PPPMForceCompute::getLogValue(const std::string& quantity, unsigned int timestep)
236 if (quantity == string("pppm_energy"))
238 compute(timestep);
239 Scalar energy = calcEnergySum();
240 return energy;
242 else
244 m_exec_conf->msg->error() << "charge.pppm: " << quantity << " is not a valid log quantity" << endl;
245 throw runtime_error("Error getting log value");
249 /*! Actually perform the force computation
250 \param timestep Current time step
253 void PPPMForceCompute::computeForces(unsigned int timestep)
// Guard against use before setParams(): all grid arrays are sized there.
255 if (!m_params_set)
257 m_exec_conf->msg->error() << "charge.pppm: setParams must be called prior to computeForces()" << endl;
258 throw std::runtime_error("Error computing forces in PPPMForceCompute");
261 // start by updating the neighborlist
262 m_nlist->compute(timestep);
264 // start the profile for this compute
265 if (m_prof) m_prof->push("PPPM force");
266 int dim[3];
267 dim[0] = m_Nx;
268 dim[1] = m_Ny;
269 dim[2] = m_Nz;
// Lazily allocate the kiss FFT scratch buffers and the 3D forward/inverse
// plans the first time forces are computed after setParams().
271 if(first_run == 0)
273 first_run = 1;
274 fft_in = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
275 fft_ex = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
276 fft_ey = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
277 fft_ez = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
279 fft_forward = kiss_fftnd_alloc(dim, 3, 0, NULL, NULL);
280 fft_inverse = kiss_fftnd_alloc(dim, 3, 1, NULL, NULL);
// If the box changed since the last call, rebuild the k-space tables and the
// box-volume-dependent energy/virial prefactor.
283 if(m_box_changed)
285 const BoxDim& box = m_pdata->getBox();
286 Scalar3 L = box.getL();
287 PPPMForceCompute::reset_kvec_green_hat_cpu();
288 Scalar scale = Scalar(1.0)/((Scalar)(m_Nx * m_Ny * m_Nz));
289 m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
290 m_box_changed = false;
// Spread the point charges onto the mesh (real-space charge density).
293 PPPMForceCompute::assign_charges_to_grid();
295 //FFTs go next
// Forward-transform the charge density to k-space (in place via fft_in).
297 { // scoping array handles
298 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
299 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++) {
300 fft_in[i].r = (Scalar) h_rho_real_space.data[i].x;
301 fft_in[i].i = (Scalar)0.0;
304 kiss_fftnd(fft_forward, &fft_in[0], &fft_in[0]);
306 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++) {
307 h_rho_real_space.data[i].x = fft_in[i].r;
308 h_rho_real_space.data[i].y = fft_in[i].i;
// Multiply rho(k) by the Green's function and k-vectors to get E(k).
313 PPPMForceCompute::combined_green_e();
315 //More FFTs
// Inverse-transform the three field components back to the real-space grid.
317 { // scoping array handles
318 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
319 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
320 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
322 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++)
324 fft_ex[i].r = (Scalar) h_Ex.data[i].x;
325 fft_ex[i].i = (Scalar) h_Ex.data[i].y;
327 fft_ey[i].r = (Scalar) h_Ey.data[i].x;
328 fft_ey[i].i = (Scalar) h_Ey.data[i].y;
330 fft_ez[i].r = (Scalar) h_Ez.data[i].x;
331 fft_ez[i].i = (Scalar) h_Ez.data[i].y;
335 kiss_fftnd(fft_inverse, &fft_ex[0], &fft_ex[0]);
336 kiss_fftnd(fft_inverse, &fft_ey[0], &fft_ey[0]);
337 kiss_fftnd(fft_inverse, &fft_ez[0], &fft_ez[0]);
339 for(int i = 0; i < m_Nx * m_Ny * m_Nz ; i++)
341 h_Ex.data[i].x = fft_ex[i].r;
342 h_Ex.data[i].y = fft_ex[i].i;
344 h_Ey.data[i].x = fft_ey[i].r;
345 h_Ey.data[i].y = fft_ey[i].i;
347 h_Ez.data[i].x = fft_ez[i].r;
348 h_Ez.data[i].y = fft_ez[i].i;
// Interpolate the grid field back to the particles: F = qE.
352 PPPMForceCompute::calculate_forces();
354 // If there are exclusions, correct for the long-range part of the potential
355 if( m_nlist->getExclusionsSet())
357 PPPMForceCompute::fix_exclusions_cpu();
360 // access flags and correct energy and virial if needed
361 PDataFlags flags = this->m_pdata->getFlags();
362 if (flags[pdata_flag::potential_energy] || flags[pdata_flag::pressure_tensor] || flags[pdata_flag::isotropic_virial])
364 fix_thermo_quantities();
367 if (m_prof) m_prof->pop();
// Estimate the RMS force error of the k-space part for one dimension.
// \param h grid spacing in that dimension
// \param prd box length in that dimension
// \param natoms number of particles
// \return estimated RMS force error contribution
370 Scalar PPPMForceCompute::rms(Scalar h, Scalar prd, Scalar natoms)
372 int m;
373 Scalar sum = 0.0;
// Interpolation-order-dependent error coefficients. Only rows 1..7 are
// initialized; the loop below reads acons[m_order][0..m_order-1], so m_order
// must be in [1,7] (presumably enforced by the MaxOrder check in setParams --
// TODO confirm MaxOrder <= 7).
374 Scalar acons[8][7];
376 acons[1][0] = 2.0 / 3.0;
377 acons[2][0] = 1.0 / 50.0;
378 acons[2][1] = 5.0 / 294.0;
379 acons[3][0] = 1.0 / 588.0;
380 acons[3][1] = 7.0 / 1440.0;
381 acons[3][2] = 21.0 / 3872.0;
382 acons[4][0] = 1.0 / 4320.0;
383 acons[4][1] = 3.0 / 1936.0;
384 acons[4][2] = 7601.0 / 2271360.0;
385 acons[4][3] = 143.0 / 28800.0;
386 acons[5][0] = 1.0 / 23232.0;
387 acons[5][1] = 7601.0 / 13628160.0;
388 acons[5][2] = 143.0 / 69120.0;
389 acons[5][3] = 517231.0 / 106536960.0;
390 acons[5][4] = 106640677.0 / 11737571328.0;
391 acons[6][0] = 691.0 / 68140800.0;
392 acons[6][1] = 13.0 / 57600.0;
393 acons[6][2] = 47021.0 / 35512320.0;
394 acons[6][3] = 9694607.0 / 2095994880.0;
395 acons[6][4] = 733191589.0 / 59609088000.0;
396 acons[6][5] = 326190917.0 / 11700633600.0;
397 acons[7][0] = 1.0 / 345600.0;
398 acons[7][1] = 3617.0 / 35512320.0;
399 acons[7][2] = 745739.0 / 838397952.0;
400 acons[7][3] = 56399353.0 / 12773376000.0;
401 acons[7][4] = 25091609.0 / 1560084480.0;
402 acons[7][5] = 1755948832039.0 / 36229939200000.0;
403 acons[7][6] = 4887769399.0 / 37838389248.0;
// Sum the power series in (h*kappa)^2 for the chosen interpolation order.
405 for (m = 0; m < m_order; m++)
406 sum += acons[m_order][m] * pow(h*m_kappa,Scalar(2.0)*(Scalar)m);
407 Scalar value = m_q2 * pow(h*m_kappa,(Scalar)m_order) *
408 sqrt(m_kappa*prd*sqrt(2.0*M_PI)*sum/natoms) / (prd*prd);
409 return value;
// Compute the polynomial coefficients of the charge-assignment (interpolation)
// function for the current m_order and store them in m_rho_coeff. The
// coefficients are built by a recurrence over increasing order in the scratch
// array a[], then the odd-offset rows are packed into h_rho_coeff.
413 void PPPMForceCompute::compute_rho_coeff()
415 int j, k, l, m;
416 Scalar s;
// Flat scratch array; indexed as a[y + x*(2*m_order+1)]. Size 136 bounds
// the maximum order*(2*order+1) footprint (checked against limits in setParams).
417 Scalar a[136];
418 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::readwrite);
420 // usage: a[x][y] = a[y + x*(2*m_order+1)]
// Zero the scratch table before building the recurrence.
422 for(l=0; l<m_order; l++)
424 for(m=0; m<(2*m_order+1); m++)
426 a[m + l*(2*m_order +1)] = Scalar(0.0);
430 for (k = -m_order; k <= m_order; k++)
431 for (l = 0; l < m_order; l++) {
432 a[(k+m_order) + l * (2*m_order+1)] = Scalar(0.0);
// Order-1 assignment function is the constant 1; higher orders follow from
// the recurrence relating coefficients at order j to those at order j-1.
435 a[m_order + 0 * (2*m_order+1)] = Scalar(1.0);
436 for (j = 1; j < m_order; j++) {
437 for (k = -j; k <= j; k += 2) {
438 s = 0.0;
439 for (l = 0; l < j; l++) {
440 a[(k + m_order) + (l+1)*(2*m_order+1)] = (a[(k+1+m_order) + l * (2*m_order + 1)] - a[(k-1+m_order) + l * (2*m_order + 1)]) / (l+1);
441 s += pow(0.5,(double) (l+1)) * (a[(k-1+m_order) + l * (2*m_order + 1)] + pow(-1.0,(double) l) * a[(k+1+m_order) + l * (2*m_order + 1)] ) / (double)(l+1);
443 a[k+m_order + 0 * (2*m_order+1)] = s;
// Pack the coefficients at the m_order stencil offsets (step 2 in k) into the
// output array consumed by assign_charges_to_grid()/calculate_forces().
447 m = 0;
448 for (k = -(m_order-1); k < m_order; k += 2) {
449 for (l = 0; l < m_order; l++) {
450 h_rho_coeff.data[m + l*(2*m_order +1)] = a[k+m_order + l * (2*m_order + 1)];
452 m++;
// Compute the m_order coefficients of the Green's-function denominator
// polynomial and store them in m_gf_b (used by gf_denom()).
456 void PPPMForceCompute::compute_gf_denom()
458 int k,l,m;
459 ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
// Start from the order-1 polynomial {1, 0, ...} and apply the recurrence
// once per additional order.
460 for (l = 1; l < m_order; l++) h_gf_b.data[l] = 0.0;
461 h_gf_b.data[0] = 1.0;
463 for (m = 1; m < m_order; m++) {
464 for (l = m; l > 0; l--) {
465 h_gf_b.data[l] = 4.0 * (h_gf_b.data[l]*(l-m)*(l-m-0.5)-h_gf_b.data[l-1]*(l-m-1)*(l-m-1));
// After the inner loop l == 0; update the constant term.
467 h_gf_b.data[0] = 4.0 * (h_gf_b.data[0]*(l-m)*(l-m-0.5));
// Normalize all coefficients by 1/(2*m_order - 1)!.
470 int ifact = 1;
471 for (k = 1; k < 2*m_order; k++) ifact *= k;
472 Scalar gaminv = 1.0/ifact;
473 for (l = 0; l < m_order; l++) h_gf_b.data[l] *= gaminv;
476 Scalar PPPMForceCompute::gf_denom(Scalar x, Scalar y, Scalar z)
478 int l ;
479 Scalar sx,sy,sz;
480 ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
481 sz = sy = sx = 0.0;
482 for (l = m_order-1; l >= 0; l--) {
483 sx = h_gf_b.data[l] + sx*x;
484 sy = h_gf_b.data[l] + sy*y;
485 sz = h_gf_b.data[l] + sz*z;
487 Scalar s = sx*sy*sz;
488 return s*s;
492 //! CPU implementation of sinc(x)==sin(x)/x
493 inline Scalar sinc(Scalar x)
495 Scalar sinc = 0;
497 if (x*x <= Scalar(1.0))
499 Scalar term = Scalar(1.0);
500 for (unsigned int i = 0; i < 6; ++i)
502 sinc += cpu_sinc_coeff[i] * term;
503 term *= x*x;
506 else
508 sinc = sin(x)/x;
511 return sinc;
// Rebuild every box-dependent k-space table: the k-vectors (m_kvec), the
// per-grid-point virial coefficients (m_vg), and the influence ("Green's")
// function (m_green_hat). Called from setParams() and whenever the box changes.
514 void PPPMForceCompute::reset_kvec_green_hat_cpu()
516 ArrayHandle<Scalar3> h_kvec(m_kvec, access_location::host, access_mode::readwrite);
517 const BoxDim& box = m_pdata->getBox();
518 Scalar3 L = box.getL();
520 // compute reciprocal lattice vectors
// b_i = 2*pi * (a_j x a_k) / V, the standard reciprocal-lattice construction.
521 Scalar3 a1 = box.getLatticeVector(0);
522 Scalar3 a2 = box.getLatticeVector(1);
523 Scalar3 a3 = box.getLatticeVector(2);
525 Scalar V_box = box.getVolume();
526 Scalar3 b1 = Scalar(2.0*M_PI)*make_scalar3(a2.y*a3.z-a2.z*a3.y, a2.z*a3.x-a2.x*a3.z, a2.x*a3.y-a2.y*a3.x)/V_box;
527 Scalar3 b2 = Scalar(2.0*M_PI)*make_scalar3(a3.y*a1.z-a3.z*a1.y, a3.z*a1.x-a3.x*a1.z, a3.x*a1.y-a3.y*a1.x)/V_box;
528 Scalar3 b3 = Scalar(2.0*M_PI)*make_scalar3(a1.y*a2.z-a1.z*a2.y, a1.z*a2.x-a1.x*a2.z, a1.x*a2.y-a1.y*a2.x)/V_box;
530 // Set up the k-vectors
// Indices above N/2 map to negative frequencies (FFT wrap-around convention).
531 int ix, iy, iz, kper, lper, mper, k, l, m;
532 for (ix = 0; ix < m_Nx; ix++) {
533 Scalar3 j;
534 j.x = ix > m_Nx/2 ? ix - m_Nx : ix;
535 for (iy = 0; iy < m_Ny; iy++) {
536 j.y = iy > m_Ny/2 ? iy - m_Ny : iy;
537 for (iz = 0; iz < m_Nz; iz++) {
538 j.z = iz > m_Nz/2 ? iz - m_Nz : iz;
539 h_kvec.data[iz + m_Nz * (iy + m_Ny * ix)] = j.x*b1+j.y*b2+j.z*b3;
544 // Set up constants for virial calculation
// Six coefficients per grid point (xx, xy, xz, yy, yz, zz ordering); the
// k == 0 point carries no virial contribution.
545 ArrayHandle<Scalar> h_vg(m_vg, access_location::host, access_mode::readwrite);;
546 for(int x = 0; x < m_Nx; x++)
548 for(int y = 0; y < m_Ny; y++)
550 for(int z = 0; z < m_Nz; z++)
552 Scalar3 kvec = h_kvec.data[z + m_Nz * (y + m_Ny * x)];
553 Scalar sqk = kvec.x*kvec.x;
554 sqk += kvec.y*kvec.y;
555 sqk += kvec.z*kvec.z;
557 int grid_point = z + m_Nz * (y + m_Ny * x);
558 if (sqk == 0.0)
560 h_vg.data[0 + 6*grid_point] = Scalar(0.0);
561 h_vg.data[1 + 6*grid_point] = Scalar(0.0);
562 h_vg.data[2 + 6*grid_point] = Scalar(0.0);
563 h_vg.data[3 + 6*grid_point] = Scalar(0.0);
564 h_vg.data[4 + 6*grid_point] = Scalar(0.0);
565 h_vg.data[5 + 6*grid_point] = Scalar(0.0);
567 else
569 Scalar vterm = -2.0 * (1.0/sqk + 0.25/(m_kappa*m_kappa));
570 h_vg.data[0 + 6*grid_point] = 1.0 + vterm*kvec.x*kvec.x;
571 h_vg.data[1 + 6*grid_point] = vterm*kvec.x*kvec.y;
572 h_vg.data[2 + 6*grid_point] = vterm*kvec.x*kvec.z;
573 h_vg.data[3 + 6*grid_point] = 1.0 + vterm*kvec.y*kvec.y;
574 h_vg.data[4 + 6*grid_point] = vterm*kvec.y*kvec.z;
575 h_vg.data[5 + 6*grid_point] = 1.0 + vterm*kvec.z*kvec.z;
582 // Set up the grid based Green's function
583 ArrayHandle<Scalar> h_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
584 Scalar snx, sny, snz, snx2, sny2, snz2;
585 Scalar argx, argy, argz, wx, wy, wz, qx, qy, qz;
586 Scalar sum1, dot1, dot2;
587 Scalar numerator, denominator, sqk;
// kH = 2*pi/N per dimension: the grid spacing in normalized k units.
589 Scalar3 kH = Scalar(2.0*M_PI)*make_scalar3(Scalar(1.0)/(Scalar)m_Nx,
590 Scalar(1.0)/(Scalar)m_Ny,
591 Scalar(1.0)/(Scalar)m_Nz);
592 Scalar xprd = L.x;
593 Scalar yprd = L.y;
594 Scalar zprd_slab = L.z;
596 Scalar form = 1.0;
598 PPPMForceCompute::compute_gf_denom();
// nbx/nby/nbz bound how many aliased (periodic-image) k-space terms are
// summed per dimension, chosen so neglected terms are below EPS_HOC.
600 Scalar temp = floor(((m_kappa*xprd/(M_PI*m_Nx)) *
601 pow(-log(EPS_HOC),0.25)));
602 int nbx = (int)temp;
604 temp = floor(((m_kappa*yprd/(M_PI*m_Ny)) *
605 pow(-log(EPS_HOC),0.25)));
606 int nby = (int)temp;
608 temp = floor(((m_kappa*zprd_slab/(M_PI*m_Nz)) *
609 pow(-log(EPS_HOC),0.25)));
610 int nbz = (int)temp;
612 Scalar3 kvec,kn, kn1, kn2, kn3;
613 Scalar arg_gauss, gauss;
615 for (m = 0; m < m_Nz; m++) {
616 mper = m - m_Nz*(2*m/m_Nz);
617 snz = sin(0.5*kH.z*mper);
618 snz2 = snz*snz;
620 for (l = 0; l < m_Ny; l++) {
621 lper = l - m_Ny*(2*l/m_Ny);
622 sny = sin(0.5*kH.y*lper);
623 sny2 = sny*sny;
625 for (k = 0; k < m_Nx; k++) {
626 kper = k - m_Nx*(2*k/m_Nx);
627 snx = sin(0.5*kH.x*kper);
628 snx2 = snx*snx;
630 kvec = kper*b1 + lper*b2 + mper*b3;
631 sqk = dot(kvec, kvec);
633 if (sqk != 0.0) {
// 12.5663706 == 4*pi; Coulomb Green's function prefactor.
634 numerator = form*12.5663706/sqk;
635 denominator = gf_denom(snx2,sny2,snz2);
// Sum over aliased images: each term weights the Gaussian-screened
// Coulomb kernel by the squared charge-assignment window w^2.
637 sum1 = 0.0;
638 for (ix = -nbx; ix <= nbx; ix++) {
639 qx = (kper+(Scalar)(m_Nx*ix));
640 kn1 = b1*qx;
641 argx = 0.5*qx*kH.x;
642 wx = pow(sinc(argx),m_order);
643 for (iy = -nby; iy <= nby; iy++) {
644 qy = (lper+(Scalar)(m_Ny*iy));
645 kn2 = b2*qy;
646 argy = 0.5*qy*kH.y;
647 wy = pow(sinc(argy),m_order);
648 for (iz = -nbz; iz <= nbz; iz++) {
649 qz = (mper+(Scalar)(m_Nz*iz));
650 kn3 = b3*qz;
651 argz = 0.5*qz*kH.z;
652 wz = pow(sinc(argz),m_order);
654 kn = kn1 + kn2 + kn3;
656 dot1 = dot(kn,kvec);
657 dot2 = dot(kn,kn);
659 arg_gauss = Scalar(0.25)*dot2/m_kappa/m_kappa;
660 gauss = exp(-arg_gauss);
662 sum1 += (dot1/dot2) * gauss* pow(Scalar(wx*wy*wz),Scalar(2.0));
666 h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = numerator*sum1/denominator;
667 } else h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = 0.0;
// Spread each particle's charge onto the m_order^3 nearest grid points using
// the precomputed assignment polynomial (m_rho_coeff), producing the
// real-space charge density m_rho_real_space (divided by the cell volume).
673 void PPPMForceCompute::assign_charges_to_grid()
676 const BoxDim& box = m_pdata->getBox();
678 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
679 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
681 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
682 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
// Clear the density grid before accumulating.
684 memset(h_rho_real_space.data, 0, sizeof(CUFFTCOMPLEX)*m_Nx*m_Ny*m_Nz);
686 Scalar V_cell = box.getVolume()/(Scalar)(m_Nx*m_Ny*m_Nz);
688 for(int i = 0; i < (int)m_pdata->getN(); i++)
690 Scalar qi = h_charge.data[i];
691 Scalar3 posi;
692 posi.x = h_pos.data[i].x;
693 posi.y = h_pos.data[i].y;
694 posi.z = h_pos.data[i].z;
696 //normalize position to gridsize:
697 Scalar3 pos_frac = box.makeFraction(posi);
698 pos_frac.x *= (Scalar)m_Nx;
699 pos_frac.y *= (Scalar)m_Ny;
700 pos_frac.z *= (Scalar)m_Nz;
702 Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
703 int nlower, nupper, mx, my, mz, nxi, nyi, nzi;
// Stencil extends from nlower to nupper cells around the particle.
705 nlower = -(m_order-1)/2;
706 nupper = m_order/2;
// Odd orders center the stencil on a grid point, even orders on a cell edge.
708 if (m_order % 2)
710 shift =0.5;
711 shiftone = 0.0;
713 else
715 shift = 0.0;
716 shiftone = 0.5;
719 nxi = (int)(pos_frac.x + shift);
720 nyi = (int)(pos_frac.y + shift);
721 nzi = (int)(pos_frac.z + shift);
// dx/dy/dz: fractional distance from the stencil reference point.
723 dx = shiftone+(Scalar)nxi-pos_frac.x;
724 dy = shiftone+(Scalar)nyi-pos_frac.y;
725 dz = shiftone+(Scalar)nzi-pos_frac.z;
727 int n,m,l,k;
728 Scalar result;
729 int mult_fact = 2*m_order+1;
// Accumulate q/V_cell * Wx(dx) * Wy(dy) * Wz(dz) at each stencil point;
// each W is the assignment polynomial evaluated by Horner's rule, and the
// grid indices are wrapped periodically.
731 x0 = qi / V_cell;
732 for (n = nlower; n <= nupper; n++) {
733 mx = n+nxi;
734 if(mx >= m_Nx) mx -= m_Nx;
735 if(mx < 0) mx += m_Nx;
736 result = Scalar(0.0);
737 for (k = m_order-1; k >= 0; k--) {
738 result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
740 y0 = x0*result;
741 for (m = nlower; m <= nupper; m++) {
742 my = m+nyi;
743 if(my >= m_Ny) my -= m_Ny;
744 if(my < 0) my += m_Ny;
745 result = Scalar(0.0);
746 for (k = m_order-1; k >= 0; k--) {
747 result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
749 z0 = y0*result;
750 for (l = nlower; l <= nupper; l++) {
751 mz = l+nzi;
752 if(mz >= m_Nz) mz -= m_Nz;
753 if(mz < 0) mz += m_Nz;
754 result = Scalar(0.0);
755 for (k = m_order-1; k >= 0; k--) {
756 result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
758 h_rho_real_space.data[mz + m_Nz * (my + m_Ny * mx)].x += z0*result;
// In k-space, multiply the transformed charge density by the influence
// function (with the 1/N FFT normalization folded in) and by each component
// of k with a real/imaginary swap and sign flip -- i.e. multiplication by
// -i*k -- to produce the three electric-field components E(k).
766 void PPPMForceCompute::combined_green_e()
769 ArrayHandle<Scalar3> h_kvec(m_kvec, access_location::host, access_mode::readwrite);
770 ArrayHandle<Scalar> h_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
771 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
772 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
773 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
774 ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
776 unsigned int NNN = m_Nx*m_Ny*m_Nz;
777 for(unsigned int i = 0; i < NNN; i++)
780 CUFFTCOMPLEX rho_local = h_rho_real_space.data[i];
// Scale by G(k)/N; the remaining 1/N of the forward+inverse pair is part of
// m_energy_virial_factor (see setParams).
781 Scalar scale_times_green = h_green_hat.data[i] / ((Scalar)(NNN));
782 rho_local.x *= scale_times_green;
783 rho_local.y *= scale_times_green;
785 h_Ex.data[i].x = h_kvec.data[i].x * rho_local.y;
786 h_Ex.data[i].y = -h_kvec.data[i].x * rho_local.x;
788 h_Ey.data[i].x = h_kvec.data[i].y * rho_local.y;
789 h_Ey.data[i].y = -h_kvec.data[i].y * rho_local.x;
791 h_Ez.data[i].x = h_kvec.data[i].z * rho_local.y;
792 h_Ez.data[i].y = -h_kvec.data[i].z * rho_local.x;
// Interpolate the real-space grid field back to each particle with the same
// m_order^3 assignment stencil used in assign_charges_to_grid(), and
// accumulate F = q*E into m_force. Forces/virial are zeroed first.
796 void PPPMForceCompute::calculate_forces()
798 const BoxDim& box = m_pdata->getBox();
800 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
801 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
803 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::overwrite);
804 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::overwrite);
806 // there are enough other checks on the input data: but it doesn't hurt to be safe
807 assert(h_force.data);
808 assert(h_virial.data);
810 // Zero data for force calculation.
811 memset((void*)h_force.data,0,sizeof(Scalar4)*m_force.getNumElements());
812 memset((void*)h_virial.data,0,sizeof(Scalar)*m_virial.getNumElements());
814 ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
815 ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
816 ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
817 ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
819 for(int i = 0; i < (int)m_pdata->getN(); i++)
821 Scalar qi = h_charge.data[i];
822 Scalar3 posi;
823 posi.x = h_pos.data[i].x;
824 posi.y = h_pos.data[i].y;
825 posi.z = h_pos.data[i].z;
827 //normalize position to gridsize:
828 Scalar3 pos_frac = box.makeFraction(posi);
829 pos_frac.x *= (Scalar)m_Nx;
830 pos_frac.y *= (Scalar)m_Ny;
831 pos_frac.z *= (Scalar)m_Nz;
833 Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
834 int nlower, nupper, mx, my, mz, nxi, nyi, nzi;
// Same stencil geometry as assign_charges_to_grid(); see comments there.
836 nlower = -(m_order-1)/2;
837 nupper = m_order/2;
839 if (m_order % 2)
841 shift =0.5;
842 shiftone = 0.0;
844 else
846 shift = 0.0;
847 shiftone = 0.5;
850 nxi = (int)(pos_frac.x + shift);
851 nyi = (int)(pos_frac.y + shift);
852 nzi = (int)(pos_frac.z + shift);
854 dx = shiftone+(Scalar)nxi-pos_frac.x;
855 dy = shiftone+(Scalar)nyi-pos_frac.y;
856 dz = shiftone+(Scalar)nzi-pos_frac.z;
858 int n,m,l,k;
859 Scalar result;
860 int mult_fact = 2*m_order+1;
// Weight each stencil point's field (real part only; the inverse FFT output)
// by Wx*Wy*Wz and accumulate q*W*E into the particle's force.
861 for (n = nlower; n <= nupper; n++) {
862 mx = n+nxi;
863 if(mx >= m_Nx) mx -= m_Nx;
864 if(mx < 0) mx += m_Nx;
865 result = Scalar(0.0);
866 for (k = m_order-1; k >= 0; k--) {
867 result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
869 x0 = result;
870 for (m = nlower; m <= nupper; m++) {
871 my = m+nyi;
872 if(my >= m_Ny) my -= m_Ny;
873 if(my < 0) my += m_Ny;
874 result = Scalar(0.0);
875 for (k = m_order-1; k >= 0; k--) {
876 result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
878 y0 = x0*result;
879 for (l = nlower; l <= nupper; l++) {
880 mz = l+nzi;
881 if(mz >= m_Nz) mz -= m_Nz;
882 if(mz < 0) mz += m_Nz;
883 result = Scalar(0.0);
884 for (k = m_order-1; k >= 0; k--) {
885 result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
887 z0 = y0*result;
888 Scalar local_field_x = h_Ex.data[mz + m_Nz * (my + m_Ny * mx)].x;
889 Scalar local_field_y = h_Ey.data[mz + m_Nz * (my + m_Ny * mx)].x;
890 Scalar local_field_z = h_Ez.data[mz + m_Nz * (my + m_Ny * mx)].x;
891 h_force.data[i].x += qi*z0*local_field_x;
892 h_force.data[i].y += qi*z0*local_field_y;
893 h_force.data[i].z += qi*z0*local_field_z;
// Subtract the long-range (erf-screened) interaction for every excluded pair
// of each particle in m_group, since the k-space sum implicitly included it.
901 void PPPMForceCompute::fix_exclusions_cpu()
903 unsigned int group_size = m_group->getNumMembers();
904 // just drop out if the group is an empty group
905 if (group_size == 0)
906 return;
908 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
909 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
910 unsigned int virial_pitch = m_virial.getPitch();
912 // there are enough other checks on the input data: but it doesn't hurt to be safe
913 assert(h_force.data);
915 ArrayHandle< unsigned int > d_group_members(m_group->getIndexArray(), access_location::host, access_mode::read);
916 const BoxDim& box = m_pdata->getBox();
917 ArrayHandle<unsigned int> d_exlist(m_nlist->getExListArray(), access_location::host, access_mode::read);
918 ArrayHandle<unsigned int> d_n_ex(m_nlist->getNExArray(), access_location::host, access_mode::read);
919 Index2D nex = m_nlist->getExListIndexer();
921 ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
922 ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
924 for(unsigned int i = 0; i < group_size; i++)
// Per-particle accumulators for the excluded-pair force, energy and virial.
926 Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
927 Scalar virial[6];
928 for (unsigned int k = 0; k < 6; k++)
929 virial[k] = Scalar(0.0);
930 unsigned int idx = d_group_members.data[i];
931 Scalar3 posi;
932 posi.x = h_pos.data[idx].x;
933 posi.y = h_pos.data[idx].y;
934 posi.z = h_pos.data[idx].z;
935 Scalar qi = h_charge.data[idx];
937 unsigned int n_neigh = d_n_ex.data[idx];
938 const Scalar sqrtpi = sqrtf(M_PI);
939 unsigned int cur_j = 0;
941 for (unsigned int neigh_idx = 0; neigh_idx < n_neigh; neigh_idx++)
943 cur_j = d_exlist.data[nex(idx, neigh_idx)];
944 // get the neighbor's position
945 Scalar3 posj;
946 posj.x = h_pos.data[cur_j].x;
947 posj.y = h_pos.data[cur_j].y;
948 posj.z = h_pos.data[cur_j].z;
949 Scalar qj = h_charge.data[cur_j];
950 Scalar3 dx = posi - posj;
952 // apply periodic boundary conditions:
953 dx = box.minImage(dx);
955 Scalar rsq = dot(dx, dx);
956 Scalar r = sqrtf(rsq);
957 Scalar qiqj = qi * qj;
// erf (not erfc) kernel: this is the long-range part being removed.
958 Scalar erffac = erf(m_kappa * r) / r;
959 Scalar force_divr = qiqj * (-Scalar(2.0) * exp(-rsq * m_kappa * m_kappa) * m_kappa / (sqrtpi * rsq) + erffac / rsq);
960 Scalar pair_eng = qiqj * erffac;
961 virial[0]+= Scalar(0.5) * dx.x * dx.x * force_divr;
962 virial[1]+= Scalar(0.5) * dx.y * dx.x * force_divr;
963 virial[2]+= Scalar(0.5) * dx.z * dx.x * force_divr;
964 virial[3]+= Scalar(0.5) * dx.y * dx.y * force_divr;
965 virial[4]+= Scalar(0.5) * dx.z * dx.y * force_divr;
966 virial[5]+= Scalar(0.5) * dx.z * dx.z * force_divr;
967 force.x += dx.x * force_divr;
968 force.y += dx.y * force_divr;
969 force.z += dx.z * force_divr;
970 force.w += pair_eng;
// Halve the energy so each pair is counted once across both members.
972 force.w *= Scalar(0.5);
973 h_force.data[idx].x -= force.x;
974 h_force.data[idx].y -= force.y;
975 h_force.data[idx].z -= force.z;
// NOTE(review): .w and the virial use plain '=' while .x/.y/.z use '-=' --
// this overwrites any previously accumulated energy/virial for idx instead
// of subtracting from it; confirm whether that is intended.
976 h_force.data[idx].w = -force.w;
977 for (unsigned int k = 0; k < 6; k++)
978 h_virial.data[k*virial_pitch+idx] = -virial[k];
983 /*! Computes the additional energy and virial contributed by PPPM
984 \note The additional terms are simply added onto particle 0 so that they will be accounted for by
985 ComputeThermo
987 void PPPMForceCompute::fix_thermo_quantities()
989 // access data arrays
990 BoxDim box = m_pdata->getBox();
991 Scalar3 L = box.getL();
993 ArrayHandle<CUFFTCOMPLEX> d_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
994 ArrayHandle<Scalar> d_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
995 ArrayHandle<Scalar> d_vg(m_vg, access_location::host, access_mode::readwrite);
996 Scalar2 pppm_virial_energy = make_scalar2(0.0, 0.0);
998 Scalar v_xx=0.0, v_xy=0.0, v_xz=0.0, v_yy=0.0, v_yz=0.0, v_zz=0.0;
1002 // compute the correction
// Sum G(k)*|rho(k)|^2 over the grid for the energy, and weight it by the
// per-point virial coefficients (m_vg) for the six virial components.
1003 for (int i = 0; i < m_Nx*m_Ny*m_Nz; i++)
1005 Scalar energy = d_green_hat.data[i]*(d_rho_real_space.data[i].x*d_rho_real_space.data[i].x +
1006 d_rho_real_space.data[i].y*d_rho_real_space.data[i].y);
1007 Scalar pressure = energy*(d_vg.data[0+6*i] + d_vg.data[3+6*i] + d_vg.data[5+6*i]);
1008 v_xx += d_vg.data[0+6*i]*energy;
1009 v_xy += d_vg.data[1+6*i]*energy;
1010 v_xz += d_vg.data[2+6*i]*energy;
1011 v_yy += d_vg.data[3+6*i]*energy;
1012 v_yz += d_vg.data[4+6*i]*energy;
1013 v_zz += d_vg.data[5+6*i]*energy;
1014 pppm_virial_energy.x += pressure;
1015 pppm_virial_energy.y += energy;
// NOTE(review): pppm_virial_energy.x (the scalar pressure) is computed and
// scaled here but never stored anywhere visible below -- appears to be dead.
1018 pppm_virial_energy.x *= m_energy_virial_factor/ (Scalar(3.0) * L.x * L.y * L.z);
1019 pppm_virial_energy.y *= m_energy_virial_factor;
// Subtract the self-energy (kappa/sqrt(pi) * sum q_i^2; the literal is
// sqrt(pi)) and the neutralizing-background term for non-neutral systems.
1020 pppm_virial_energy.y -= m_q2 * m_kappa / Scalar(1.772453850905516027298168);
1021 pppm_virial_energy.y -= 0.5*M_PI*m_q*m_q / (m_kappa*m_kappa* L.x * L.y * L.z);
1023 // apply the correction to particle 0
1024 ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
1025 ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
1026 h_force.data[0].w += pppm_virial_energy.y;
1029 // Compute full virial tensor
1030 unsigned int virial_pitch = m_virial.getPitch();
1031 h_virial.data[0*virial_pitch+0] += v_xx*m_energy_virial_factor;
1032 h_virial.data[1*virial_pitch+0] += v_xy*m_energy_virial_factor;
1033 h_virial.data[2*virial_pitch+0] += v_xz*m_energy_virial_factor;
1034 h_virial.data[3*virial_pitch+0] += v_yy*m_energy_virial_factor;
1035 h_virial.data[4*virial_pitch+0] += v_yz*m_energy_virial_factor;
1036 h_virial.data[5*virial_pitch+0] += v_zz*m_energy_virial_factor;
1039 void export_PPPMForceCompute()
1041 class_<PPPMForceCompute, boost::shared_ptr<PPPMForceCompute>, bases<ForceCompute>, boost::noncopyable >
1042 ("PPPMForceCompute", init< boost::shared_ptr<SystemDefinition>,
1043 boost::shared_ptr<NeighborList>,
1044 boost::shared_ptr<ParticleGroup> >())
1045 .def("setParams", &PPPMForceCompute::setParams)