2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
permission.
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 #pragma warning( push )
54 #pragma warning( disable : 4244 )
57 #include <boost/python.hpp>
58 #include <boost/bind.hpp>
60 #include "PPPMForceCompute.h"
67 using namespace boost
;
68 using namespace boost::python
;
//! Coefficients of a power expansion of sin(x)/x
// Taylor coefficients (-1)^k / (2k+1)! for k = 0..5; consumed by sinc()
// below to evaluate sin(x)/x without the 0/0 singularity at x == 0.
const Scalar cpu_sinc_coeff[] = {Scalar(1.0), Scalar(-1.0/6.0), Scalar(1.0/120.0),
                                 Scalar(-1.0/5040.0),Scalar(1.0/362880.0),
                                 Scalar(-1.0/39916800.0)};
/*! \file PPPMForceCompute.cc
    \brief Contains code for the PPPMForceCompute class
*/
81 /*! \param sysdef System to compute forces on
82 \param nlist Neighbor list
83 \param group Particle group
84 \post Memory is allocated, and forces are zeroed.
86 PPPMForceCompute::PPPMForceCompute(boost::shared_ptr
<SystemDefinition
> sysdef
,
87 boost::shared_ptr
<NeighborList
> nlist
,
88 boost::shared_ptr
<ParticleGroup
> group
)
89 : ForceCompute(sysdef
), m_params_set(false), m_nlist(nlist
), m_group(group
),
90 fft_in(NULL
), fft_ex(NULL
), fft_ey(NULL
), fft_ez(NULL
)
92 m_exec_conf
->msg
->notice(5) << "Constructing PPPMForceCompute" << endl
;
97 m_box_changed
= false;
98 m_boxchange_connection
= m_pdata
->connectBoxChange(bind(&PPPMForceCompute::slotBoxChanged
, this));
101 PPPMForceCompute::~PPPMForceCompute()
103 m_exec_conf
->msg
->notice(5) << "Destroying PPPMForceCompute" << endl
;
114 m_boxchange_connection
.disconnect();
/*! \param Nx Number of grid points in x direction
    \param Ny Number of grid points in y direction
    \param Nz Number of grid points in z direction
    \param order Number of grid points in each direction to assign charges to
    \param kappa Screening parameter in erfc
    \param rcut Short-ranged cutoff, used for computing the relative force error

    Sets parameters for the long-ranged part of the electrostatics calculation
*/
void PPPMForceCompute::setParams(int Nx, int Ny, int Nz, int order, Scalar kappa, Scalar rcut)
// NOTE(review): extraction dropped lines here -- presumably the opening '{'
// and the assignments of the arguments into m_Nx/m_Ny/m_Nz/m_order/m_kappa/
// m_rcut (and setting m_params_set), since those members are read below.
// Verify against upstream.

// warn (but allow) non-power-of-two grids; the warning text states this is
// only a performance concern
if(!(m_Nx == 2)&& !(m_Nx == 4)&& !(m_Nx == 8)&& !(m_Nx == 16)&& !(m_Nx == 32)&& !(m_Nx == 64)&& !(m_Nx == 128)&& !(m_Nx == 256)&& !(m_Nx == 512)&& !(m_Nx == 1024))
    m_exec_conf->msg->warning() << "charge.pppm: PPPM X gridsize should be a power of 2 for the best performance" << endl;
if(!(m_Ny == 2)&& !(m_Ny == 4)&& !(m_Ny == 8)&& !(m_Ny == 16)&& !(m_Ny == 32)&& !(m_Ny == 64)&& !(m_Ny == 128)&& !(m_Ny == 256)&& !(m_Ny == 512)&& !(m_Ny == 1024))
    m_exec_conf->msg->warning() << "charge.pppm: PPPM Y gridsize should be a power of 2 for the best performance" << endl;
if(!(m_Nz == 2)&& !(m_Nz == 4)&& !(m_Nz == 8)&& !(m_Nz == 16)&& !(m_Nz == 32)&& !(m_Nz == 64)&& !(m_Nz == 128)&& !(m_Nz == 256)&& !(m_Nz == 512)&& !(m_Nz == 1024))
    m_exec_conf->msg->warning() << "charge.pppm: PPPM Z gridsize should be a power of 2 for the best performance" << endl;

// the per-axis charge-assignment coefficient table occupies
// order*(2*order+1) Scalars and must fit the fixed-size constant array
// NOTE(review): brace-only lines grouping each error+throw pair were lost
if (m_order * (2*m_order+1) > CONSTANT_SIZE)
    m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, doesn't fit into constant array" << endl;
    throw std::runtime_error("Error initializing PPPMForceCompute");
if (m_order > MaxOrder)
    m_exec_conf->msg->error() << "charge.pppm: interpolation order too high, max is " << MaxOrder << endl;
    throw std::runtime_error("Error initializing PPPMForceCompute");

// allocate grid-sized work arrays and swap them into the members
GPUArray<CUFFTCOMPLEX> n_rho_real_space(Nx*Ny*Nz, exec_conf);
m_rho_real_space.swap(n_rho_real_space);
GPUArray<Scalar> n_green_hat(Nx*Ny*Nz, exec_conf);
m_green_hat.swap(n_green_hat);
GPUArray<Scalar> n_vg(6*Nx*Ny*Nz, exec_conf);
// NOTE(review): the matching m_vg.swap(n_vg) and the swaps for n_kvec,
// n_Ex, n_Ey, n_Ez, n_gf_b appear to have been dropped in extraction --
// verify against upstream.
GPUArray<Scalar3> n_kvec(Nx*Ny*Nz, exec_conf);
GPUArray<CUFFTCOMPLEX> n_Ex(Nx*Ny*Nz, exec_conf);
GPUArray<CUFFTCOMPLEX> n_Ey(Nx*Ny*Nz, exec_conf);
GPUArray<CUFFTCOMPLEX> n_Ez(Nx*Ny*Nz, exec_conf);
GPUArray<Scalar> n_gf_b(order, exec_conf);
GPUArray<Scalar> n_rho_coeff(order*(2*order+1), exec_conf);
m_rho_coeff.swap(n_rho_coeff);
GPUArray<Scalar3> n_field(Nx*Ny*Nz, exec_conf);
m_field.swap(n_field);

// accumulate the total charge and total squared charge of the system
const BoxDim& box = m_pdata->getBox();
ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
// NOTE(review): the zero-initialization of m_q / m_q2 before this loop was
// apparently dropped -- verify.
for(int i = 0; i < (int)m_pdata->getN(); i++) {
    m_q += h_charge.data[i];
    m_q2 += h_charge.data[i]*h_charge.data[i];
// NOTE(review): the loop's closing brace and the condition guarding the
// following warning (presumably a non-zero net-charge test) were dropped.
m_exec_conf->msg->warning() << "charge.pppm: system in not neutral, the net charge is " << m_q << endl;

// compute RMS force error
Scalar3 L = box.getL();
// grid spacing along each axis
Scalar hx = L.x/(Scalar)Nx;
Scalar hy = L.y/(Scalar)Ny;
Scalar hz = L.z/(Scalar)Nz;
// long-range (k-space) error estimate per axis, combined in quadrature
Scalar lprx = PPPMForceCompute::rms(hx, L.x, (int)m_pdata->getN());
Scalar lpry = PPPMForceCompute::rms(hy, L.y, (int)m_pdata->getN());
Scalar lprz = PPPMForceCompute::rms(hz, L.z, (int)m_pdata->getN());
Scalar lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
// short-range (real-space) error estimate from the erfc tail at rcut
Scalar spr = 2.0*m_q2*exp(-m_kappa*m_kappa*m_rcut*m_rcut) / sqrt((int)m_pdata->getN()*m_rcut*L.x*L.y*L.z);

double RMS_error = MAX(lpr,spr);
if(RMS_error > 0.1) {
    printf("!!!!!!!\n!!!!!!!\n!!!!!!!\nWARNING RMS error of %g is probably too high %f %f\n!!!!!!!\n!!!!!!!\n!!!!!!!\n", RMS_error, lpr, spr);
// NOTE(review): the closing brace (and likely an 'else') before the notice
// below was dropped -- verify.
printf("Notice: PPPM RMS error: %g\n", RMS_error);

// precompute the charge-assignment (spline) coefficients
PPPMForceCompute::compute_rho_coeff();

// compute k vectors, virial contribution and Green's function
PPPMForceCompute::reset_kvec_green_hat_cpu();

// 1/N FFT normalization appears squared in the energy/virial prefactor
Scalar scale = 1.0f/((Scalar)(Nx * Ny * Nz));
m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
// Returns the names of the log quantities this compute provides; only the
// total PPPM reciprocal-space energy is exposed.
std::vector< std::string > PPPMForceCompute::getProvidedLogQuantities()
// NOTE(review): extraction dropped the body's braces plus the declaration
// and return of the local 'list' (presumably std::vector<std::string> list;
// ... return list;) -- verify against upstream.
list.push_back("pppm_energy");
/*! \param quantity Name of the quantity to get the log value of
    \param timestep Current time step of the simulation
    \return the requested logged value
    \throws runtime_error if \a quantity is not recognized
*/
Scalar PPPMForceCompute::getLogValue(const std::string& quantity, unsigned int timestep)
if (quantity == string("pppm_energy"))
    // NOTE(review): lines dropped here; presumably the branch body computes
    // forces for this timestep if needed and returns 'energy' -- verify.
    Scalar energy = calcEnergySum();
// any other quantity name is a hard error rather than silently returning 0
m_exec_conf->msg->error() << "charge.pppm: " << quantity << " is not a valid log quantity" << endl;
throw runtime_error("Error getting log value");
/*! Actually perform the force computation
    \param timestep Current time step

    Pipeline: assign charges to the grid, forward-FFT the density, multiply
    by the Green's function to get the k-space field (combined_green_e),
    inverse-FFT the three field components, then interpolate forces back to
    the particles. Exclusion and thermo corrections are applied afterwards.
*/
void PPPMForceCompute::computeForces(unsigned int timestep)
// NOTE(review): extraction dropped lines here; the error below implies a
// guard of the form 'if (!m_params_set)' -- verify against upstream.
m_exec_conf->msg->error() << "charge.pppm: setParams must be called prior to computeForces()" << endl;
throw std::runtime_error("Error computing forces in PPPMForceCompute");

// start by updating the neighborlist
m_nlist->compute(timestep);

// start the profile for this compute
if (m_prof) m_prof->push("PPPM force");

// NOTE(review): lines dropped before the allocations; these mallocs and
// plan allocations are presumably guarded so they only run on the first
// call (fft_in etc. are NULL-initialized in the ctor) -- verify.
fft_in = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
fft_ex = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
fft_ey = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
fft_ez = (kiss_fft_cpx *)malloc(m_Nx*m_Ny*m_Nz*sizeof(kiss_fft_cpx));
// kiss FFT 3D plans: forward (0) and inverse (1) transforms over 'dim'
// NOTE(review): the declaration/fill of 'dim' was dropped -- verify.
fft_forward = kiss_fftnd_alloc(dim, 3, 0, NULL, NULL);
fft_inverse = kiss_fftnd_alloc(dim, 3, 1, NULL, NULL);

// NOTE(review): this re-derivation of k-vectors and the prefactor is
// presumably guarded by 'if (m_box_changed)' (it clears the flag) -- verify.
const BoxDim& box = m_pdata->getBox();
Scalar3 L = box.getL();
PPPMForceCompute::reset_kvec_green_hat_cpu();
Scalar scale = Scalar(1.0)/((Scalar)(m_Nx * m_Ny * m_Nz));
m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
m_box_changed = false;

PPPMForceCompute::assign_charges_to_grid();

{ // scoping array handles
ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);
// copy the real-valued grid density into the complex FFT input buffer
for(int i = 0; i < m_Nx * m_Ny * m_Nz; i++) {
    fft_in[i].r = (Scalar) h_rho_real_space.data[i].x;
    fft_in[i].i = (Scalar)0.0;
// in-place forward FFT of the charge density
kiss_fftnd(fft_forward, &fft_in[0], &fft_in[0]);
for(int i = 0; i < m_Nx * m_Ny * m_Nz; i++) {
    h_rho_real_space.data[i].x = fft_in[i].r;
    h_rho_real_space.data[i].y = fft_in[i].i;

// multiply rho(k) by the Green's function and build E(k) components
PPPMForceCompute::combined_green_e();

{ // scoping array handles
ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);
for(int i = 0; i < m_Nx * m_Ny * m_Nz; i++)
    fft_ex[i].r = (Scalar) h_Ex.data[i].x;
    fft_ex[i].i = (Scalar) h_Ex.data[i].y;
    fft_ey[i].r = (Scalar) h_Ey.data[i].x;
    fft_ey[i].i = (Scalar) h_Ey.data[i].y;
    fft_ez[i].r = (Scalar) h_Ez.data[i].x;
    fft_ez[i].i = (Scalar) h_Ez.data[i].y;
// inverse FFT brings each field component back to real space, in place
kiss_fftnd(fft_inverse, &fft_ex[0], &fft_ex[0]);
kiss_fftnd(fft_inverse, &fft_ey[0], &fft_ey[0]);
kiss_fftnd(fft_inverse, &fft_ez[0], &fft_ez[0]);
for(int i = 0; i < m_Nx * m_Ny * m_Nz; i++)
    h_Ex.data[i].x = fft_ex[i].r;
    h_Ex.data[i].y = fft_ex[i].i;
    h_Ey.data[i].x = fft_ey[i].r;
    h_Ey.data[i].y = fft_ey[i].i;
    h_Ez.data[i].x = fft_ez[i].r;
    h_Ez.data[i].y = fft_ez[i].i;

// interpolate grid field back onto particles -> forces
PPPMForceCompute::calculate_forces();

// If there are exclusions, correct for the long-range part of the potential
if( m_nlist->getExclusionsSet())
    PPPMForceCompute::fix_exclusions_cpu();

// access flags and correct energy and virial if needed
PDataFlags flags = this->m_pdata->getFlags();
if (flags[pdata_flag::potential_energy] || flags[pdata_flag::pressure_tensor] || flags[pdata_flag::isotropic_virial])
    fix_thermo_quantities();

if (m_prof) m_prof->pop();
/*! Estimates the RMS force error of the k-space part for one axis.
    \param h grid spacing along the axis
    \param prd box length along the axis
    \param natoms number of particles
    \return estimated RMS error (value built below)

    The table 'acons[order][m]' holds the standard P3M error-formula
    coefficients indexed by interpolation order.
*/
Scalar PPPMForceCompute::rms(Scalar h, Scalar prd, Scalar natoms)
// NOTE(review): extraction dropped the opening brace and the declarations
// of 'acons', 'sum' (zero-initialized) and 'm' -- verify against upstream.
acons[1][0] = 2.0 / 3.0;
acons[2][0] = 1.0 / 50.0;
acons[2][1] = 5.0 / 294.0;
acons[3][0] = 1.0 / 588.0;
acons[3][1] = 7.0 / 1440.0;
acons[3][2] = 21.0 / 3872.0;
acons[4][0] = 1.0 / 4320.0;
acons[4][1] = 3.0 / 1936.0;
acons[4][2] = 7601.0 / 2271360.0;
acons[4][3] = 143.0 / 28800.0;
acons[5][0] = 1.0 / 23232.0;
acons[5][1] = 7601.0 / 13628160.0;
acons[5][2] = 143.0 / 69120.0;
acons[5][3] = 517231.0 / 106536960.0;
acons[5][4] = 106640677.0 / 11737571328.0;
acons[6][0] = 691.0 / 68140800.0;
acons[6][1] = 13.0 / 57600.0;
acons[6][2] = 47021.0 / 35512320.0;
acons[6][3] = 9694607.0 / 2095994880.0;
acons[6][4] = 733191589.0 / 59609088000.0;
acons[6][5] = 326190917.0 / 11700633600.0;
acons[7][0] = 1.0 / 345600.0;
acons[7][1] = 3617.0 / 35512320.0;
acons[7][2] = 745739.0 / 838397952.0;
acons[7][3] = 56399353.0 / 12773376000.0;
acons[7][4] = 25091609.0 / 1560084480.0;
acons[7][5] = 1755948832039.0 / 36229939200000.0;
acons[7][6] = 4887769399.0 / 37838389248.0;

// sum_m acons[order][m] * (h*kappa)^(2m)
for (m = 0; m < m_order; m++)
    sum += acons[m_order][m] * pow(h*m_kappa,Scalar(2.0)*(Scalar)m);
Scalar value = m_q2 * pow(h*m_kappa,(Scalar)m_order) *
               sqrt(m_kappa*prd*sqrt(2.0*M_PI)*sum/natoms) / (prd*prd);
// NOTE(review): the trailing 'return value;' and closing brace were dropped.
/*! Computes the polynomial coefficients of the charge-assignment (spline)
    function of order m_order and stores them in m_rho_coeff. The flat
    layout of the scratch array 'a' is documented below.
*/
void PPPMForceCompute::compute_rho_coeff()
// NOTE(review): extraction dropped the opening brace and the declarations
// of the loop indices (j, k, l, m), the accumulator 's' and the scratch
// array 'a' -- verify against upstream.
ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::readwrite);

// usage: a[x][y] = a[y + x*(2*m_order+1)]
// zero the scratch table
for(l=0; l<m_order; l++)
    for(m=0; m<(2*m_order+1); m++)
        a[m + l*(2*m_order+1)] = Scalar(0.0);
for (k = -m_order; k <= m_order; k++)
    for (l = 0; l < m_order; l++) {
        a[(k+m_order) + l * (2*m_order+1)] = Scalar(0.0);

// recurrence: build the order-j coefficients from the order-(j-1) ones
a[m_order + 0 * (2*m_order+1)] = Scalar(1.0);
for (j = 1; j < m_order; j++) {
    for (k = -j; k <= j; k += 2) {
        // NOTE(review): the re-initialization of 's' (presumably s = 0.0)
        // inside this loop appears to have been dropped -- verify.
        for (l = 0; l < j; l++) {
            a[(k + m_order) + (l+1)*(2*m_order+1)] = (a[(k+1+m_order) + l * (2*m_order + 1)] - a[(k-1+m_order) + l * (2*m_order + 1)]) / (l+1);
            s += pow(0.5,(double) (l+1)) * (a[(k-1+m_order) + l * (2*m_order + 1)] + pow(-1.0,(double) l) * a[(k+1+m_order) + l * (2*m_order + 1)] ) / (double)(l+1);
        a[k+m_order + 0 * (2*m_order+1)] = s;

// copy the finished coefficients into the output array
// NOTE(review): the initialization/increment of the output index 'm' was
// dropped in extraction -- verify.
for (k = -(m_order-1); k < m_order; k += 2) {
    for (l = 0; l < m_order; l++) {
        h_rho_coeff.data[m + l*(2*m_order+1)] = a[k+m_order + l * (2*m_order + 1)];
/*! Computes the denominator expansion coefficients of the optimal influence
    (Green's) function for interpolation order m_order and stores them in
    m_gf_b. They are consumed by gf_denom().
*/
void PPPMForceCompute::compute_gf_denom()
// NOTE(review): extraction dropped the opening brace and the declarations
// of the loop indices (k, l, m) -- verify against upstream.
ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
for (l = 1; l < m_order; l++) h_gf_b.data[l] = 0.0;
h_gf_b.data[0] = 1.0;

// build the coefficients by the standard recurrence over orders m
for (m = 1; m < m_order; m++) {
    for (l = m; l > 0; l--) {
        h_gf_b.data[l] = 4.0 * (h_gf_b.data[l]*(l-m)*(l-m-0.5)-h_gf_b.data[l-1]*(l-m-1)*(l-m-1));
    h_gf_b.data[0] = 4.0 * (h_gf_b.data[0]*(l-m)*(l-m-0.5));

// normalize by (2*m_order - 1)!
// NOTE(review): the declaration/initialization of 'ifact' (presumably
// Scalar ifact = 1) was dropped in extraction -- verify.
for (k = 1; k < 2*m_order; k++) ifact *= k;
Scalar gaminv = 1.0/ifact;
for (l = 0; l < m_order; l++) h_gf_b.data[l] *= gaminv;
/*! Evaluates the Green's-function denominator polynomial at the given
    sin^2 arguments using the coefficients prepared by compute_gf_denom().
    \param x sin^2 argument for the x axis
    \param y sin^2 argument for the y axis
    \param z sin^2 argument for the z axis
    \return the denominator value (built from sx, sy, sz via Horner's rule)
*/
Scalar PPPMForceCompute::gf_denom(Scalar x, Scalar y, Scalar z)
// NOTE(review): extraction dropped the opening brace and the declarations /
// zero-initializations of 'l', 'sx', 'sy', 'sz' -- verify against upstream.
ArrayHandle<Scalar> h_gf_b(m_gf_b, access_location::host, access_mode::readwrite);
// Horner evaluation of the polynomial in x, y and z simultaneously
for (l = m_order-1; l >= 0; l--) {
    sx = h_gf_b.data[l] + sx*x;
    sy = h_gf_b.data[l] + sy*y;
    sz = h_gf_b.data[l] + sz*z;
// NOTE(review): the combination of sx, sy, sz into the return value
// (presumably Scalar s = sx*sy*sz; return s*s;) was dropped -- verify.
492 //! GPU implementation of sinc(x)==sin(x)/x
493 inline Scalar
sinc(Scalar x
)
497 if (x
*x
<= Scalar(1.0))
499 Scalar term
= Scalar(1.0);
500 for (unsigned int i
= 0; i
< 6; ++i
)
502 sinc
+= cpu_sinc_coeff
[i
] * term
;
/*! Rebuilds the cached k-vectors (m_kvec), the per-mode virial coefficients
    (m_vg) and the grid-based optimal Green's function (m_green_hat) for the
    current box. Called from setParams() and again whenever the box changes.
*/
void PPPMForceCompute::reset_kvec_green_hat_cpu()
ArrayHandle<Scalar3> h_kvec(m_kvec, access_location::host, access_mode::readwrite);
const BoxDim& box = m_pdata->getBox();
Scalar3 L = box.getL();

// compute reciprocal lattice vectors
Scalar3 a1 = box.getLatticeVector(0);
Scalar3 a2 = box.getLatticeVector(1);
Scalar3 a3 = box.getLatticeVector(2);
Scalar V_box = box.getVolume();
// b_i = 2*pi * (a_j x a_k) / V  (cross products written out component-wise)
Scalar3 b1 = Scalar(2.0*M_PI)*make_scalar3(a2.y*a3.z-a2.z*a3.y, a2.z*a3.x-a2.x*a3.z, a2.x*a3.y-a2.y*a3.x)/V_box;
Scalar3 b2 = Scalar(2.0*M_PI)*make_scalar3(a3.y*a1.z-a3.z*a1.y, a3.z*a1.x-a3.x*a1.z, a3.x*a1.y-a3.y*a1.x)/V_box;
Scalar3 b3 = Scalar(2.0*M_PI)*make_scalar3(a1.y*a2.z-a1.z*a2.y, a1.z*a2.x-a1.x*a2.z, a1.x*a2.y-a1.y*a2.x)/V_box;

// Set up the k-vectors
// NOTE(review): the declaration of the integer triple 'j' (Scalar3/int3)
// used below was dropped in extraction -- verify against upstream.
int ix, iy, iz, kper, lper, mper, k, l, m;
for (ix = 0; ix < m_Nx; ix++) {
    // map grid index to the signed mode number (wrap upper half to negative)
    j.x = ix > m_Nx/2 ? ix - m_Nx : ix;
    for (iy = 0; iy < m_Ny; iy++) {
        j.y = iy > m_Ny/2 ? iy - m_Ny : iy;
        for (iz = 0; iz < m_Nz; iz++) {
            j.z = iz > m_Nz/2 ? iz - m_Nz : iz;
            h_kvec.data[iz + m_Nz * (iy + m_Ny * ix)] = j.x*b1+j.y*b2+j.z*b3;

// Set up constants for virial calculation
ArrayHandle<Scalar> h_vg(m_vg, access_location::host, access_mode::readwrite);;
for(int x = 0; x < m_Nx; x++)
    for(int y = 0; y < m_Ny; y++)
        for(int z = 0; z < m_Nz; z++)
            Scalar3 kvec = h_kvec.data[z + m_Nz * (y + m_Ny * x)];
            Scalar sqk = kvec.x*kvec.x;
            sqk += kvec.y*kvec.y;
            sqk += kvec.z*kvec.z;
            int grid_point = z + m_Nz * (y + m_Ny * x);
            // NOTE(review): the k=0 guard (presumably if (sqk == 0.0))
            // selecting the zeroed branch was dropped -- verify.
            h_vg.data[0 + 6*grid_point] = Scalar(0.0);
            h_vg.data[1 + 6*grid_point] = Scalar(0.0);
            h_vg.data[2 + 6*grid_point] = Scalar(0.0);
            h_vg.data[3 + 6*grid_point] = Scalar(0.0);
            h_vg.data[4 + 6*grid_point] = Scalar(0.0);
            h_vg.data[5 + 6*grid_point] = Scalar(0.0);
            // six independent components of the per-mode virial tensor
            Scalar vterm = -2.0 * (1.0/sqk + 0.25/(m_kappa*m_kappa));
            h_vg.data[0 + 6*grid_point] = 1.0 + vterm*kvec.x*kvec.x;
            h_vg.data[1 + 6*grid_point] = vterm*kvec.x*kvec.y;
            h_vg.data[2 + 6*grid_point] = vterm*kvec.x*kvec.z;
            h_vg.data[3 + 6*grid_point] = 1.0 + vterm*kvec.y*kvec.y;
            h_vg.data[4 + 6*grid_point] = vterm*kvec.y*kvec.z;
            h_vg.data[5 + 6*grid_point] = 1.0 + vterm*kvec.z*kvec.z;

// Set up the grid based Green's function
ArrayHandle<Scalar> h_green_hat(m_green_hat, access_location::host, access_mode::readwrite);
Scalar snx, sny, snz, snx2, sny2, snz2;
Scalar argx, argy, argz, wx, wy, wz, qx, qy, qz;
Scalar sum1, dot1, dot2;
Scalar numerator, denominator, sqk;

// half the grid step in reciprocal units per axis
Scalar3 kH = Scalar(2.0*M_PI)*make_scalar3(Scalar(1.0)/(Scalar)m_Nx,
                                           Scalar(1.0)/(Scalar)m_Ny,
                                           Scalar(1.0)/(Scalar)m_Nz);
// NOTE(review): declarations of xprd/yprd (box lengths) used below were
// dropped -- verify.
Scalar zprd_slab = L.z;

PPPMForceCompute::compute_gf_denom();

// number of aliasing images to sum per axis, from the EPS_HOC tolerance
// NOTE(review): the assignments of nbx/nby/nbz from these 'temp' values
// were dropped in extraction -- verify.
Scalar temp = floor(((m_kappa*xprd/(M_PI*m_Nx)) *
                     pow(-log(EPS_HOC),0.25)));
temp = floor(((m_kappa*yprd/(M_PI*m_Ny)) *
              pow(-log(EPS_HOC),0.25)));
temp = floor(((m_kappa*zprd_slab/(M_PI*m_Nz)) *
              pow(-log(EPS_HOC),0.25)));

Scalar3 kvec,kn, kn1, kn2, kn3;
Scalar arg_gauss, gauss;

for (m = 0; m < m_Nz; m++) {
    // signed mode number along z
    mper = m - m_Nz*(2*m/m_Nz);
    snz = sin(0.5*kH.z*mper);
    // NOTE(review): snz2 = snz*snz (and the analogous sny2/snx2 lines
    // below) appear to have been dropped -- verify.
    for (l = 0; l < m_Ny; l++) {
        lper = l - m_Ny*(2*l/m_Ny);
        sny = sin(0.5*kH.y*lper);
        for (k = 0; k < m_Nx; k++) {
            kper = k - m_Nx*(2*k/m_Nx);
            snx = sin(0.5*kH.x*kper);
            kvec = kper*b1 + lper*b2 + mper*b3;
            sqk = dot(kvec, kvec);
            // NOTE(review): the sqk != 0 guard and the declaration of
            // 'form' were dropped; 12.5663706 is 4*pi.
            numerator = form*12.5663706/sqk;
            denominator = gf_denom(snx2,sny2,snz2);
            // sum over aliased images of this mode
            // NOTE(review): sum1 = 0.0 initialization and the argx/argy/
            // argz, kn1/kn2/kn3, dot1/dot2 assignments were dropped.
            for (ix = -nbx; ix <= nbx; ix++) {
                qx = (kper+(Scalar)(m_Nx*ix));
                wx = pow(sinc(argx),m_order);
                for (iy = -nby; iy <= nby; iy++) {
                    qy = (lper+(Scalar)(m_Ny*iy));
                    wy = pow(sinc(argy),m_order);
                    for (iz = -nbz; iz <= nbz; iz++) {
                        qz = (mper+(Scalar)(m_Nz*iz));
                        wz = pow(sinc(argz),m_order);
                        kn = kn1 + kn2 + kn3;
                        arg_gauss = Scalar(0.25)*dot2/m_kappa/m_kappa;
                        gauss = exp(-arg_gauss);
                        sum1 += (dot1/dot2) * gauss * pow(Scalar(wx*wy*wz),Scalar(2.0));
            h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = numerator*sum1/denominator;
            } else h_green_hat.data[m + m_Nz * (l + m_Ny * k)] = 0.0;
/*! Spreads each particle's charge onto the surrounding m_order^3 grid
    points of m_rho_real_space using the precomputed spline coefficients
    in m_rho_coeff (Horner evaluation per axis).
*/
void PPPMForceCompute::assign_charges_to_grid()
const BoxDim& box = m_pdata->getBox();
ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
ArrayHandle<CUFFTCOMPLEX> h_rho_real_space(m_rho_real_space, access_location::host, access_mode::readwrite);

// clear the density grid before accumulating
memset(h_rho_real_space.data, 0, sizeof(CUFFTCOMPLEX)*m_Nx*m_Ny*m_Nz);
Scalar V_cell = box.getVolume()/(Scalar)(m_Nx*m_Ny*m_Nz);

for(int i = 0; i < (int)m_pdata->getN(); i++)
    Scalar qi = h_charge.data[i];
    // NOTE(review): the declaration 'Scalar3 posi;' was dropped -- verify.
    posi.x = h_pos.data[i].x;
    posi.y = h_pos.data[i].y;
    posi.z = h_pos.data[i].z;

    //normalize position to gridsize:
    Scalar3 pos_frac = box.makeFraction(posi);
    pos_frac.x *= (Scalar)m_Nx;
    pos_frac.y *= (Scalar)m_Ny;
    pos_frac.z *= (Scalar)m_Nz;

    Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
    int nlower, nupper, mx, my, mz, nxi, nyi, nzi;

    nlower = -(m_order-1)/2;
    // NOTE(review): the assignments of nupper and of shift/shiftone (which
    // differ for even vs odd m_order) were dropped in extraction -- verify.
    // nearest grid point per axis
    nxi = (int)(pos_frac.x + shift);
    nyi = (int)(pos_frac.y + shift);
    nzi = (int)(pos_frac.z + shift);
    // fractional offset of the particle from that grid point
    dx = shiftone+(Scalar)nxi-pos_frac.x;
    dy = shiftone+(Scalar)nyi-pos_frac.y;
    dz = shiftone+(Scalar)nzi-pos_frac.z;

    int mult_fact = 2*m_order+1;
    // NOTE(review): the loop-variable declarations (n, m, l, k, result)
    // and the x0/y0 weight products, plus the 'mx = n+nxi' style index
    // setup lines, were dropped throughout the nest below -- verify.
    for (n = nlower; n <= nupper; n++) {
        // periodic wrap of the x grid index
        if(mx >= m_Nx) mx -= m_Nx;
        if(mx < 0) mx += m_Nx;
        // Horner evaluation of the x-axis spline weight at dx
        result = Scalar(0.0);
        for (k = m_order-1; k >= 0; k--) {
            result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
        for (m = nlower; m <= nupper; m++) {
            if(my >= m_Ny) my -= m_Ny;
            if(my < 0) my += m_Ny;
            result = Scalar(0.0);
            for (k = m_order-1; k >= 0; k--) {
                result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
            for (l = nlower; l <= nupper; l++) {
                if(mz >= m_Nz) mz -= m_Nz;
                if(mz < 0) mz += m_Nz;
                result = Scalar(0.0);
                for (k = m_order-1; k >= 0; k--) {
                    result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
                // deposit the separable weight product times the charge
                h_rho_real_space.data[mz + m_Nz * (my + m_Ny * mx)].x += z0*result;
766 void PPPMForceCompute::combined_green_e()
769 ArrayHandle
<Scalar3
> h_kvec(m_kvec
, access_location::host
, access_mode::readwrite
);
770 ArrayHandle
<Scalar
> h_green_hat(m_green_hat
, access_location::host
, access_mode::readwrite
);
771 ArrayHandle
<CUFFTCOMPLEX
> h_Ex(m_Ex
, access_location::host
, access_mode::readwrite
);
772 ArrayHandle
<CUFFTCOMPLEX
> h_Ey(m_Ey
, access_location::host
, access_mode::readwrite
);
773 ArrayHandle
<CUFFTCOMPLEX
> h_Ez(m_Ez
, access_location::host
, access_mode::readwrite
);
774 ArrayHandle
<CUFFTCOMPLEX
> h_rho_real_space(m_rho_real_space
, access_location::host
, access_mode::readwrite
);
776 unsigned int NNN
= m_Nx
*m_Ny
*m_Nz
;
777 for(unsigned int i
= 0; i
< NNN
; i
++)
780 CUFFTCOMPLEX rho_local
= h_rho_real_space
.data
[i
];
781 Scalar scale_times_green
= h_green_hat
.data
[i
] / ((Scalar
)(NNN
));
782 rho_local
.x
*= scale_times_green
;
783 rho_local
.y
*= scale_times_green
;
785 h_Ex
.data
[i
].x
= h_kvec
.data
[i
].x
* rho_local
.y
;
786 h_Ex
.data
[i
].y
= -h_kvec
.data
[i
].x
* rho_local
.x
;
788 h_Ey
.data
[i
].x
= h_kvec
.data
[i
].y
* rho_local
.y
;
789 h_Ey
.data
[i
].y
= -h_kvec
.data
[i
].y
* rho_local
.x
;
791 h_Ez
.data
[i
].x
= h_kvec
.data
[i
].z
* rho_local
.y
;
792 h_Ez
.data
[i
].y
= -h_kvec
.data
[i
].z
* rho_local
.x
;
/*! Interpolates the real-space electric field (m_Ex/m_Ey/m_Ez, real parts
    after the inverse FFT) back onto the particles using the same spline
    weights as assign_charges_to_grid(), accumulating qi*E into h_force.
*/
void PPPMForceCompute::calculate_forces()
const BoxDim& box = m_pdata->getBox();
ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::overwrite);
ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::overwrite);

// there are enough other checks on the input data: but it doesn't hurt to be safe
assert(h_force.data);
assert(h_virial.data);

// Zero data for force calculation.
memset((void*)h_force.data,0,sizeof(Scalar4)*m_force.getNumElements());
memset((void*)h_virial.data,0,sizeof(Scalar)*m_virial.getNumElements());

ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::read);
ArrayHandle<CUFFTCOMPLEX> h_Ex(m_Ex, access_location::host, access_mode::readwrite);
ArrayHandle<CUFFTCOMPLEX> h_Ey(m_Ey, access_location::host, access_mode::readwrite);
ArrayHandle<CUFFTCOMPLEX> h_Ez(m_Ez, access_location::host, access_mode::readwrite);

for(int i = 0; i < (int)m_pdata->getN(); i++)
    Scalar qi = h_charge.data[i];
    // NOTE(review): the declaration 'Scalar3 posi;' was dropped -- verify.
    posi.x = h_pos.data[i].x;
    posi.y = h_pos.data[i].y;
    posi.z = h_pos.data[i].z;

    //normalize position to gridsize:
    Scalar3 pos_frac = box.makeFraction(posi);
    pos_frac.x *= (Scalar)m_Nx;
    pos_frac.y *= (Scalar)m_Ny;
    pos_frac.z *= (Scalar)m_Nz;

    Scalar shift, shiftone, x0, y0, z0, dx, dy, dz;
    int nlower, nupper, mx, my, mz, nxi, nyi, nzi;

    nlower = -(m_order-1)/2;
    // NOTE(review): assignments of nupper and shift/shiftone (even/odd
    // m_order cases) were dropped in extraction -- verify.
    // nearest grid point and fractional offset per axis (same scheme as
    // assign_charges_to_grid)
    nxi = (int)(pos_frac.x + shift);
    nyi = (int)(pos_frac.y + shift);
    nzi = (int)(pos_frac.z + shift);

    dx = shiftone+(Scalar)nxi-pos_frac.x;
    dy = shiftone+(Scalar)nyi-pos_frac.y;
    dz = shiftone+(Scalar)nzi-pos_frac.z;

    int mult_fact = 2*m_order+1;
    // NOTE(review): loop-variable declarations (n, m, l, k, result), the
    // x0/y0/z0 weight products and 'mx = n+nxi' style index setup were
    // dropped throughout the nest below -- verify.
    for (n = nlower; n <= nupper; n++) {
        // periodic wrap of the x grid index
        if(mx >= m_Nx) mx -= m_Nx;
        if(mx < 0) mx += m_Nx;
        // Horner evaluation of the x-axis spline weight at dx
        result = Scalar(0.0);
        for (k = m_order-1; k >= 0; k--) {
            result = h_rho_coeff.data[n-nlower + k*mult_fact] + result * dx;
        for (m = nlower; m <= nupper; m++) {
            if(my >= m_Ny) my -= m_Ny;
            if(my < 0) my += m_Ny;
            result = Scalar(0.0);
            for (k = m_order-1; k >= 0; k--) {
                result = h_rho_coeff.data[m-nlower + k*mult_fact] + result * dy;
            for (l = nlower; l <= nupper; l++) {
                if(mz >= m_Nz) mz -= m_Nz;
                if(mz < 0) mz += m_Nz;
                result = Scalar(0.0);
                for (k = m_order-1; k >= 0; k--) {
                    result = h_rho_coeff.data[l-nlower + k*mult_fact] + result * dz;
                // sample the real part of the inverse-FFT'd field at the
                // wrapped grid point
                Scalar local_field_x = h_Ex.data[mz + m_Nz * (my + m_Ny * mx)].x;
                Scalar local_field_y = h_Ey.data[mz + m_Nz * (my + m_Ny * mx)].x;
                Scalar local_field_z = h_Ez.data[mz + m_Nz * (my + m_Ny * mx)].x;
                // F = q * w(x,y,z) * E
                h_force.data[i].x += qi*z0*local_field_x;
                h_force.data[i].y += qi*z0*local_field_y;
                h_force.data[i].z += qi*z0*local_field_z;
/*! Subtracts the k-space (erf) part of the Coulomb interaction for every
    excluded pair, since the grid sum included all pairs. For each group
    member the correction force/energy/virial accumulated over its exclusion
    list is removed from h_force / h_virial.
*/
void PPPMForceCompute::fix_exclusions_cpu()
unsigned int group_size = m_group->getNumMembers();

// just drop out if the group is an empty group
// NOTE(review): the early-return statement itself was dropped in
// extraction -- verify against upstream.

ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
unsigned int virial_pitch = m_virial.getPitch();

// there are enough other checks on the input data: but it doesn't hurt to be safe
assert(h_force.data);

ArrayHandle< unsigned int > d_group_members(m_group->getIndexArray(), access_location::host, access_mode::read);
const BoxDim& box = m_pdata->getBox();
ArrayHandle<unsigned int> d_exlist(m_nlist->getExListArray(), access_location::host, access_mode::read);
ArrayHandle<unsigned int> d_n_ex(m_nlist->getNExArray(), access_location::host, access_mode::read);
Index2D nex = m_nlist->getExListIndexer();

ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);

for(unsigned int i = 0; i < group_size; i++)
    // per-particle accumulators for the correction
    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
    // NOTE(review): the declaration 'Scalar virial[6];' was dropped.
    for (unsigned int k = 0; k < 6; k++)
        virial[k] = Scalar(0.0);
    unsigned int idx = d_group_members.data[i];
    // NOTE(review): the declaration 'Scalar3 posi;' was dropped.
    posi.x = h_pos.data[idx].x;
    posi.y = h_pos.data[idx].y;
    posi.z = h_pos.data[idx].z;
    Scalar qi = h_charge.data[idx];

    unsigned int n_neigh = d_n_ex.data[idx];
    const Scalar sqrtpi = sqrtf(M_PI);
    unsigned int cur_j = 0;

    for (unsigned int neigh_idx = 0; neigh_idx < n_neigh; neigh_idx++)
        cur_j = d_exlist.data[nex(idx, neigh_idx)];
        // get the neighbor's position
        // NOTE(review): the declaration 'Scalar3 posj;' was dropped.
        posj.x = h_pos.data[cur_j].x;
        posj.y = h_pos.data[cur_j].y;
        posj.z = h_pos.data[cur_j].z;
        Scalar qj = h_charge.data[cur_j];
        Scalar3 dx = posi - posj;

        // apply periodic boundary conditions:
        dx = box.minImage(dx);

        Scalar rsq = dot(dx, dx);
        Scalar r = sqrtf(rsq);
        Scalar qiqj = qi * qj;
        // erf(kappa*r)/r is the long-range part that the grid already counted
        Scalar erffac = erf(m_kappa * r) / r;
        Scalar force_divr = qiqj * (-Scalar(2.0) * exp(-rsq * m_kappa * m_kappa) * m_kappa / (sqrtpi * rsq) + erffac / rsq);
        Scalar pair_eng = qiqj * erffac;
        // half the pair virial goes to each member of the pair
        virial[0]+= Scalar(0.5) * dx.x * dx.x * force_divr;
        virial[1]+= Scalar(0.5) * dx.y * dx.x * force_divr;
        virial[2]+= Scalar(0.5) * dx.z * dx.x * force_divr;
        virial[3]+= Scalar(0.5) * dx.y * dx.y * force_divr;
        virial[4]+= Scalar(0.5) * dx.z * dx.y * force_divr;
        virial[5]+= Scalar(0.5) * dx.z * dx.z * force_divr;
        force.x += dx.x * force_divr;
        force.y += dx.y * force_divr;
        force.z += dx.z * force_divr;
        // NOTE(review): the accumulation of pair_eng into force.w
        // (presumably force.w += pair_eng;) appears to have been dropped,
        // given the halving below -- verify.
    force.w *= Scalar(0.5);
    // the correction is subtracted from the previously computed totals
    h_force.data[idx].x -= force.x;
    h_force.data[idx].y -= force.y;
    h_force.data[idx].z -= force.z;
    h_force.data[idx].w = -force.w;
    for (unsigned int k = 0; k < 6; k++)
        h_virial.data[k*virial_pitch+idx] = -virial[k];
983 /*! Computes the additional energy and virial contributed by PPPM
984 \note The additional terms are simply added onto particle 0 so that they will be accounted for by
987 void PPPMForceCompute::fix_thermo_quantities()
989 // access data arrays
990 BoxDim box
= m_pdata
->getBox();
991 Scalar3 L
= box
.getL();
993 ArrayHandle
<CUFFTCOMPLEX
> d_rho_real_space(m_rho_real_space
, access_location::host
, access_mode::readwrite
);
994 ArrayHandle
<Scalar
> d_green_hat(m_green_hat
, access_location::host
, access_mode::readwrite
);
995 ArrayHandle
<Scalar
> d_vg(m_vg
, access_location::host
, access_mode::readwrite
);
996 Scalar2 pppm_virial_energy
= make_scalar2(0.0, 0.0);
998 Scalar v_xx
=0.0, v_xy
=0.0, v_xz
=0.0, v_yy
=0.0, v_yz
=0.0, v_zz
=0.0;
1002 // compute the correction
1003 for (int i
= 0; i
< m_Nx
*m_Ny
*m_Nz
; i
++)
1005 Scalar energy
= d_green_hat
.data
[i
]*(d_rho_real_space
.data
[i
].x
*d_rho_real_space
.data
[i
].x
+
1006 d_rho_real_space
.data
[i
].y
*d_rho_real_space
.data
[i
].y
);
1007 Scalar pressure
= energy
*(d_vg
.data
[0+6*i
] + d_vg
.data
[3+6*i
] + d_vg
.data
[5+6*i
]);
1008 v_xx
+= d_vg
.data
[0+6*i
]*energy
;
1009 v_xy
+= d_vg
.data
[1+6*i
]*energy
;
1010 v_xz
+= d_vg
.data
[2+6*i
]*energy
;
1011 v_yy
+= d_vg
.data
[3+6*i
]*energy
;
1012 v_yz
+= d_vg
.data
[4+6*i
]*energy
;
1013 v_zz
+= d_vg
.data
[5+6*i
]*energy
;
1014 pppm_virial_energy
.x
+= pressure
;
1015 pppm_virial_energy
.y
+= energy
;
1018 pppm_virial_energy
.x
*= m_energy_virial_factor
/ (Scalar(3.0) * L
.x
* L
.y
* L
.z
);
1019 pppm_virial_energy
.y
*= m_energy_virial_factor
;
1020 pppm_virial_energy
.y
-= m_q2
* m_kappa
/ Scalar(1.772453850905516027298168);
1021 pppm_virial_energy
.y
-= 0.5*M_PI
*m_q
*m_q
/ (m_kappa
*m_kappa
* L
.x
* L
.y
* L
.z
);
1023 // apply the correction to particle 0
1024 ArrayHandle
<Scalar4
> h_force(m_force
,access_location::host
, access_mode::readwrite
);
1025 ArrayHandle
<Scalar
> h_virial(m_virial
,access_location::host
, access_mode::readwrite
);
1026 h_force
.data
[0].w
+= pppm_virial_energy
.y
;
1029 // Compute full virial tensor
1030 unsigned int virial_pitch
= m_virial
.getPitch();
1031 h_virial
.data
[0*virial_pitch
+0] += v_xx
*m_energy_virial_factor
;
1032 h_virial
.data
[1*virial_pitch
+0] += v_xy
*m_energy_virial_factor
;
1033 h_virial
.data
[2*virial_pitch
+0] += v_xz
*m_energy_virial_factor
;
1034 h_virial
.data
[3*virial_pitch
+0] += v_yy
*m_energy_virial_factor
;
1035 h_virial
.data
[4*virial_pitch
+0] += v_yz
*m_energy_virial_factor
;
1036 h_virial
.data
[5*virial_pitch
+0] += v_zz
*m_energy_virial_factor
;
1039 void export_PPPMForceCompute()
1041 class_
<PPPMForceCompute
, boost::shared_ptr
<PPPMForceCompute
>, bases
<ForceCompute
>, boost::noncopyable
>
1042 ("PPPMForceCompute", init
< boost::shared_ptr
<SystemDefinition
>,
1043 boost::shared_ptr
<NeighborList
>,
1044 boost::shared_ptr
<ParticleGroup
> >())
1045 .def("setParams", &PPPMForceCompute::setParams
)