/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivative works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Maintainer: sbarr

/*! \file PPPMForceComputeGPU.cc
    \brief Defines the PPPMForceComputeGPU class
*/

#ifdef SINGLE_PRECISION
#define CUFFT_TRANSFORM_TYPE CUFFT_C2C
#else
#define CUFFT_TRANSFORM_TYPE CUFFT_Z2Z
#endif
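
// CUFFT_TRANSFORM_TYPE picks the complex-to-complex transform matching the build's Scalar type:
// CUFFT_C2C for single precision, CUFFT_Z2Z for double precision.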

#include "PotentialPair.h"
#include "PPPMForceComputeGPU.h"

#include <boost/python.hpp>
using namespace boost::python;

#include <boost/bind.hpp>
using namespace boost;

using namespace std;

/*! \param sysdef System to compute forces on
    \param nlist Neighbor list
    \param group Particle group
*/
PPPMForceComputeGPU::PPPMForceComputeGPU(boost::shared_ptr<SystemDefinition> sysdef,
                                         boost::shared_ptr<NeighborList> nlist,
                                         boost::shared_ptr<ParticleGroup> group)
    : PPPMForceCompute(sysdef, nlist, group), m_block_size(256), m_first_run(true)
    {
    // can't run on the GPU if there aren't any GPUs in the execution configuration
    if (!exec_conf->isCUDAEnabled())
        {
        m_exec_conf->msg->error() << "Creating a PPPMForceComputeGPU with no GPU in the execution configuration" << endl;
        throw std::runtime_error("Error initializing PPPMForceComputeGPU");
        }
    CHECK_CUDA_ERROR();
    }

PPPMForceComputeGPU::~PPPMForceComputeGPU()
    {
    cufftDestroy(plan);
    }

/*!
    \param Nx Number of grid points in x direction
    \param Ny Number of grid points in y direction
    \param Nz Number of grid points in z direction
    \param order Number of grid points in each direction to assign charges to
    \param kappa Screening parameter in erfc
    \param rcut Short-ranged cutoff, used for computing the relative force error

    Sets parameters for the long-ranged part of the electrostatics calculation and updates the
    parameters on the GPU.
*/
void PPPMForceComputeGPU::setParams(int Nx, int Ny, int Nz, int order, Scalar kappa, Scalar rcut)
    {
    PPPMForceCompute::setParams(Nx, Ny, Nz, order, kappa, rcut);
    cufftPlan3d(&plan, Nx, Ny, Nz, CUFFT_TRANSFORM_TYPE);
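
    // The cuFFT plan is used for the forward and inverse 3D transforms of the Nx x Ny x Nz
    // charge mesh. The GPUArrays allocated below hold one value per mesh point and serve as
    // scratch space for the GPU reductions of the reciprocal-space energy and the six virial
    // components (see gpu_compute_pppm_thermo in computeForces()).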

    GPUArray<Scalar> n_energy_sum(Nx*Ny*Nz, exec_conf);
    m_energy_sum.swap(n_energy_sum);

    GPUArray<Scalar> n_v_xx_sum(Nx*Ny*Nz, exec_conf);
    m_v_xx_sum.swap(n_v_xx_sum);

    GPUArray<Scalar> n_v_xy_sum(Nx*Ny*Nz, exec_conf);
    m_v_xy_sum.swap(n_v_xy_sum);

    GPUArray<Scalar> n_v_xz_sum(Nx*Ny*Nz, exec_conf);
    m_v_xz_sum.swap(n_v_xz_sum);

    GPUArray<Scalar> n_v_yy_sum(Nx*Ny*Nz, exec_conf);
    m_v_yy_sum.swap(n_v_yy_sum);

    GPUArray<Scalar> n_v_yz_sum(Nx*Ny*Nz, exec_conf);
    m_v_yz_sum.swap(n_v_yz_sum);

    GPUArray<Scalar> n_v_zz_sum(Nx*Ny*Nz, exec_conf);
    m_v_zz_sum.swap(n_v_zz_sum);

    GPUArray<Scalar> n_o_data(Nx*Ny*Nz, exec_conf);
    o_data.swap(n_o_data);
    }

/*! Internal method for computing the forces on the GPU.
    \post The force data on the GPU is written with the calculated forces

    \param timestep Current time step of the simulation

    Calls gpu_compute_pppm_forces to do the dirty work.
*/
void PPPMForceComputeGPU::computeForces(unsigned int timestep)
    {
    if (!m_params_set)
        {
        m_exec_conf->msg->error() << "charge.pppm: setParams must be called prior to computeForces()" << endl;
        throw std::runtime_error("Error computing forces in PPPMForceComputeGPU");
        }

    unsigned int group_size = m_group->getNumMembers();
    // just drop out if the group is an empty group
    if (group_size == 0)
        return;

    // start the profile
    if (m_prof) m_prof->push(exec_conf, "PPPM");

    assert(m_pdata);

    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
    ArrayHandle<Scalar> d_charge(m_pdata->getCharges(), access_location::device, access_mode::read);

    BoxDim box = m_pdata->getBox();
    ArrayHandle<CUFFTCOMPLEX> d_rho_real_space(m_rho_real_space, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ex(m_Ex, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ey(m_Ey, access_location::device, access_mode::readwrite);
    ArrayHandle<CUFFTCOMPLEX> d_Ez(m_Ez, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar3> d_kvec(m_kvec, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar> d_green_hat(m_green_hat, access_location::device, access_mode::readwrite);
    ArrayHandle<Scalar> h_rho_coeff(m_rho_coeff, access_location::host, access_mode::readwrite);
    ArrayHandle<Scalar3> d_field(m_field, access_location::device, access_mode::readwrite);

    { //begin ArrayHandle scope
    ArrayHandle<Scalar4> d_force(m_force,access_location::device,access_mode::overwrite);
    ArrayHandle<Scalar> d_virial(m_virial,access_location::device,access_mode::overwrite);

    // reset virial
    cudaMemset(d_virial.data, 0, sizeof(Scalar)*m_virial.getNumElements());

    //Zero pppm forces on ALL particles, including neutral ones that aren't taken
    //care of in calculate_forces_kernel
    cudaMemset(d_force.data, 0, sizeof(Scalar4)*m_force.getNumElements());

    // access the group
    ArrayHandle< unsigned int > d_index_array(m_group->getIndexArray(), access_location::device, access_mode::read);
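
    // Forces are computed only for particles in the supplied group; d_index_array maps a
    // group-member index used by the kernels back to that particle's index in the particle data.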

    if(m_box_changed || m_first_run)
        {
        Scalar3 L = box.getL();
        Scalar temp = floor(((m_kappa*L.x/(M_PI*m_Nx)) * pow(-log(EPS_HOC),0.25)));
        int nbx = (int)temp;
        temp = floor(((m_kappa*L.y/(M_PI*m_Ny)) * pow(-log(EPS_HOC),0.25)));
        int nby = (int)temp;
        temp = floor(((m_kappa*L.z/(M_PI*m_Nz)) * pow(-log(EPS_HOC),0.25)));
        int nbz = (int)temp;
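
        // nbx, nby and nbz bound the number of aliasing images summed per dimension when the
        // optimal influence function (green_hat) is tabulated; EPS_HOC controls where that sum
        // is truncated. This mirrors the standard PPPM estimate used in LAMMPS.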

        ArrayHandle<Scalar> d_vg(m_vg, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_gf_b(m_gf_b, access_location::device, access_mode::readwrite);
        reset_kvec_green_hat(box,
                             m_Nx,
                             m_Ny,
                             m_Nz,
                             nbx,
                             nby,
                             nbz,
                             m_order,
                             m_kappa,
                             d_kvec.data,
                             d_green_hat.data,
                             d_vg.data,
                             d_gf_b.data,
                             m_block_size);

        Scalar scale = Scalar(1.0)/((Scalar)(m_Nx * m_Ny * m_Nz));
        m_energy_virial_factor = 0.5 * L.x * L.y * L.z * scale * scale;
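
        // scale*scale compensates for the two unnormalized cuFFT transforms entering each
        // quadratic term of the energy/virial sums, and 0.5*L.x*L.y*L.z is the usual volume
        // prefactor of the Ewald reciprocal-space sum.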

        m_box_changed = false;
        m_first_run = false;
        }

    // run the kernel in parallel on all GPUs
    gpu_compute_pppm_forces(d_force.data,
                            m_pdata->getN(),
                            d_pos.data,
                            d_charge.data,
                            box,
                            m_Nx,
                            m_Ny,
                            m_Nz,
                            m_order,
                            h_rho_coeff.data,
                            d_rho_real_space.data,
                            plan,
                            d_Ex.data,
                            d_Ey.data,
                            d_Ez.data,
                            d_kvec.data,
                            d_green_hat.data,
                            d_field.data,
                            d_index_array.data,
                            group_size,
                            m_block_size,
                            m_exec_conf->getComputeCapability());
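
    // The call above drives the full PPPM pipeline on the device: assign particle charges to the
    // mesh, forward FFT the mesh, multiply by the influence function and k-vectors to obtain the
    // field in reciprocal space, inverse FFT, and interpolate the field back onto the particles
    // to obtain forces. (Summary of the standard PPPM algorithm.)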

    if (exec_conf->isCUDAErrorCheckingEnabled())
        CHECK_CUDA_ERROR();

    // If there are exclusions, correct for the long-range part of the potential
    if( m_nlist->getExclusionsSet())
        {
        ArrayHandle<unsigned int> d_exlist(m_nlist->getExListArray(), access_location::device, access_mode::read);
        ArrayHandle<unsigned int> d_n_ex(m_nlist->getNExArray(), access_location::device, access_mode::read);
        Index2D nex = m_nlist->getExListIndexer();
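
        // fix_exclusions (below) subtracts the reciprocal-space interaction between excluded
        // pairs, which the mesh calculation above necessarily includes for every pair of charges.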

        fix_exclusions(d_force.data,
                       d_virial.data,
                       m_virial.getPitch(),
                       m_pdata->getN(),
                       d_pos.data,
                       d_charge.data,
                       box,
                       d_n_ex.data,
                       d_exlist.data,
                       nex,
                       m_kappa,
                       d_index_array.data,
                       group_size,
                       m_block_size,
                       m_exec_conf->getComputeCapability());
        if (exec_conf->isCUDAErrorCheckingEnabled())
            CHECK_CUDA_ERROR();
        }
    } // end ArrayHandle scope

    PDataFlags flags = m_pdata->getFlags();

    if(flags[pdata_flag::potential_energy] || flags[pdata_flag::pressure_tensor] || flags[pdata_flag::isotropic_virial])
        {
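
        // The energy and virial reductions are performed only on timesteps where they are
        // requested through the particle data flags, avoiding the extra mesh reductions otherwise.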
        ArrayHandle<Scalar> d_vg(m_vg, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_energy_sum(m_energy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xx_sum(m_v_xx_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xy_sum(m_v_xy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_xz_sum(m_v_xz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_yy_sum(m_v_yy_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_yz_sum(m_v_yz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_v_zz_sum(m_v_zz_sum, access_location::device, access_mode::readwrite);
        ArrayHandle<Scalar> d_o_data(o_data, access_location::device, access_mode::readwrite);
        Scalar pppm_virial_energy[7];
        gpu_compute_pppm_thermo(m_Nx,
                                m_Ny,
                                m_Nz,
                                d_rho_real_space.data,
                                d_vg.data,
                                d_green_hat.data,
                                d_o_data.data,
                                d_energy_sum.data,
                                d_v_xx_sum.data,
                                d_v_xy_sum.data,
                                d_v_xz_sum.data,
                                d_v_yy_sum.data,
                                d_v_yz_sum.data,
                                d_v_zz_sum.data,
                                pppm_virial_energy,
                                m_block_size);

        Scalar3 L = box.getL();

        pppm_virial_energy[0] *= m_energy_virial_factor;
        pppm_virial_energy[0] -= m_q2 * m_kappa / Scalar(1.772453850905516027298168);
        pppm_virial_energy[0] -= 0.5*M_PI*m_q*m_q / (m_kappa*m_kappa* L.x * L.y * L.z);
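
        // The two subtractions above are the standard Ewald corrections to the mesh energy:
        // the self-energy kappa*sum(q_i^2)/sqrt(pi) (1.7724... = sqrt(pi)) and the uniform
        // neutralizing-background term pi*Q^2/(2*kappa^2*V), which vanishes for a neutral system.
        // (This assumes m_q2 holds sum(q_i^2) and m_q the total charge Q.)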

        for(int i = 1; i < 7; i++)
            {
            pppm_virial_energy[i] *= m_energy_virial_factor;
            }

        // apply the correction to particle 0
        ArrayHandle<Scalar4> h_force(m_force,access_location::host, access_mode::readwrite);
        ArrayHandle<Scalar> h_virial(m_virial,access_location::host, access_mode::readwrite);
        unsigned int virial_pitch = m_virial.getPitch();

        h_force.data[0].w += pppm_virial_energy[0];
        h_virial.data[0*virial_pitch+0] += pppm_virial_energy[1]; // xx
        h_virial.data[1*virial_pitch+0] += pppm_virial_energy[2]; // xy
        h_virial.data[2*virial_pitch+0] += pppm_virial_energy[3]; // xz
        h_virial.data[3*virial_pitch+0] += pppm_virial_energy[4]; // yy
        h_virial.data[4*virial_pitch+0] += pppm_virial_energy[5]; // yz
        h_virial.data[5*virial_pitch+0] += pppm_virial_energy[6]; // zz
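
        // The total k-space energy and the six independent components of the symmetric virial
        // tensor are deposited on particle 0 only; individual per-particle values are not
        // meaningful here, but the global sums used for logging and the pressure are correct.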
        }

    if (m_prof) m_prof->pop(exec_conf);
    }

void export_PPPMForceComputeGPU()
    {
    class_<PPPMForceComputeGPU, boost::shared_ptr<PPPMForceComputeGPU>, bases<PPPMForceCompute>, boost::noncopyable >
        ("PPPMForceComputeGPU", init< boost::shared_ptr<SystemDefinition>,
                                      boost::shared_ptr<NeighborList>,
                                      boost::shared_ptr<ParticleGroup> >())
        .def("setBlockSize", &PPPMForceComputeGPU::setBlockSize)
        ;
    }