libhoomd/computes_gpu/ComputeThermoGPU.cc

   1 /*
   2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
   3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
   4 the University of Michigan All rights reserved.
   5
   6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
   7 copyright is held, by various Contributors who have granted The Regents of the
   8 University of Michigan the right to modify and/or distribute such Contributions.
   9
  10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
  11 and binary forms, provided you abide by the following conditions:
  12
  13 * Redistributions of source code must retain the above copyright notice, this
  14 list of conditions, and the following disclaimer both in the code and
  15 prominently in any materials provided with the distribution.
  16
  17 * Redistributions in binary form must reproduce the above copyright notice, this
  18 list of conditions, and the following disclaimer in the documentation and/or
  19 other materials provided with the distribution.
  20
  21 * All publications and presentations based on HOOMD-blue, including any reports
  22 or published results obtained, in whole or in part, with HOOMD-blue, will
  23 acknowledge its use according to the terms posted at the time of submission on:
  24 http://codeblue.umich.edu/hoomd-blue/citations.html
  25
  26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
  27 http://codeblue.umich.edu/hoomd-blue/
  28
  29 * Apart from the above required attributions, neither the name of the copyright
  30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
  31 promote products derived from this software without specific prior written
  32 permission.
  33
  34 Disclaimer
  35
  36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
  37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
  39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
  40
  41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48 */
  49
  50 // Maintainer: joaander
  51
  52 /*! \file ComputeThermoGPU.cc
  53     \brief Contains code for the ComputeThermoGPU class
  54 */
  55
  56 #ifdef WIN32
  57 #pragma warning( push )
  58 #pragma warning( disable : 4103 4244 )
  59 #endif
  60
  61 #include "ComputeThermoGPU.h"
  62 #include "ComputeThermoGPU.cuh"
  63
  64 #include <boost/python.hpp>
  65 using namespace boost::python;
  66 #include <boost/bind.hpp>
  67 using namespace boost;
  68
  69 #ifdef ENABLE_MPI
  70 #include "Communicator.h"
  71 #include "HOOMDMPI.h"
  72 #endif
  73
  74 #include <iostream>
  75 using namespace std;
  76
  77 /*! \param sysdef System for which to compute thermodynamic properties
  78     \param group Subset of the system over which properties are calculated
  79     \param suffix Suffix to append to all logged quantity names
  80 */
  81
  82 ComputeThermoGPU::ComputeThermoGPU(boost::shared_ptr<SystemDefinition> sysdef,
  83                                    boost::shared_ptr<ParticleGroup> group,
  84                                    const std::string& suffix)
  85     : ComputeThermo(sysdef, group, suffix)
  86     {
  87     if (!exec_conf->isCUDAEnabled())
  88         {
  89         m_exec_conf->msg->error() << "Creating a ComputeThermoGPU with no GPU in the execution configuration" << endl;
  90         throw std::runtime_error("Error initializing ComputeThermoGPU");
  91         }
  92
  93     m_block_size = 512;
  94     // this allocates more memory than necessary but is needed unless the scratch memory
  95     // is reallocated when the maximum number of particles changes
  96     m_num_blocks = m_group->getNumMembersGlobal() / m_block_size + 1;
  97
  98     GPUArray< Scalar4 > scratch(m_num_blocks, exec_conf);
  99     m_scratch.swap(scratch);
 100
 101     GPUArray< Scalar > scratch_pressure_tensor(m_num_blocks * 6, exec_conf);
 102     m_scratch_pressure_tensor.swap(scratch_pressure_tensor);
 103
 104     // override base class allocation using mapped memory
 105     GPUArray< Scalar > properties(thermo_index::num_quantities, exec_conf,true);
 106     m_properties.swap(properties);
 107
 108     cudaEventCreate(&m_event, cudaEventDisableTiming);
 109     }
 110
 111 //! Destructor
 112 ComputeThermoGPU::~ComputeThermoGPU()
 113     {
 114     cudaEventDestroy(m_event);
 115     }
 116
 117 /*! Computes all thermodynamic properties of the system in one fell swoop, on the GPU.
 118  */
 119 void ComputeThermoGPU::computeProperties()
 120     {
 121     // just drop out if the group is an empty group
 122     if (m_group->getNumMembersGlobal() == 0)
 123         return;
 124
 125     unsigned int group_size = m_group->getNumMembers();
 126
 127     if (m_prof) m_prof->push(m_exec_conf,"Thermo");
 128
 129     assert(m_pdata);
 130     assert(m_ndof != 0);
 131
 132     // access the particle data
 133     ArrayHandle<Scalar4> d_vel(m_pdata->getVelocities(), access_location::device, access_mode::read);
 134     BoxDim box = m_pdata->getGlobalBox();
 135
 136     PDataFlags flags = m_pdata->getFlags();
 137
 138     { // scope these array handles so they are released before the additional terms are added
 139     // access the net force, pe, and virial
 140     const GPUArray< Scalar4 >& net_force = m_pdata->getNetForce();
 141     const GPUArray< Scalar >& net_virial = m_pdata->getNetVirial();
 142     ArrayHandle<Scalar4> d_net_force(net_force, access_location::device, access_mode::read);
 143     ArrayHandle<Scalar> d_net_virial(net_virial, access_location::device, access_mode::read);
 144     ArrayHandle<Scalar4> d_scratch(m_scratch, access_location::device, access_mode::overwrite);
 145     ArrayHandle<Scalar> d_scratch_pressure_tensor(m_scratch_pressure_tensor, access_location::device, access_mode::overwrite);
 146     ArrayHandle<Scalar> d_properties(m_properties, access_location::device, access_mode::overwrite);
 147
 148     // access the group
 149     ArrayHandle< unsigned int > d_index_array(m_group->getIndexArray(), access_location::device, access_mode::read);
 150
 151     // build up args list
 152     m_num_blocks = m_group->getNumMembers() / m_block_size + 1;
 153     compute_thermo_args args;
 154     args.d_net_force = d_net_force.data;
 155     args.d_net_virial = d_net_virial.data;
 156     args.virial_pitch = net_virial.getPitch();
 157     args.ndof = m_ndof;
 158     args.D = m_sysdef->getNDimensions();
 159     args.d_scratch = d_scratch.data;
 160     args.d_scratch_pressure_tensor = d_scratch_pressure_tensor.data;
 161     args.block_size = m_block_size;
 162     args.n_blocks = m_num_blocks;
 163     args.external_virial_xx = m_pdata->getExternalVirial(0);
 164     args.external_virial_xy = m_pdata->getExternalVirial(1);
 165     args.external_virial_xz = m_pdata->getExternalVirial(2);
 166     args.external_virial_yy = m_pdata->getExternalVirial(3);
 167     args.external_virial_yz = m_pdata->getExternalVirial(4);
 168     args.external_virial_zz = m_pdata->getExternalVirial(5);
 169
 170     // perform the computation on the GPU
 171     gpu_compute_thermo( d_properties.data,
 172                         d_vel.data,
 173                         d_index_array.data,
 174                         group_size,
 175                         box,
 176                         args,
 177                         flags[pdata_flag::pressure_tensor]);
 178
 179     if (exec_conf->isCUDAErrorCheckingEnabled())
 180         CHECK_CUDA_ERROR();
 181     }
 182
 183     #ifdef ENABLE_MPI
 184     // in MPI, reduce extensive quantities only when they're needed
 185     m_properties_reduced = !m_pdata->getDomainDecomposition();
 186
 187     if (!m_properties_reduced) cudaEventRecord(m_event);
 188     #endif // ENABLE_MPI
 189
 190     if (m_prof) m_prof->pop(m_exec_conf);
 191     }
 192
 193 #ifdef ENABLE_MPI
 194 void ComputeThermoGPU::reduceProperties()
 195     {
 196     if (m_properties_reduced) return;
 197
 198     ArrayHandleAsync<Scalar> h_properties(m_properties, access_location::host, access_mode::readwrite);
 199     cudaEventSynchronize(m_event);
 200
 201     // reduce properties
 202     MPI_Allreduce(MPI_IN_PLACE, h_properties.data, thermo_index::num_quantities, MPI_HOOMD_SCALAR,
 203             MPI_SUM, m_exec_conf->getMPICommunicator());
 204
 205     m_properties_reduced = true;
 206     }
 207 #endif
 208
 209
 210 void export_ComputeThermoGPU()
 211     {
 212     class_<ComputeThermoGPU, boost::shared_ptr<ComputeThermoGPU>, bases<ComputeThermo>, boost::noncopyable >
 213         ("ComputeThermoGPU", init< boost::shared_ptr<SystemDefinition>,
 214          boost::shared_ptr<ParticleGroup>,
 215          const std::string& >())
 216         ;
 217     }
 218
 219 #ifdef WIN32
 220 #pragma warning( pop )
 221 #endif