/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivative works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Maintainer: jglaser

/*! \file DomainDecomposition.cc
    \brief Implements the DomainDecomposition class
*/

#ifdef ENABLE_MPI
#include "DomainDecomposition.h"

#include "SystemDefinition.h"
#include "ParticleData.h"

#include "HOOMDMPI.h"
#include <boost/python.hpp>

#include <boost/serialization/set.hpp>

using namespace boost::python;

//! Constructor
/*! The constructor performs a spatial domain decomposition of the simulation box on the processor
 *  with rank \b exec_conf->getMPIroot(). The resulting domain dimensions are then distributed to
 *  the other processors.
 */
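// A minimal construction sketch (hypothetical exec_conf and box; passing 0 for
// nx, ny, nz lets findDecomposition() choose the grid, and twolevel = true
// requests a node-aware rank mapping where possible):
//
//     boost::shared_ptr<DomainDecomposition> decomposition(
//         new DomainDecomposition(exec_conf, box.getL(), 0, 0, 0, true));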
DomainDecomposition::DomainDecomposition(boost::shared_ptr<ExecutionConfiguration> exec_conf,
                                         Scalar3 L,
                                         unsigned int nx,
                                         unsigned int ny,
                                         unsigned int nz,
                                         bool twolevel)
    : m_exec_conf(exec_conf), m_mpi_comm(m_exec_conf->getMPICommunicator())
    {
    m_exec_conf->msg->notice(5) << "Constructing DomainDecomposition" << std::endl;

    unsigned int rank = m_exec_conf->getRank();
    unsigned int nranks = m_exec_conf->getNRanks();

    // initialize node names
    findCommonNodes();

    m_max_n_node = 0;
    m_twolevel = twolevel;

    if (twolevel)
        {
        // find out if we can do a node-level decomposition
        initializeTwoLevel();
        }

    unsigned int nx_node = 0, ny_node = 0, nz_node = 0;
    unsigned int nx_intra = 0, ny_intra = 0, nz_intra = 0;

    if (nx || ny || nz) m_twolevel = false;

    if (rank == 0)
        {
        if (m_twolevel)
            {
            // every node has the same number of ranks, so nranks == num_nodes * num_ranks_per_node
            unsigned int n_nodes = m_nodes.size();

            // subdivide the global grid
            findDecomposition(nranks, L, nx, ny, nz);

            // subdivide the local grid
            subdivide(nranks/n_nodes, L, nx, ny, nz, nx_intra, ny_intra, nz_intra);

            nx_node = nx/nx_intra;
            ny_node = ny/ny_intra;
            nz_node = nz/nz_intra;
            }
        else
            {
            bool found_decomposition = findDecomposition(nranks, L, nx, ny, nz);
            if (!found_decomposition)
                {
                m_exec_conf->msg->warning() << "Unable to find a decomposition with"
                    << std::endl << "requested dimensions. Choosing default decomposition."
                    << std::endl << std::endl;

                nx = ny = nz = 0;
                findDecomposition(nranks, L, nx, ny, nz);
                }
            }
        m_nx = nx;
        m_ny = ny;
        m_nz = nz;
        }

    // broadcast grid dimensions
    bcast(m_nx, 0, m_mpi_comm);
    bcast(m_ny, 0, m_mpi_comm);
    bcast(m_nz, 0, m_mpi_comm);

    // Initialize domain indexer
    m_index = Index3D(m_nx, m_ny, m_nz);

    // map cartesian grid onto ranks
    GPUArray<unsigned int> cart_ranks(nranks, m_exec_conf);
    m_cart_ranks.swap(cart_ranks);

    GPUArray<unsigned int> cart_ranks_inv(nranks, m_exec_conf);
    m_cart_ranks_inv.swap(cart_ranks_inv);

    ArrayHandle<unsigned int> h_cart_ranks(m_cart_ranks, access_location::host, access_mode::overwrite);
    ArrayHandle<unsigned int> h_cart_ranks_inv(m_cart_ranks_inv, access_location::host, access_mode::overwrite);

    if (m_twolevel)
        {
        bcast(nx_intra, 0, m_mpi_comm);
        bcast(ny_intra, 0, m_mpi_comm);
        bcast(nz_intra, 0, m_mpi_comm);

        bcast(nx_node, 0, m_mpi_comm);
        bcast(ny_node, 0, m_mpi_comm);
        bcast(nz_node, 0, m_mpi_comm);

        m_node_grid = Index3D(nx_node, ny_node, nz_node);
        m_intra_node_grid = Index3D(nx_intra, ny_intra, nz_intra);

        std::vector<unsigned int> node_ranks(m_max_n_node);
        std::set<std::string>::iterator node_it = m_nodes.begin();
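
        // The mapping below places all ranks of one physical node into a contiguous
        // nx_intra x ny_intra x nz_intra block of the global domain grid, so that
        // most neighboring domains live on the same node. A rank's global cell is
        //     (ix, iy, iz) = (ix_node*nx_intra + ix_intra,
        //                     iy_node*ny_intra + iy_intra,
        //                     iz_node*nz_intra + iz_intra)
        // Illustrative example: with a 2 x 1 x 1 node grid and a 2 x 2 x 2 intra-node
        // grid, the node at (1,0,0) owns the global cells with ix in {2,3}.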
        // iterate over node grid
        for (unsigned int ix_node = 0; ix_node < m_node_grid.getW(); ix_node++)
            for (unsigned int iy_node = 0; iy_node < m_node_grid.getH(); iy_node++)
                for (unsigned int iz_node = 0; iz_node < m_node_grid.getD(); iz_node++)
                    {
                    // get ranks for this node
                    typedef std::multimap<std::string, unsigned int> map_t;
                    std::string node = *(node_it++);
                    std::pair<map_t::iterator, map_t::iterator> p = m_node_map.equal_range(node);
                    map_t::iterator it = p.first;

                    std::ostringstream oss;
                    oss << "Node " << node << ": ranks";
                    for (unsigned int i = 0; i < m_max_n_node; ++i)
                        {
                        unsigned int r = (it++)->second;
                        oss << " " << r;
                        node_ranks[i] = r;
                        }
                    m_exec_conf->msg->notice(5) << oss.str() << std::endl;

                    // iterate over local ranks
                    for (unsigned int ix_intra = 0; ix_intra < m_intra_node_grid.getW(); ix_intra++)
                        for (unsigned int iy_intra = 0; iy_intra < m_intra_node_grid.getH(); iy_intra++)
                            for (unsigned int iz_intra = 0; iz_intra < m_intra_node_grid.getD(); iz_intra++)
                                {
                                unsigned int ilocal = m_intra_node_grid(ix_intra, iy_intra, iz_intra);

                                unsigned int ix = ix_node * nx_intra + ix_intra;
                                unsigned int iy = iy_node * ny_intra + iy_intra;
                                unsigned int iz = iz_node * nz_intra + iz_intra;

                                unsigned int iglob = m_index(ix, iy, iz);

                                // add rank to table
                                h_cart_ranks.data[iglob] = node_ranks[ilocal];
                                h_cart_ranks_inv.data[node_ranks[ilocal]] = iglob;
                                }
                    }
        } // twolevel
    else
        {
        // simply map the global grid in sequential order to ranks
        for (unsigned int iglob = 0; iglob < nranks; ++iglob)
            {
            h_cart_ranks.data[iglob] = iglob;
            h_cart_ranks_inv.data[iglob] = iglob;
            }
        }

    // Print out information about the domain decomposition
    m_exec_conf->msg->notice(1) << "HOOMD-blue is using domain decomposition: n_x = "
        << m_nx << " n_y = " << m_ny << " n_z = " << m_nz << "." << std::endl;

    if (m_twolevel)
        m_exec_conf->msg->notice(1) << nx_intra << " x " << ny_intra << " x " << nz_intra
            << " local grid on " << m_nodes.size() << " nodes" << std::endl;

    // compute position of this box in the domain grid by reverse look-up
    m_grid_pos = m_index.getTriple(h_cart_ranks_inv.data[rank]);
    }

//! Find a domain decomposition with given parameters
bool DomainDecomposition::findDecomposition(unsigned int nranks, const Scalar3 L, unsigned int& nx, unsigned int& ny, unsigned int& nz)
    {
    assert(L.x > 0);
    assert(L.y > 0);
    assert(L.z > 0);

    // Calculate the number of sub-domains in every direction
    // by minimizing the surface area between domains at constant number of domains
    double min_surface_area = L.x*L.y*(double)(nranks-1);
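    // The initial value above is the area of the 1 x 1 x nranks starting guess.
    // Worked example (illustrative): for nranks = 8 in a unit cube, the metric
    // L.x*L.y*(nz-1) + L.x*L.z*(ny-1) + L.y*L.z*(nx-1) gives
    //     1 x 1 x 8 -> 7,    1 x 2 x 4 -> 4,    2 x 2 x 2 -> 3,
    // so the search below settles on the 2 x 2 x 2 grid.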

    unsigned int nx_in = nx;
    unsigned int ny_in = ny;
    unsigned int nz_in = nz;

    bool found_decomposition = (nx_in == 0 && ny_in == 0 && nz_in == 0);

    // initial guess
    nx = 1;
    ny = 1;
    nz = nranks;

    for (unsigned int nx_try = 1; nx_try <= nranks; nx_try++)
        {
        if (nx_in != 0 && nx_try != nx_in)
            continue;
        for (unsigned int ny_try = 1; nx_try*ny_try <= nranks; ny_try++)
            {
            if (ny_in != 0 && ny_try != ny_in)
                continue;
            for (unsigned int nz_try = 1; nx_try*ny_try*nz_try <= nranks; nz_try++)
                {
                if (nz_in != 0 && nz_try != nz_in)
                    continue;
                if (nx_try*ny_try*nz_try != nranks) continue;
                double surface_area = L.x*L.y*(double)(nz_try-1) + L.x*L.z*(double)(ny_try-1) + L.y*L.z*(double)(nx_try-1);
                if (surface_area < min_surface_area || !found_decomposition)
                    {
                    nx = nx_try;
                    ny = ny_try;
                    nz = nz_try;
                    min_surface_area = surface_area;
                    found_decomposition = true;
                    }
                }
            }
        }

    return found_decomposition;
    }

//! Find a two-level decomposition of the global grid
void DomainDecomposition::subdivide(unsigned int n_node_ranks, Scalar3 L,
                                    unsigned int nx, unsigned int ny, unsigned int nz,
                                    unsigned int& nx_intra, unsigned int& ny_intra, unsigned int& nz_intra)
    {
    assert(L.x > 0);
    assert(L.y > 0);
    assert(L.z > 0);

    // initial guess
    nx_intra = 1;
    ny_intra = 1;
    nz_intra = n_node_ranks;
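
    // The loops below keep only factorizations nx_intra x ny_intra x nz_intra of
    // n_node_ranks whose factors evenly divide the global grid, so every node covers
    // an identical block of domains. Illustrative example: for a 4 x 4 x 2 global
    // grid with 8 ranks per node, 2 x 2 x 2 is acceptable (2 divides 4, 4, and 2),
    // while 1 x 1 x 8 is rejected because 8 does not divide nz = 2.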
    for (unsigned int nx_intra_try = 1; nx_intra_try <= n_node_ranks; nx_intra_try++)
        for (unsigned int ny_intra_try = 1; nx_intra_try*ny_intra_try <= n_node_ranks; ny_intra_try++)
            for (unsigned int nz_intra_try = 1; nx_intra_try*ny_intra_try*nz_intra_try <= n_node_ranks; nz_intra_try++)
                {
                if (nx_intra_try*ny_intra_try*nz_intra_try != n_node_ranks) continue;
                if (nx % nx_intra_try || ny % ny_intra_try || nz % nz_intra_try) continue;

                nx_intra = nx_intra_try;
                ny_intra = ny_intra_try;
                nz_intra = nz_intra_try;
                }
    }

/*! \param dir Spatial direction to find neighbor in
               0: east, 1: west, 2: north, 3: south, 4: up, 5: down
 */
unsigned int DomainDecomposition::getNeighborRank(unsigned int dir) const
    {
    assert(dir < 6); // dir is unsigned, so only the upper bound needs checking

    int adj[6][3] = {{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}};

    // determine neighbor position
    int ineigh = (int) m_grid_pos.x + adj[dir][0];
    int jneigh = (int) m_grid_pos.y + adj[dir][1];
    int kneigh = (int) m_grid_pos.z + adj[dir][2];

    // wrap across boundaries
    if (ineigh < 0)
        ineigh += m_nx;
    else if (ineigh == (int) m_nx)
        ineigh -= m_nx;

    if (jneigh < 0)
        jneigh += m_ny;
    else if (jneigh == (int) m_ny)
        jneigh -= m_ny;

    if (kneigh < 0)
        kneigh += m_nz;
    else if (kneigh == (int) m_nz)
        kneigh -= m_nz;
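
    // Example (illustrative): with m_nx == 4, the rank at m_grid_pos.x == 3 asking
    // for its east neighbor (dir == 0) computes ineigh == 4, which wraps to 0.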
    unsigned int idx = m_index(ineigh, jneigh, kneigh);
    ArrayHandle<unsigned int> h_cart_ranks(m_cart_ranks, access_location::host, access_mode::read);
    return h_cart_ranks.data[idx];
    }

//! Determines whether the local box shares a boundary with the global box
bool DomainDecomposition::isAtBoundary(unsigned int dir) const
    {
    return ( (dir == 0 && m_grid_pos.x == m_nx - 1) ||
             (dir == 1 && m_grid_pos.x == 0)        ||
             (dir == 2 && m_grid_pos.y == m_ny - 1) ||
             (dir == 3 && m_grid_pos.y == 0)        ||
             (dir == 4 && m_grid_pos.z == m_nz - 1) ||
             (dir == 5 && m_grid_pos.z == 0));
    }

//! Get the dimensions of the local simulation box
const BoxDim DomainDecomposition::calculateLocalBox(const BoxDim& global_box)
    {
    // initialize local box with all properties of global box
    BoxDim box = global_box;

    // calculate the local box dimensions by sub-dividing the cartesian lattice
    Scalar3 L = global_box.getL();
    Scalar3 L_local = L / make_scalar3(m_nx, m_ny, m_nz);

    // position of this domain in the grid
    Scalar3 lo_g = global_box.getLo();
    Scalar3 lo, hi;
    lo.x = lo_g.x + (double)m_grid_pos.x * L_local.x;
    lo.y = lo_g.y + (double)m_grid_pos.y * L_local.y;
    lo.z = lo_g.z + (double)m_grid_pos.z * L_local.z;

    hi = lo + L_local;
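
    // Example (illustrative): for a global box spanning [-10, 10) in x and m_nx == 4,
    // L_local.x == 5 and the domain at m_grid_pos.x == 2 covers [0, 5).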

    // set periodic flags
    // we are periodic in a direction along which there is only one box
    uchar3 periodic = make_uchar3(m_nx == 1 ? 1 : 0,
                                  m_ny == 1 ? 1 : 0,
                                  m_nz == 1 ? 1 : 0);

    box.setLoHi(lo, hi);
    box.setPeriodic(periodic);
    return box;
    }

unsigned int DomainDecomposition::placeParticle(const BoxDim& global_box, Scalar3 pos)
    {
    // get fractional coordinates in the global box
    Scalar3 f = global_box.makeFraction(pos);

    Scalar tol(1e-5);

    // check user input
    if (f.x < -tol || f.x >= 1.0+tol || f.y < -tol || f.y >= 1.0+tol || f.z < -tol || f.z >= 1.0+tol)
        {
        m_exec_conf->msg->error() << "Particle coordinates outside box." << std::endl;
        m_exec_conf->msg->error() << "f.x = " << f.x << " f.y = " << f.y << " f.z = " << f.z << std::endl;
        throw std::runtime_error("Error placing particle");
        }

    // compute the box the particle should be placed into
    unsigned int ix = f.x*m_nx;
    if (ix == m_nx) ix = 0;

    unsigned int iy = f.y*m_ny;
    if (iy == m_ny) iy = 0;

    unsigned int iz = f.z*m_nz;
    if (iz == m_nz) iz = 0;
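
    // Example (illustrative): f.x == 0.6 with m_nx == 4 truncates to ix == 2;
    // f.x == 1.0 yields ix == m_nx, which the wrap above folds back to cell 0.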

    ArrayHandle<unsigned int> h_cart_ranks(m_cart_ranks, access_location::host, access_mode::read);
    unsigned int rank = h_cart_ranks.data[m_index(ix, iy, iz)];

    // synchronize with rank zero
    bcast(rank, 0, m_exec_conf->getMPICommunicator());
    return rank;
    }

void DomainDecomposition::findCommonNodes()
    {
    // get MPI node name
    char procname[MPI_MAX_PROCESSOR_NAME];
    int len;
    MPI_Get_processor_name(procname, &len);
    std::string s(procname, len);

    // collect node names from all ranks on rank zero
    std::vector<std::string> nodes;
    gather_v(s, nodes, 0, m_exec_conf->getMPICommunicator());
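
    // On rank zero, nodes now holds one hostname per rank, e.g. (illustrative)
    // {"node0", "node0", "node1", "node1"} for four ranks on two hosts.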

    // construct map of node names
    if (m_exec_conf->getRank() == 0)
        {
        unsigned int nranks = m_exec_conf->getNRanks();
        for (unsigned int r = 0; r < nranks; r++)
            {
            // insert into set
            m_nodes.insert(nodes[r]);

            // insert into map
            m_node_map.insert(std::make_pair(nodes[r], r));
            }
        }

    // broadcast to other ranks
    bcast(m_nodes, 0, m_exec_conf->getMPICommunicator());
    bcast(m_node_map, 0, m_exec_conf->getMPICommunicator());
    }

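// initializeTwoLevel() enables the two-level decomposition only when every node
// hosts the same number of ranks. Illustrative example: ranks per node of
// {12, 12, 12} keep m_twolevel == true, while {12, 12, 8} fall back to the
// one-level mapping.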
void DomainDecomposition::initializeTwoLevel()
    {
    typedef std::multimap<std::string, unsigned int> map_t;
    m_twolevel = true;
    m_max_n_node = 0;
    for (std::set<std::string>::iterator it = m_nodes.begin(); it != m_nodes.end(); ++it)
        {
        std::pair<map_t::iterator, map_t::iterator> p = m_node_map.equal_range(*it);
        unsigned int n_node = std::distance(p.first, p.second);

        // if we have a non-uniform number of ranks per node, use one-level decomposition
        if (m_max_n_node != 0 && n_node != m_max_n_node)
            m_twolevel = false;

        if (n_node > m_max_n_node)
            m_max_n_node = n_node;
        }
    }

//! Export DomainDecomposition class to python
void export_DomainDecomposition()
    {
    class_<DomainDecomposition, boost::shared_ptr<DomainDecomposition>, boost::noncopyable >("DomainDecomposition",
        init< boost::shared_ptr<ExecutionConfiguration>, Scalar3, unsigned int, unsigned int, unsigned int, bool>());
    }

#endif // ENABLE_MPI