// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 *  This header contains MPI collective operations, which implement
 *  various parallel algorithms that require the coordination of all
 *  processes within a communicator. The header @c collectives_fwd.hpp
 *  provides forward declarations for each of these operations. To
 *  include only specific collective algorithms, use the headers @c
 *  boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <vector>
namespace boost { namespace mpi {
/**
 *  @brief Gather the values stored at every process into vectors of
 *  values from each process.
 *
 *  @c all_gather is a collective algorithm that collects the values
 *  stored at each process into a vector of values indexed by the
 *  process number they came from. The type @c T of the values may be
 *  any type that is serializable or has an associated MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Allgather to gather the values.
 *
 *  @param comm The communicator over which the all-gather will
 *  occur.
 *
 *  @param in_value The value to be transmitted by each process. To
 *  gather an array of values, @c in_values points to the @c n local
 *  values to be transmitted.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  populated with the values from each process, indexed by the
 *  process ID number. If it is a vector, the vector will be resized
 *  accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
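/* Example (a minimal sketch assuming an initialized boost::mpi::environment
 * and a communicator named `world`): every rank contributes its own rank and
 * receives the ranks of all processes.
 *
 *   std::vector<int> ranks;
 *   boost::mpi::all_gather(world, world.rank(), ranks);
 *   // Afterwards, ranks[i] == i on every process.
 */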
/**
 *  @brief Combine the values stored by each process into a single
 *  value available to all processes.
 *
 *  @c all_reduce is a collective algorithm that combines the values
 *  stored by each process into a single value available to all
 *  processes. The values are combined in a user-defined way,
 *  specified via a function object. The type @c T of the values may
 *  be any type that is serializable or has an associated MPI data
 *  type. One can think of this operation as an @c all_gather, followed
 *  by an @c std::accumulate() over the gathered values and using the
 *  operation @c op.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Allreduce to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c all_reduce()
 *  will create a custom @c MPI_Op for the call to @c MPI_Allreduce.
 *
 *  @param comm The communicator over which the reduction will
 *  occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of every other process. For reducing arrays, @c in_values
 *  is a pointer to the local values to be reduced and @c n is the
 *  number of values to reduce. See @c reduce for more information.
 *
 *  @param out_value Will receive the result of the reduction
 *  operation. If this parameter is omitted, the outgoing value will
 *  instead be returned.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T and returns a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp): users are
 *  encouraged to mark commutative operations as such, because it
 *  gives the implementation additional latitude to optimize the
 *  reduction operation.
 *
 *  @returns If no @p out_value parameter is supplied, returns the
 *  result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& in_value, T& out_value, Op op);
template<typename T, typename Op>
T
all_reduce(const communicator& comm, const T& in_value, Op op);
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* in_values, int n, T* out_values,
           Op op);
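/* Example (a minimal sketch assuming a communicator `world`; std::plus comes
 * from <functional>): sum one value per process, with every rank receiving
 * the total.
 *
 *   int sum = 0;
 *   boost::mpi::all_reduce(world, world.rank(), sum, std::plus<int>());
 *   // sum == 0 + 1 + ... + (world.size() - 1) on every process.
 */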
/**
 *  @brief Send data from every process to every other process.
 *
 *  @c all_to_all is a collective algorithm that transmits @c p values
 *  from every process to every other process. On process i, the jth value
 *  of the @p in_values vector is sent to process j and placed in the
 *  ith position of the @p out_values vector in process @p j. The type
 *  @c T of the values may be any type that is serializable or has an
 *  associated MPI data type. If @c n is provided, then arrays of @p n
 *  values will be transferred from one process to another.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Alltoall to scatter the values.
 *
 *  @param comm The communicator over which the all-to-all
 *  communication will occur.
 *
 *  @param in_values A vector or pointer to storage that contains
 *  the values to send to each process, indexed by the process ID
 *  number.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  updated to contain the values received from other processes. The
 *  jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
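/* Example (a minimal sketch assuming a communicator `world`): each rank sends
 * one distinct value to every other rank.
 *
 *   std::vector<int> outgoing(world.size()), incoming;
 *   for (int dest = 0; dest < world.size(); ++dest)
 *     outgoing[dest] = world.rank() * 100 + dest;  // value destined for dest
 *   boost::mpi::all_to_all(world, outgoing, incoming);
 *   // incoming[src] == src * 100 + world.rank() on every process.
 */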
/**
 *  @brief Broadcast a value from a root process to all other
 *  processes.
 *
 *  @c broadcast is a collective algorithm that transfers a value from
 *  an arbitrary @p root process to every other process that is part of
 *  the given communicator. The @c broadcast algorithm can transmit any
 *  Serializable value, values that have associated MPI data types,
 *  packed archives, skeletons, and the content of skeletons; see the
 *  @c send primitive for communicators for a complete list. The type
 *  @c T shall be the same for all processes that are a part of the
 *  communicator @p comm, unless packed archives are being transferred:
 *  with packed archives, the root sends a @c packed_oarchive or @c
 *  packed_skeleton_oarchive whereas the other processes receive a
 *  @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Bcast to perform the broadcast.
 *
 *  @param comm The communicator over which the broadcast will
 *  occur.
 *
 *  @param value The value (or values, if @p n is provided) to be
 *  transmitted (if the rank of @p comm is equal to @p root) or
 *  received (if the rank of @p comm is not equal to @p root). When
 *  the @p value is a @c skeleton_proxy, only the skeleton of the
 *  object will be broadcast. In this case, the @p root will build a
 *  skeleton from the object held in the proxy and all of the
 *  non-roots will reshape the objects held in their proxies based on
 *  the skeleton sent from the root.
 *
 *  @param n When supplied, the number of values that the pointer @p
 *  values points to, for broadcasting an array of values. The value
 *  of @p n must be the same for all processes in @p comm.
 *
 *  @param root The rank/process ID of the process that will be
 *  transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
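/* Example (a minimal sketch assuming a communicator `world`): rank 0 supplies
 * a value and every other rank receives a copy of it.
 *
 *   int setting = 0;
 *   if (world.rank() == 0) setting = 42;  // only the root's value is sent
 *   boost::mpi::broadcast(world, setting, 0);
 *   // setting == 42 on every process after the call.
 */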
/**
 *  @brief Gather the values stored at every process into a vector at
 *  the root process.
 *
 *  @c gather is a collective algorithm that collects the values
 *  stored at each process into a vector of values at the @p root
 *  process. This vector is indexed by the process number that the
 *  value came from. The type @c T of the values may be any type that
 *  is serializable or has an associated MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Gather to gather the values.
 *
 *  @param comm The communicator over which the gather will occur.
 *
 *  @param in_value The value to be transmitted by each process. For
 *  gathering arrays of values, @c in_values points to storage for
 *  @c n*comm.size() values.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  populated with the values from each process, indexed by the
 *  process ID number. If it is a vector, it will be resized
 *  accordingly. For non-root processes, this parameter may be
 *  omitted. If it is still provided, however, it will be unchanged.
 *
 *  @param root The process ID number that will collect the
 *  values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);
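/* Example (a minimal sketch assuming a communicator `world`): rank 0 collects
 * one value from every process; the other ranks use the overload that omits
 * out_values.
 *
 *   if (world.rank() == 0) {
 *     std::vector<int> all;
 *     boost::mpi::gather(world, world.rank() * world.rank(), all, 0);
 *     // all[i] == i * i for each rank i.
 *   } else {
 *     boost::mpi::gather(world, world.rank() * world.rank(), 0);
 *   }
 */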
/**
 *  @brief Scatter the values stored at the root to all processes
 *  within the communicator.
 *
 *  @c scatter is a collective algorithm that scatters the values
 *  stored in the @p root process (inside a vector) to all of the
 *  processes in the communicator. The vector @p in_values (only
 *  significant at the @p root) is indexed by the process number to
 *  which the corresponding value will be sent. The type @c T of the
 *  values may be any type that is serializable or has an associated
 *  MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Scatter to scatter the values.
 *
 *  @param comm The communicator over which the scatter will occur.
 *
 *  @param in_values A vector or pointer to storage that will contain
 *  the values to send to each process, indexed by the process rank.
 *  For non-root processes, this parameter may be omitted. If it is
 *  still provided, however, it will be unchanged.
 *
 *  @param out_value The value received by each process. When
 *  scattering an array of values, @p out_values points to the @p n
 *  values that will be received by each process.
 *
 *  @param root The process ID number that will scatter the
 *  values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
        int root);
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);
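/* Example (a minimal sketch assuming a communicator `world`): rank 0 prepares
 * one value per process and each rank receives the entry at its own index.
 *
 *   std::vector<int> payload;
 *   if (world.rank() == 0) {
 *     payload.resize(world.size());
 *     for (int i = 0; i < world.size(); ++i) payload[i] = 10 * i;
 *   }
 *   int mine = 0;
 *   boost::mpi::scatter(world, payload, mine, 0);
 *   // mine == 10 * world.rank() on every process.
 */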
/**
 *  @brief Combine the values stored by each process into a single
 *  value at the root.
 *
 *  @c reduce is a collective algorithm that combines the values
 *  stored by each process into a single value at the @c root. The
 *  values can be combined arbitrarily, specified via a function
 *  object. The type @c T of the values may be any type that is
 *  serializable or has an associated MPI data type. One can think of
 *  this operation as a @c gather to the @p root, followed by an @c
 *  std::accumulate() over the gathered values and using the operation
 *  @c op.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Reduce to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c reduce() will
 *  create a custom @c MPI_Op for the call to @c MPI_Reduce.
 *
 *  @param comm The communicator over which the reduction will
 *  occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of every other process. For reducing arrays, @c in_values
 *  contains a pointer to the local values. In this case, @c n is
 *  the number of values that will be reduced. Reduction occurs
 *  independently for each of the @p n values referenced by @p
 *  in_values, e.g., calling reduce on an array of @p n values is
 *  like calling @c reduce @p n separate times, one for each
 *  location in @p in_values and @p out_values.
 *
 *  @param out_value Will receive the result of the reduction
 *  operation, but only for the @p root process. Non-root processes
 *  may omit this parameter; if they choose to supply it,
 *  it will be unchanged. For reducing arrays, @c out_values
 *  contains a pointer to the storage for the output values.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T into a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp): users are
 *  encouraged to mark commutative operations as such, because it
 *  gives the implementation additional latitude to optimize the
 *  reduction operation.
 *
 *  @param root The process ID number that will receive the final,
 *  combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
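/* Example (a minimal sketch assuming a communicator `world`; maximum<> comes
 * from boost/mpi/operations.hpp): find the largest rank; only the root
 * receives the result, so the other ranks use the overload without out_value.
 *
 *   if (world.rank() == 0) {
 *     int largest = 0;
 *     boost::mpi::reduce(world, world.rank(), largest,
 *                        boost::mpi::maximum<int>(), 0);
 *     // largest == world.size() - 1 at the root.
 *   } else {
 *     boost::mpi::reduce(world, world.rank(), boost::mpi::maximum<int>(), 0);
 *   }
 */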
/**
 *  @brief Compute a prefix reduction of values from all processes in
 *  the communicator.
 *
 *  @c scan is a collective algorithm that combines the values stored
 *  by each process with the values of all processes with a smaller
 *  rank. The values can be arbitrarily combined, specified via a
 *  function object @p op. The type @c T of the values may be any type
 *  that is serializable or has an associated MPI data type. One can
 *  think of this operation as a @c gather to some process, followed
 *  by an @c std::partial_sum() over the gathered values using the
 *  operation @c op. The ith process returns the ith value emitted by
 *  @c std::partial_sum().
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Scan to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c scan() will
 *  create a custom @c MPI_Op for the call to @c MPI_Scan.
 *
 *  @param comm The communicator over which the prefix reduction
 *  will occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of other processes. For the array variant, the @c
 *  in_values parameter points to the @c n local values that will be
 *  combined.
 *
 *  @param out_value If provided, the ith process will receive the
 *  value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 *  ... )). For the array variant, @c out_values contains a pointer
 *  to storage for the @c n output values. The prefix reduction
 *  occurs independently for each of the @p n values referenced by
 *  @p in_values, e.g., calling scan on an array of @p n values is
 *  like calling @c scan @p n separate times, one for each location
 *  in @p in_values and @p out_values.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T into a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp).
 *
 *  @returns If no @p out_value parameter is provided, returns the
 *  result of the prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
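/* Example (a minimal sketch assuming a communicator `world`; std::plus comes
 * from <functional>): compute an inclusive running sum of ranks.
 *
 *   int prefix = 0;
 *   boost::mpi::scan(world, world.rank(), prefix, std::plus<int>());
 *   // prefix == 0 + 1 + ... + world.rank() on each process.
 */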
} } // end namespace boost::mpi

#endif // BOOST_MPI_COLLECTIVES_HPP
#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif // BOOST_MPI_COLLECTIVES_FORWARD_ONLY