// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 *  This header contains MPI collective operations, which implement
 *  various parallel algorithms that require the coordination of all
 *  processes within a communicator. The header @c collectives_fwd.hpp
 *  provides forward declarations for each of these operations. To
 *  include only specific collective algorithms, use the headers @c
 *  boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <vector>
namespace boost { namespace mpi {
/**
 *  @brief Gather the values stored at every process into vectors of
 *  values from each process.
 *
 *  @c all_gather is a collective algorithm that collects the values
 *  stored at each process into a vector of values indexed by the
 *  process number they came from. The type @c T of the values may be
 *  any type that is serializable or has an associated MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Allgather to gather the values.
 *
 *  @param comm The communicator over which the all-gather will
 *  occur.
 *
 *  @param in_value The value to be transmitted by each process. To
 *  gather an array of values, @c in_values points to the @c n local
 *  values to be transmitted.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  populated with the values from each process, indexed by the
 *  process ID number. If it is a vector, the vector will be resized
 *  accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
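/* Example (a minimal sketch assuming an initialized boost::mpi::environment
 * and a communicator named `world`): every rank contributes its own rank and
 * receives the ranks of all processes.
 *
 *   std::vector<int> ranks;
 *   boost::mpi::all_gather(world, world.rank(), ranks);
 *   // Afterwards, ranks[i] == i on every process.
 */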
/**
 *  @brief Combine the values stored by each process into a single
 *  value available to all processes.
 *
 *  @c all_reduce is a collective algorithm that combines the values
 *  stored by each process into a single value available to all
 *  processes. The values are combined in a user-defined way,
 *  specified via a function object. The type @c T of the values may
 *  be any type that is serializable or has an associated MPI data
 *  type. One can think of this operation as an @c all_gather, followed
 *  by an @c std::accumulate() over the gathered values and using the
 *  operation @c op.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Allreduce to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c all_reduce()
 *  will create a custom @c MPI_Op for the call to @c MPI_Allreduce.
 *
 *  @param comm The communicator over which the reduction will
 *  occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of every other process. For reducing arrays, @c in_values
 *  is a pointer to the local values to be reduced and @c n is the
 *  number of values to reduce. See @c reduce for more information.
 *
 *  @param out_value Will receive the result of the reduction
 *  operation. If this parameter is omitted, the outgoing value will
 *  instead be returned.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T and returns a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp): users are
 *  encouraged to mark commutative operations as such, because it
 *  gives the implementation additional latitude to optimize the
 *  reduction operation.
 *
 *  @returns If no @p out_value parameter is supplied, returns the
 *  result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& in_value, T& out_value, Op op);
template<typename T, typename Op>
T
all_reduce(const communicator& comm, const T& in_value, Op op);
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* in_values, int n, T* out_values,
           Op op);
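/* Example (a minimal sketch assuming a communicator `world`; std::plus comes
 * from <functional>): sum one value per process, with every rank receiving
 * the total.
 *
 *   int sum = 0;
 *   boost::mpi::all_reduce(world, world.rank(), sum, std::plus<int>());
 *   // sum == 0 + 1 + ... + (world.size() - 1) on every process.
 */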
/**
 *  @brief Send data from every process to every other process.
 *
 *  @c all_to_all is a collective algorithm that transmits @c p values
 *  from every process to every other process. On process i, the jth value
 *  of the @p in_values vector is sent to process j and placed in the
 *  ith position of the @p out_values vector in process @p j. The type
 *  @c T of the values may be any type that is serializable or has an
 *  associated MPI data type. If @c n is provided, then arrays of @p n
 *  values will be transferred from one process to another.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Alltoall to scatter the values.
 *
 *  @param comm The communicator over which the all-to-all
 *  communication will occur.
 *
 *  @param in_values A vector or pointer to storage that contains
 *  the values to send to each process, indexed by the process ID
 *  number.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  updated to contain the values received from other processes. The
 *  jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
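/* Example (a minimal sketch assuming a communicator `world`): each rank sends
 * one distinct value to every other rank.
 *
 *   std::vector<int> outgoing(world.size()), incoming;
 *   for (int dest = 0; dest < world.size(); ++dest)
 *     outgoing[dest] = world.rank() * 100 + dest;  // value destined for dest
 *   boost::mpi::all_to_all(world, outgoing, incoming);
 *   // incoming[src] == src * 100 + world.rank() on every process.
 */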
/**
 *  @brief Broadcast a value from a root process to all other
 *  processes.
 *
 *  @c broadcast is a collective algorithm that transfers a value from
 *  an arbitrary @p root process to every other process that is part of
 *  the given communicator. The @c broadcast algorithm can transmit any
 *  Serializable value, values that have associated MPI data types,
 *  packed archives, skeletons, and the content of skeletons; see the
 *  @c send primitive for communicators for a complete list. The type
 *  @c T shall be the same for all processes that are a part of the
 *  communicator @p comm, unless packed archives are being transferred:
 *  with packed archives, the root sends a @c packed_oarchive or @c
 *  packed_skeleton_oarchive whereas the other processes receive a
 *  @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Bcast to perform the broadcast.
 *
 *  @param comm The communicator over which the broadcast will
 *  occur.
 *
 *  @param value The value (or values, if @p n is provided) to be
 *  transmitted (if the rank of @p comm is equal to @p root) or
 *  received (if the rank of @p comm is not equal to @p root). When
 *  the @p value is a @c skeleton_proxy, only the skeleton of the
 *  object will be broadcast. In this case, the @p root will build a
 *  skeleton from the object held in the proxy and all of the
 *  non-roots will reshape the objects held in their proxies based on
 *  the skeleton sent from the root.
 *
 *  @param n When supplied, the number of values that the pointer @p
 *  values points to, for broadcasting an array of values. The value
 *  of @p n must be the same for all processes in @p comm.
 *
 *  @param root The rank/process ID of the process that will be
 *  transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
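/* Example (a minimal sketch assuming a communicator `world`): rank 0 supplies
 * a value and every other rank receives a copy of it.
 *
 *   int setting = 0;
 *   if (world.rank() == 0) setting = 42;  // only the root's value is sent
 *   boost::mpi::broadcast(world, setting, 0);
 *   // setting == 42 on every process after the call.
 */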
/**
 *  @brief Gather the values stored at every process into a vector at
 *  the root process.
 *
 *  @c gather is a collective algorithm that collects the values
 *  stored at each process into a vector of values at the @p root
 *  process. This vector is indexed by the process number that the
 *  value came from. The type @c T of the values may be any type that
 *  is serializable or has an associated MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Gather to gather the values.
 *
 *  @param comm The communicator over which the gather will occur.
 *
 *  @param in_value The value to be transmitted by each process. For
 *  gathering arrays of values, @c in_values points to storage for
 *  @c n*comm.size() values.
 *
 *  @param out_values A vector or pointer to storage that will be
 *  populated with the values from each process, indexed by the
 *  process ID number. If it is a vector, it will be resized
 *  accordingly. For non-root processes, this parameter may be
 *  omitted. If it is still provided, however, it will be unchanged.
 *
 *  @param root The process ID number that will collect the
 *  values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);
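/* Example (a minimal sketch assuming a communicator `world`): rank 0 collects
 * one value from every process; the other ranks use the overload that omits
 * out_values.
 *
 *   if (world.rank() == 0) {
 *     std::vector<int> all;
 *     boost::mpi::gather(world, world.rank() * world.rank(), all, 0);
 *     // all[i] == i * i for each rank i.
 *   } else {
 *     boost::mpi::gather(world, world.rank() * world.rank(), 0);
 *   }
 */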
/**
 *  @brief Scatter the values stored at the root to all processes
 *  within the communicator.
 *
 *  @c scatter is a collective algorithm that scatters the values
 *  stored in the @p root process (inside a vector) to all of the
 *  processes in the communicator. The vector @p in_values (only
 *  significant at the @p root) is indexed by the process number to
 *  which the corresponding value will be sent. The type @c T of the
 *  values may be any type that is serializable or has an associated
 *  MPI data type.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Scatter to scatter the values.
 *
 *  @param comm The communicator over which the scatter will occur.
 *
 *  @param in_values A vector or pointer to storage that will contain
 *  the values to send to each process, indexed by the process rank.
 *  For non-root processes, this parameter may be omitted. If it is
 *  still provided, however, it will be unchanged.
 *
 *  @param out_value The value received by each process. When
 *  scattering an array of values, @p out_values points to the @p n
 *  values that will be received by each process.
 *
 *  @param root The process ID number that will scatter the
 *  values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
        int root);
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);
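/* Example (a minimal sketch assuming a communicator `world`): rank 0 prepares
 * one value per process and each rank receives the entry at its own index.
 *
 *   std::vector<int> payload;
 *   if (world.rank() == 0) {
 *     payload.resize(world.size());
 *     for (int i = 0; i < world.size(); ++i) payload[i] = 10 * i;
 *   }
 *   int mine = 0;
 *   boost::mpi::scatter(world, payload, mine, 0);
 *   // mine == 10 * world.rank() on every process.
 */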
/**
 *  @brief Combine the values stored by each process into a single
 *  value at the root.
 *
 *  @c reduce is a collective algorithm that combines the values
 *  stored by each process into a single value at the @c root. The
 *  values can be combined arbitrarily, specified via a function
 *  object. The type @c T of the values may be any type that is
 *  serializable or has an associated MPI data type. One can think of
 *  this operation as a @c gather to the @p root, followed by an @c
 *  std::accumulate() over the gathered values and using the operation
 *  @c op.
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Reduce to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c reduce() will
 *  create a custom @c MPI_Op for the call to @c MPI_Reduce.
 *
 *  @param comm The communicator over which the reduction will
 *  occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of every other process. For reducing arrays, @c in_values
 *  contains a pointer to the local values. In this case, @c n is
 *  the number of values that will be reduced. Reduction occurs
 *  independently for each of the @p n values referenced by @p
 *  in_values, e.g., calling reduce on an array of @p n values is
 *  like calling @c reduce @p n separate times, one for each
 *  location in @p in_values and @p out_values.
 *
 *  @param out_value Will receive the result of the reduction
 *  operation, but only for the @p root process. Non-root processes
 *  may omit this parameter; if they choose to supply it,
 *  it will be unchanged. For reducing arrays, @c out_values
 *  contains a pointer to the storage for the output values.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T into a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp): users are
 *  encouraged to mark commutative operations as such, because it
 *  gives the implementation additional latitude to optimize the
 *  reduction operation.
 *
 *  @param root The process ID number that will receive the final,
 *  combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
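/* Example (a minimal sketch assuming a communicator `world`; maximum<> comes
 * from boost/mpi/operations.hpp): find the largest rank; only the root
 * receives the result, so the other ranks use the overload without out_value.
 *
 *   if (world.rank() == 0) {
 *     int largest = 0;
 *     boost::mpi::reduce(world, world.rank(), largest,
 *                        boost::mpi::maximum<int>(), 0);
 *     // largest == world.size() - 1 at the root.
 *   } else {
 *     boost::mpi::reduce(world, world.rank(), boost::mpi::maximum<int>(), 0);
 *   }
 */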
/**
 *  @brief Compute a prefix reduction of values from all processes in
 *  the communicator.
 *
 *  @c scan is a collective algorithm that combines the values stored
 *  by each process with the values of all processes with a smaller
 *  rank. The values can be arbitrarily combined, specified via a
 *  function object @p op. The type @c T of the values may be any type
 *  that is serializable or has an associated MPI data type. One can
 *  think of this operation as a @c gather to some process, followed
 *  by an @c std::partial_sum() over the gathered values using the
 *  operation @c op. The ith process returns the ith value emitted by
 *  @c std::partial_sum().
 *
 *  When the type @c T has an associated MPI data type, this routine
 *  invokes @c MPI_Scan to perform the reduction. If possible,
 *  built-in MPI operations will be used; otherwise, @c scan() will
 *  create a custom @c MPI_Op for the call to @c MPI_Scan.
 *
 *  @param comm The communicator over which the prefix reduction
 *  will occur.
 *
 *  @param in_value The local value to be combined with the local
 *  values of other processes. For the array variant, the @c
 *  in_values parameter points to the @c n local values that will be
 *  combined.
 *
 *  @param out_value If provided, the ith process will receive the
 *  value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 *  ... )). For the array variant, @c out_values contains a pointer
 *  to storage for the @c n output values. The prefix reduction
 *  occurs independently for each of the @p n values referenced by
 *  @p in_values, e.g., calling scan on an array of @p n values is
 *  like calling @c scan @p n separate times, one for each location
 *  in @p in_values and @p out_values.
 *
 *  @param op The binary operation that combines two values of type
 *  @c T into a third value of type @c T. For types @c T that have
 *  associated MPI data types, @c op will either be translated into
 *  an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 *  directly to a built-in MPI operation. See @c is_mpi_op in the @c
 *  operations.hpp header for more details on this mapping. For any
 *  non-built-in operation, commutativity will be determined by the
 *  @c is_commutative trait (also in @c operations.hpp).
 *
 *  @returns If no @p out_value parameter is provided, returns the
 *  result of the prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
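/* Example (a minimal sketch assuming a communicator `world`; std::plus comes
 * from <functional>): compute an inclusive running sum of ranks.
 *
 *   int prefix = 0;
 *   boost::mpi::scan(world, world.rank(), prefix, std::plus<int>());
 *   // prefix == 0 + 1 + ... + world.rank() on each process.
 */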
} } // end namespace boost::mpi

#endif // BOOST_MPI_COLLECTIVES_HPP
#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif // BOOST_MPI_COLLECTIVES_FORWARD_ONLY