1 subroutine da_y_facade_to_global( re_slice, template, re_glob_slice )
3 !---------------------------------------------------------------------------
4 ! Purpose: Collect a local y_facade_type object into a global y_facade_type
5 ! object while preserving serial-code storage order. This is
6 ! used to perform bitwise-exact floating-point summations in
7 ! serial-code-order during bitwise-exact testing of
8 ! distributed-memory parallel configurations.
10 ! Method: Indices stowed away during Read_Obs() are used to restore serial
11 ! storage order. Memory for global objects is allocated here.
12 ! Global objects are minimally allocated to save memory.
13 ! Memory must be deallocated by the caller via a call to
15 ! Memory for local object re_slice is deallocated here. Do not
16 ! use re_slice after this call.
17 ! The "template" argument is needed because some tasks may not
20 ! PERFORMANCE NOTE: This implementation is NOT very efficient. Speed it
21 ! up if testing is too slow.
22 !---------------------------------------------------------------------------
26 ! task-local objects (really intent(in ))
27 type (y_facade_type), intent(inout) :: re_slice ! residual vector
28 type (residual_template_type), intent(in) :: template ! allocation info
29 ! task-global objects (really intent( out))
30 type (y_facade_type), intent(inout) :: re_glob_slice ! residual vector
34 integer :: n, k, serial_n, serial_numvals
36 integer :: num_obs_send
38 integer :: num_obs_all
39 integer :: num_recv_all
40 integer :: obs_global_index(re_slice%num_obs)
41 integer :: num_values(re_slice%num_obs)
42 integer :: num_send_buf_lcl
43 integer,allocatable :: num_obs_send_all(:)
44 integer,allocatable :: obs_global_index_all(:)
45 integer,allocatable :: obs_global_index_inv(:)
46 integer,allocatable :: obs_start_all(:) ! start index of each obs
47 integer,pointer :: num_values_all(:)
48 integer,allocatable :: num_send_buf_all(:)
49 integer,allocatable :: recv_counts(:)
50 integer,allocatable :: recv_displs(:)
51 real,allocatable :: re_vals_lcl(:)
52 real,allocatable :: re_vals_all(:)
55 if (trace_use) call da_trace_entry("da_y_facade_to_global")
57 ! todo: later, upgrade from "allgather" to "gather-broadcast"
59 ! collect information about all observations
62 do n=1, re_slice%num_obs
63 if (re_slice%obs(n)%proc_domain) then
64 num_obs_send = num_obs_send + 1
65 obs_global_index(num_obs_send) = re_slice%obs(n)%obs_global_index
69 if (obs_global_index(n) < 0) then
70 call da_error(__FILE__,__LINE__, &
71 (/'ASSERTION ERROR: bad local value of obs_global_index'/))
75 ! exchange num_obs_send and obs_global_index
76 allocate (num_obs_send_all(0:num_procs-1))
77 allocate (num_send_buf_all(0:num_procs-1))
78 allocate (recv_counts(0:num_procs-1))
79 allocate (recv_displs(0:num_procs-1))
82 call mpi_allgather( num_obs_send, 1, mpi_integer, num_obs_send_all, 1, &
83 mpi_integer, comm, ierr )
84 num_obs_all = sum( num_obs_send_all )
85 if ( num_obs_all /= re_slice%num_obs_glo ) then
86 call da_error (__FILE__,__LINE__, &
87 (/'ASSERTION ERROR: inconsistent count of sound obs'/))
89 ! set up to gather obs_global_index
90 recv_counts = num_obs_send_all
92 do proc=1, num_procs-1
93 recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
95 allocate (num_values_all(num_obs_all))
96 allocate (obs_global_index_all(num_obs_all))
97 allocate (obs_global_index_inv(num_obs_all))
98 allocate (obs_start_all(num_obs_all))
100 ! gather obs_global_index
101 call mpi_allgatherv( obs_global_index, num_obs_send, mpi_integer, &
102 obs_global_index_all, recv_counts, recv_displs, &
103 mpi_integer, comm, ierr )
105 ! invert obs_global_index_all
106 ! initialize to "invalid" value
107 obs_global_index_inv = -1
109 if ( ( obs_global_index_all(n) < 1 ) .OR. &
110 ( obs_global_index_all(n) > size(obs_global_index_inv) ) ) then
111 call da_error (__FILE__,__LINE__, &
112 (/'ASSERTION ERROR: obs_global_index_all(n) out of range'/))
114 if ( obs_global_index_inv(obs_global_index_all(n)) /= -1 ) then
115 call da_error (__FILE__,__LINE__, &
116 (/'ASSERTION ERROR: obs owned by more than one task'/))
118 obs_global_index_inv(obs_global_index_all(n)) = n
121 if ( obs_global_index_inv(obs_global_index_all(n)) == -1 ) then
122 call da_error (__FILE__,__LINE__, &
123 (/'ASSERTION ERROR: obs not owned by any task'/))
127 ! Create re_glob_slice and populate with residual_generic_type objects
128 ! allocated to match template.
129 call da_y_facade_create( re_glob_slice, num_obs_all, num_obs_all, template=template )
131 ! NOTE: This i loop should be inside the n loops.
132 ! Ideally, residual_generic class should know how to pack/unpack
133 ! (serialize/deserialize) itself for arbitrary I/O or communications (MPI or
134 ! otherwise) that clients may choose to implement. Below are a possible set
135 ! of new methods for residual_generic_type:
136 ! residual_generic_pack_counts( res_generic, (out)rcount, (out)icount )
137 ! [client allocates ibuf(icount) and rbuf(rcount)]
138 ! residual_generic_pack( res_generic, (inout)rstart, (inout)rbuf, &
139 ! (inout)istart, (inout)ibuf )
140 ! [client MPI communications: ibuf->ibufg rbuf->rbufg]
141 ! residual_generic_unpack_counts( ibufg, (out)rstarts, (out)rcounts )
142 ! residual_generic_unpack( (out)res_generic, rstarts(n), rcounts(n), rbufg )
144 ! 1) Design serialization for residual_generic_type.
145 ! 2) Implement new methods.
146 ! 3) Refactor below...
147 ! 4) Optimize performance.
148 ! At the moment this refactoring is relatively low-priority since
149 ! da_y_facade_to_global() is already well-encapsulated and peformance is not
150 ! (yet) a concern for testing.
151 ! Loop over vector and scalar elements
153 do i=template%lbnd,template%ubnd
157 ! collect information to allocate buffers
158 do n=1, re_slice%num_obs
159 if ( re_slice%obs(n)%proc_domain ) then
160 num_obs_send = num_obs_send + 1
161 ! track number of scalars/levels per obs element
162 num_values(num_obs_send) = size(re_slice%obs(n)%values(i)%ptr)
163 ! track total buffer size
164 num_send_buf_lcl = num_send_buf_lcl + num_values(num_obs_send)
167 ! gather num_send_buf_lcl
168 call mpi_allgather( num_send_buf_lcl, 1, mpi_integer, &
169 num_send_buf_all, 1, &
170 mpi_integer, comm, ierr )
172 recv_counts = num_obs_send_all
174 do proc=1, num_procs-1
175 recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
178 call mpi_allgatherv( num_values, num_obs_send, mpi_integer, &
179 num_values_all, recv_counts, recv_displs, &
180 mpi_integer, comm, ierr )
181 ! set up to gather local arrays
182 ! compute start index of each obs in gathered buffers
185 obs_start_all(n) = obs_start_all(n-1) + num_values_all(n-1)
187 ! finish setting up to gather local arrays
188 recv_counts = num_send_buf_all
190 do proc=1, num_procs-1
191 recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
193 num_recv_all = sum( recv_counts )
194 ! allocate and initialize send buffer
195 allocate( re_vals_lcl(num_send_buf_lcl) )
197 do n=1, re_slice%num_obs
198 if ( re_slice%obs(n)%proc_domain ) then
199 do k=1, size(re_slice%obs(n)%values(i)%ptr)
200 buf_idx = buf_idx + 1
201 re_vals_lcl(buf_idx) = re_slice%obs(n)%values(i)%ptr(k)
205 if ( buf_idx /= num_send_buf_lcl ) then
206 call da_error (__FILE__,__LINE__, &
207 (/'ASSERTION ERROR: buf_idx /= num_send_buf_lcl'/))
209 ! allocate receive buffer
210 allocate( re_vals_all(num_recv_all) )
211 ! finally, actually gather the values
212 call mpi_allgatherv( re_vals_lcl, num_send_buf_lcl, true_mpi_real, &
213 re_vals_all, recv_counts, recv_displs, &
214 true_mpi_real, comm, ierr )
215 ! initialize re_glob_slice
216 ! NOTE: The i loop should be inside this n loop, see comment above...
218 do n=1, re_glob_slice%num_obs
219 serial_n = obs_global_index_inv(n)
220 serial_numvals = num_values_all(serial_n)
221 buf_idx = obs_start_all(serial_n)
222 ! note that we only collected obs someone owns
223 re_glob_slice%obs(n)%proc_domain = .true.
224 re_glob_slice%obs(n)%obs_global_index = obs_global_index_all(serial_n)
225 call da_res_generic_alloc_and_set (re_glob_slice%obs(n), i, &
226 re_vals_all(buf_idx:(buf_idx+serial_numvals-1)))
228 ! deallocate communication buffers, etc.
229 deallocate (re_vals_all)
230 deallocate (re_vals_lcl)
231 end do ! end of i loop
233 call da_y_facade_free (re_slice)
235 deallocate (num_values_all, obs_global_index_all, obs_global_index_inv, obs_start_all)
236 deallocate (num_obs_send_all, num_send_buf_all, recv_counts, recv_displs)
238 if (trace_use) call da_trace_exit("da_y_facade_to_global")
241 call da_error (__FILE__,__LINE__, &
242 (/'may only be called for a distributed memory parallel run'/))
245 end subroutine da_y_facade_to_global