4 ! gcc -c -DF2CSTYLE task_for_point.c ; g95 -ffree-form -ffree-line-length-huge tfp_tester.F task_for_point.o
6 ! icc -c task_for_point.c ; ifort -FR tfp_tester.F task_for_point.o
8 ! cc -c -DNOUNDERSCORE task_for_point.c ; xlf -qfree=f90 tfp_tester.F task_for_point.o
10 MODULE module_driver_constants
12 ! 0. The following tells the rest of the model what data ordering we are
15 INTEGER , PARAMETER :: DATA_ORDER_XYZ = 1
16 INTEGER , PARAMETER :: DATA_ORDER_YXZ = 2
17 INTEGER , PARAMETER :: DATA_ORDER_ZXY = 3
18 INTEGER , PARAMETER :: DATA_ORDER_ZYX = 4
19 INTEGER , PARAMETER :: DATA_ORDER_XZY = 5
20 INTEGER , PARAMETER :: DATA_ORDER_YZX = 6
21 INTEGER , PARAMETER :: DATA_ORDER_XY = DATA_ORDER_XYZ
22 INTEGER , PARAMETER :: DATA_ORDER_YX = DATA_ORDER_YXZ
24 !#include "model_data_order.inc"
26 ! 1. Following are constants for use in defining maximal values for array
30 ! The maximum number of levels in the model is how deeply the domains may
33 INTEGER , PARAMETER :: max_levels = 20
35 ! The maximum number of nests that can depend on a single parent and other way round
37 INTEGER , PARAMETER :: max_nests = 20
39 ! The maximum number of parents that a nest can have (simplified assumption -> one only)
41 INTEGER , PARAMETER :: max_parents = 1
43 ! The maximum number of domains is how many grids the model will be running.
45 #define MAX_DOMAINS_F 10
46 INTEGER , PARAMETER :: max_domains = ( MAX_DOMAINS_F - 1 ) / 2 + 1
48 ! The maximum number of nest move specifications allowed in a namelist
50 INTEGER , PARAMETER :: max_moves = 50
52 ! The maximum number of eta levels
54 INTEGER , PARAMETER :: max_eta = 501
56 ! The maximum number of outer iterations (for DA minimisation)
58 INTEGER , PARAMETER :: max_outer_iterations = 10
60 ! The maximum number of instruments (for radiance DA)
62 INTEGER , PARAMETER :: max_instruments = 30
64 ! 2. Following related to driver leve data structures for DM_PARALLEL communications
67 INTEGER , PARAMETER :: max_comms = 1024
69 INTEGER , PARAMETER :: max_comms = 1
72 ! 3. Following is information related to the file I/O.
74 ! These are the bounds of the available FORTRAN logical unit numbers for the file I/O.
75 ! Only logical unti numbers within these bounds will be chosen for I/O unit numbers.
77 INTEGER , PARAMETER :: min_file_unit = 10
78 INTEGER , PARAMETER :: max_file_unit = 99
80 ! 4. Unfortunately, the following definition is needed here (rather
81 ! than the more logical place in share/module_model_constants.F)
82 ! for the namelist reads in frame/module_configure.F, and for some
83 ! conversions in share/set_timekeeping.F
84 ! Actually, using it here will mean that we don't need to set it
85 ! in share/module_model_constants.F, since this file will be
87 ! frame/module_configure.F
88 ! which will be USEd in:
90 ! which will be USEd in:
91 ! phys/module_radiation_driver.F
92 ! which is the other important place for it to be, and where
93 ! it is passed as a subroutine parameter to any physics subroutine.
95 ! P2SI is the number of SI seconds in an planetary solar day
96 ! divided by the number of SI seconds in an earth solar day
98 ! For Mars, P2SI = 88775.2/86400.
99 REAL , PARAMETER :: P2SI = 1.0274907
101 ! For Titan, P2SI = 1378080.0/86400.
102 REAL , PARAMETER :: P2SI = 15.95
105 REAL , PARAMETER :: P2SI = 1.0
108 SUBROUTINE init_module_driver_constants
109 END SUBROUTINE init_module_driver_constants
110 END MODULE module_driver_constants
112 MODULE module_machine
114 USE module_driver_constants
116 ! Machine characteristics and utilities here.
118 ! Tile strategy defined constants
119 INTEGER, PARAMETER :: TILE_X = 1, TILE_Y = 2, TILE_XY = 3
122 INTEGER :: tile_strategy
123 END TYPE machine_type
125 TYPE (machine_type) machine_info
129 RECURSIVE SUBROUTINE rlocproc(p,maxi,nproc,ml,mr,ret)
131 INTEGER, INTENT(IN) :: p, maxi, nproc, ml, mr
132 INTEGER, INTENT(OUT) :: ret
133 INTEGER :: width, rem, ret2, bl, br, mid, adjust, &
134 p_r, maxi_r, nproc_r, zero
136 rem = mod( maxi, nproc )
139 IF ( rem>0 .AND. (((mod(rem,2).EQ.0).OR.(rem.GT.2)).OR.(p.LE.mid))) THEN
142 IF ( p.LE.mid .AND. mod(rem,2).NE.0 ) THEN
149 ELSE IF (p>maxi-br-1) THEN
153 maxi_r = maxi-bl-br+adjust
154 nproc_r = max(nproc-2,1)
156 CALL rlocproc( p_r, maxi_r, nproc_r, zero, zero, ret2 ) ! Recursive
160 END SUBROUTINE rlocproc
162 INTEGER FUNCTION locproc( i, m, numpart )
164 integer, intent(in) :: i, m, numpart
165 integer :: retval, ii, im, inumpart, zero
170 CALL rlocproc( ii, im, inumpart, zero, zero, retval )
175 SUBROUTINE patchmap( res, y, x, py, px )
177 INTEGER, INTENT(IN) :: y, x, py, px
178 INTEGER, DIMENSION(x,y), INTENT(OUT) :: res
179 INTEGER :: i, j, p_min, p_maj
181 p_maj = locproc( j, y, py )
183 p_min = locproc( i, x, px )
184 res(i+1,j+1) = p_min + px*p_maj
188 END SUBROUTINE patchmap
190 SUBROUTINE region_bounds( region_start, region_end, &
192 patch_start, patch_end )
193 ! 1-D decomposition routine: Given starting and ending indices of a
194 ! vector, the number of patches dividing the vector, and the number of
195 ! the patch, give the start and ending indices of the patch within the
196 ! vector. This will work with tiles too. Implementation note. This is
197 ! implemented somewhat inefficiently, now, with a loop, so we can use the
198 ! locproc function above, which returns processor number for a given
199 ! index, whereas what we want is index for a given processor number.
200 ! With a little thought and a lot of debugging, we can come up with a
201 ! direct expression for what we want. For time being, we loop...
202 ! Remember that processor numbering starts with zero.
205 INTEGER, INTENT(IN) :: region_start, region_end, num_p, p
206 INTEGER, INTENT(OUT) :: patch_start, patch_end
208 patch_end = -999999999
209 patch_start = 999999999
210 offset = region_start
211 do i = 0, region_end - offset
212 if ( locproc( i, region_end-region_start+1, num_p ) == p ) then
213 patch_end = max(patch_end,i)
214 patch_start = min(patch_start,i)
217 patch_start = patch_start + offset
218 patch_end = patch_end + offset
220 END SUBROUTINE region_bounds
222 SUBROUTINE least_aspect( nparts, minparts_y, minparts_x, nparts_y, nparts_x )
225 INTEGER, INTENT(IN) :: nparts, &
226 minparts_y, minparts_x
228 INTEGER, INTENT(OUT) :: nparts_y, nparts_x
230 INTEGER :: x, y, mini
235 IF ( mod( nparts, y ) .eq. 0 ) THEN
237 IF ( abs( y-x ) .LT. mini &
238 .AND. y .GE. minparts_y &
239 .AND. x .GE. minparts_x ) THEN
246 END SUBROUTINE least_aspect
248 SUBROUTINE init_module_machine
249 machine_info%tile_strategy = TILE_Y
250 END SUBROUTINE init_module_machine
252 END MODULE module_machine
254 SUBROUTINE compute_memory_dims_rsl_lite ( &
255 id , maxhalowidth , &
257 ntasks_x, ntasks_y, &
258 mytask_x, mytask_y, &
259 ids, ide, jds, jde, kds, kde, &
260 ims, ime, jms, jme, kms, kme, &
261 imsx, imex, jmsx, jmex, kmsx, kmex, &
262 imsy, imey, jmsy, jmey, kmsy, kmey, &
263 ips, ipe, jps, jpe, kps, kpe, &
264 ipsx, ipex, jpsx, jpex, kpsx, kpex, &
265 ipsy, ipey, jpsy, jpey, kpsy, kpey )
269 INTEGER, INTENT(IN) :: id , maxhalowidth
270 INTEGER, INTENT(IN) :: shw, bdx, bdy
271 INTEGER, INTENT(IN) :: ntasks_x, ntasks_y
272 INTEGER, INTENT(IN) :: mytask_x, mytask_y
273 INTEGER, INTENT(IN) :: ids, ide, jds, jde, kds, kde
274 INTEGER, INTENT(OUT) :: ims, ime, jms, jme, kms, kme
275 INTEGER, INTENT(OUT) :: imsx, imex, jmsx, jmex, kmsx, kmex
276 INTEGER, INTENT(OUT) :: imsy, imey, jmsy, jmey, kmsy, kmey
277 INTEGER, INTENT(OUT) :: ips, ipe, jps, jpe, kps, kpe
278 INTEGER, INTENT(OUT) :: ipsx, ipex, jpsx, jpex, kpsx, kpex
279 INTEGER, INTENT(OUT) :: ipsy, ipey, jpsy, jpey, kpsy, kpey
281 INTEGER Px, Py, P, i, j, k, ierr
289 CALL task_for_point ( i, j, ids, ide, jds, jde, ntasks_x, ntasks_y, Px, Py, &
290 maxhalowidth, maxhalowidth, ierr )
291 IF ( Px .EQ. mytask_x ) THEN
293 IF ( ips .EQ. -1 ) THEN
298 IF ( ierr .NE. 0 ) THEN
299 CALL tfp_message(__FILE__,__LINE__)
301 ! handle setting the memory dimensions where there are no X elements assigned to this proc
302 IF (ips .EQ. -1 ) THEN
310 CALL task_for_point ( i, j, ids, ide, jds, jde, ntasks_x, ntasks_y, Px, Py, &
311 maxhalowidth, maxhalowidth, ierr )
312 IF ( Py .EQ. mytask_y ) THEN
314 IF ( jps .EQ. -1 ) jps = j
317 IF ( ierr .NE. 0 ) THEN
318 CALL tfp_message(__FILE__,__LINE__)
320 ! handle setting the memory dimensions where there are no Y elements assigned to this proc
321 IF (jps .EQ. -1 ) THEN
326 !begin: wig; 12-Mar-2008
327 ! This appears redundant with the conditionals above, but we get cases with only
328 ! one of the directions being set to "missing" when turning off extra processors.
329 ! This may break the handling of setting only one of nproc_x or nproc_y via the namelist.
330 IF (ipe .EQ. -1 .or. jpe .EQ. -1) THEN
336 !end: wig; 12-Mar-2008
339 ! description of transpose decomposition strategy for RSL LITE. 20061231jm
341 ! Here is the tranpose scheme that is implemented for RSL_LITE. Upper-case
342 ! XY corresponds to the dimension of the processor mesh, lower-case xyz
343 ! corresponds to grid dimension.
347 ! XxYy <--> XzYy <--> XzYx <- note x decomposed over Y procs
350 ! +------------------+ <- this edge is costly; see below
352 ! The aim is to avoid all-to-all communication over whole
353 ! communicator. Instead, when possible, use a transpose scheme that requires
354 ! all-to-all within dimensional communicators; that is, communicators
355 ! defined for the processes in a rank or column of the processor mesh. Note,
356 ! however, it is not possible to create a ring of transposes between
357 ! xy-yz-xz decompositions without at least one of the edges in the ring
358 ! being fully all-to-all (in other words, one of the tranpose edges must
359 ! rotate and not just transpose a plane of the model grid within the
360 ! processor mesh). The issue is then, where should we put this costly edge
361 ! in the tranpose scheme we chose? To avoid being completely arbitrary,
362 ! we chose a scheme most natural for models that use parallel spectral
363 ! transforms, where the costly edge is the one that goes from the xz to
364 ! the xy decomposition. (May be implemented as just a two step transpose
367 ! Additional notational convention, below. The 'x' or 'y' appended to the
368 ! dimension start or end variable refers to which grid dimension is all
369 ! on-processor in the given decomposition. That is ipsx and ipex are the
370 ! start and end for the i-dimension in the zy decomposition where x is
371 ! on-processor. ('z' is assumed for xy decomposition and not appended to
372 ! the ips, ipe, etc. variable names).
381 CALL task_for_point ( k, j, kds, kde, jds, jde, ntasks_x, ntasks_y, Px, Py, &
382 1, maxhalowidth, ierr )
383 IF ( Px .EQ. mytask_x ) THEN
385 IF ( kpsx .EQ. -1 ) kpsx = k
388 IF ( ierr .NE. 0 ) THEN
389 CALL tfp_message(__FILE__,__LINE__)
392 ! handle case where no levels are assigned to this process
393 ! no iterations. Do same for I and J. Need to handle memory alloc below.
394 IF (kpsx .EQ. -1 ) THEN
403 CALL task_for_point ( k, j, kds, kde, jds, jde, ntasks_x, ntasks_y, Px, Py, &
404 1, maxhalowidth, ierr )
405 IF ( Py .EQ. mytask_y ) THEN
407 IF ( jpsx .EQ. -1 ) jpsx = j
410 IF ( ierr .NE. 0 ) THEN
411 CALL tfp_message(__FILE__,__LINE__)
413 IF (jpsx .EQ. -1 ) THEN
418 !begin: wig; 12-Mar-2008
419 ! This appears redundant with the conditionals above, but we get cases with only
420 ! one of the directions being set to "missing" when turning off extra processors.
421 ! This may break the handling of setting only one of nproc_x or nproc_y via the namelist.
422 IF (ipex .EQ. -1 .or. jpex .EQ. -1) THEN
428 !end: wig; 12-Mar-2008
430 ! XzYx decomposition (note, x grid dim is decomposed over Y processor dim)
432 kpsy = kpsx ! same as above
433 kpey = kpex ! same as above
439 CALL task_for_point ( i, k, ids, ide, kds, kde, ntasks_y, ntasks_x, Py, Px, &
440 maxhalowidth, 1, ierr ) ! x and y for proc mesh reversed
441 IF ( Py .EQ. mytask_y ) THEN
443 IF ( ipsy .EQ. -1 ) ipsy = i
446 IF ( ierr .NE. 0 ) THEN
447 CALL tfp_message(__FILE__,__LINE__)
449 IF (ipsy .EQ. -1 ) THEN
454 ! extend the patch dimensions out shw along edges of domain
455 IF ( ips < ipe .and. jps < jpe ) THEN !wig; 11-Mar-2008
456 IF ( mytask_x .EQ. 0 ) THEN
460 IF ( mytask_x .EQ. ntasks_x-1 ) THEN
464 IF ( mytask_y .EQ. 0 ) THEN
468 IF ( mytask_y .EQ. ntasks_y-1 ) THEN
472 ENDIF !wig; 11-Mar-2008
484 ! handle setting the memory dimensions where there are no levels assigned to this proc
485 IF ( kpsx .EQ. 0 .AND. kpex .EQ. -1 ) THEN
489 IF ( kpsy .EQ. 0 .AND. kpey .EQ. -1 ) THEN
494 IF ( (jps .EQ. 0 .AND. jpe .EQ. -1) .OR. (ips .EQ. 0 .AND. ipe .EQ. -1) ) THEN
498 ims = max( ips - max(shw,maxhalowidth), ids - bdx ) - 1
499 ime = min( ipe + max(shw,maxhalowidth), ide + bdx ) + 1
505 ! handle setting the memory dimensions where there are no Y elements assigned to this proc
506 IF ( ipsy .EQ. 0 .AND. ipey .EQ. -1 ) THEN
514 IF ( (jps .EQ. 0 .AND. jpe .EQ. -1) .OR. (ips .EQ. 0 .AND. ipe .EQ. -1) ) THEN
518 jms = max( jps - max(shw,maxhalowidth), jds - bdy ) - 1
519 jme = min( jpe + max(shw,maxhalowidth), jde + bdy ) + 1
525 ! handle setting the memory dimensions where there are no X elements assigned to this proc
526 IF ( jpsx .EQ. 0 .AND. jpex .EQ. -1 ) THEN
533 END SUBROUTINE compute_memory_dims_rsl_lite
535 SUBROUTINE tfp_message( fname, lno )
540 WRITE(mess,*)'tfp_message: ',trim(fname),lno
541 CALL wrf_message(mess)
542 # ifdef ALLOW_OVERDECOMP
543 CALL task_for_point_message ! defined in RSL_LITE/task_for_point.c
545 CALL wrf_error_fatal(mess)
548 END SUBROUTINE tfp_message
550 SUBROUTINE wrf_message( mess )
552 PRINT*,'info: ',TRIM(mess)
553 END SUBROUTINE wrf_message
555 SUBROUTINE wrf_error_fatal( mess )
557 PRINT*,'fatal: ',TRIM(mess)
559 END SUBROUTINE wrf_error_fatal
563 INTEGER id , maxhalowidth , &
565 ntasks_x, ntasks_y, &
566 mytask_x, mytask_y, &
567 ids, ide, jds, jde, kds, kde, &
568 ims, ime, jms, jme, kms, kme, &
569 imsx, imex, jmsx, jmex, kmsx, kmex, &
570 imsy, imey, jmsy, jmey, kmsy, kmey, &
571 ips, ipe, jps, jpe, kps, kpe, &
572 ipsx, ipex, jpsx, jpex, kpsx, kpex, &
573 ipsy, ipey, jpsy, jpey, kpsy, kpey
577 PRINT*,'id,maxhalowidth,shw,bdx,bdy ? '
578 READ(*,*)id,maxhalowidth,shw,bdx,bdy
579 PRINT*,'ids,ide,jds,jde,kds,kde '
580 READ(*,*)ids, ide, jds, jde, kds, kde
581 PRINT*,'ntasks_x,ntasks_y'
582 READ(*,*)ntasks_x,ntasks_y
585 DO mytask_y = 0, ntasks_y-1
586 DO mytask_x = 0, ntasks_x-1
587 CALL compute_memory_dims_rsl_lite ( &
588 id , maxhalowidth , &
590 ntasks_x, ntasks_y, &
591 mytask_x, mytask_y, &
592 ids, ide, jds, jde, kds, kde, &
593 ims, ime, jms, jme, kms, kme, &
594 imsx, imex, jmsx, jmex, kmsx, kmex, &
595 imsy, imey, jmsy, jmey, kmsy, kmey, &
596 ips, ipe, jps, jpe, kps, kpe, &
597 ipsx, ipex, jpsx, jpex, kpsx, kpex, &
598 ipsy, ipey, jpsy, jpey, kpsy, kpey )
600 PRINT*,' mytask_x, mytask_y ',mytask_x, mytask_y
601 PRINT*,' ips, ipe, jps, jpe, kps, kpe ',ips, ipe, jps, jpe, kps, kpe
602 PRINT*,' ims, ime, jms, jme, kms, kme ',ims, ime, jms, jme, kms, kme
603 PRINT*,' ipsx, ipex, jpsx, jpex, kpsx, kpex ',ipsx, ipex, jpsx, jpex, kpsx, kpex
604 PRINT*,' imsx, imex, jmsx, jmex, kmsx, kmex ',imsx, imex, jmsx, jmex, kmsx, kmex
605 PRINT*,' ipsy, ipey, jpsy, jpey, kpsy, kpey ',ipsy, ipey, jpsy, jpey, kpsy, kpey
606 PRINT*,' imsy, imey, jmsy, jmey, kmsy, kmey ',imsy, imey, jmsy, jmey, kmsy, kmey
609 END PROGRAM tfp_tester