updated top-level README and version_decl for V4.5 (#1847)
[WRF.git] / external / RSL_LITE / task_for_point.c
blobb33a03c452371c808e1a2a6cd68c511904ed0ded
1 #ifndef MS_SUA
2 # include <stdio.h>
3 #endif
4 #include "rsl_lite.h"
/* updated 20051021, new algorithm distributes the remainder, if any, at either end of the dimension
   rather than to the first remainder number of processes in the dimension. Idea is that the processes
   on the ends have less work because they're boundary processes.  New alg works like this:
               a                         b
   + + + + + + o o o o o o o o o o o o o + + + + + +

   + represents a process with an extra point (npoints is n/p+1), o a process without one (n/p)
   a and b mark where the section of o and the trailing section of + begin along the dimension.
*/
/* experimental for running some tasks on host and some on MIC:
   if minx = -99 then miny is the number of grid points wanted in the Y dimension
   for the second Y partition (the last miny rows map to Py = 1, the rest to Py = 0).
   Otherwise both are set to 1 and it works normally. 20121018 JM */
21 static char tfpmess[1024] ;
23 TASK_FOR_POINT ( i_p , j_p , ids_p, ide_p , jds_p, jde_p , npx_p , npy_p , Px_p, Py_p , minx_p, miny_p, ierr_p )
24 int_p i_p , j_p , Px_p , Py_p , ids_p, ide_p , jds_p, jde_p , npx_p , npy_p, minx_p, miny_p, ierr_p ;
26 int i , j , ids, ide, jds, jde, npx, npy, minx, miny ; /* inputs */
27 int Px, Py ; /* output */
28 int idim, jdim ;
29 int rem, a, b ;
30 i = *i_p - 1 ;
31 j = *j_p - 1 ;
32 npx = *npx_p ;
33 npy = *npy_p ;
34 #if 0
35 minx = *minx_p ;
36 miny = *miny_p ;
37 #else
38 if ( *minx_p == -99 ) {
39 minx = 1 ;
40 miny = *miny_p ;
41 npx = ( *npx_p * *npy_p ) / 2 ; /* x dim gets half the tasks , only decompose Y by 2 */
42 if ( npx * 2 != *npx_p * *npy_p ) {
43 *ierr_p = 1 ;
44 sprintf(tfpmess,"%d by %d decomp will not work for MIC/HOST splitting. Need even number of tasks\n") ;
46 } else {
47 minx = 1 ;
48 miny = 1 ;
50 #endif
51 ids = *ids_p - 1 ; ide = *ide_p - 1 ;
52 jds = *jds_p - 1 ; jde = *jde_p - 1 ;
53 idim = ide - ids + 1 ;
54 jdim = jde - jds + 1 ;
56 *ierr_p = 0 ;
58 if ( *minx_p != -99 ) {
59 /* begin: jm for Peter Johnsen -- noticed problem with polar filters in gwrf
60 if the number of processors exceeds number of vertical levels */
61 if ( npx > idim ) { npx = idim ; }
62 if ( npy > jdim ) { npy = jdim ; }
64 /* begin: wig; 10-Mar-2008
65 Check that the number of processors is not so high that the halos begin to overlap.
66 If they do, then reduce the number of processors allowed for that dimension.
68 tfpmess[0] = '\0' ;
69 if ( idim / npx < minx ) {
70 npx = idim/minx ;
71 if (npx < 1) { npx = 1 ;}
72 if (npx != *npx_p) {
73 sprintf(tfpmess,"RSL_LITE: TASK_FOR_POINT LIMITING PROCESSOR COUNT IN X-DIRECTION TO %d %d\n", npx,*npx_p) ;
74 *ierr_p = 1 ;
77 if ( jdim / npy < miny ) {
78 npy = jdim/miny ;
79 if (npy < 1) { npy = 1 ;}
80 if (npy != *npy_p) {
81 sprintf(tfpmess,"RSL_LITE: TASK_FOR_POINT LIMITING PROCESSOR COUNT IN Y-DIRECTION TO %d %d\n", npy,*npy_p) ;
82 *ierr_p = 1 ;
85 /* end: wig */
88 i = i >= ids ? i : ids ; i = i <= ide ? i : ide ;
89 rem = idim % npx ;
90 a = ( rem / 2 ) * ( (idim / npx) + 1 ) ;
91 b = a + ( npx - rem ) * ( idim / npx ) ;
92 if ( i-ids < a ) {
93 Px = (i-ids) / ( (idim / npx) + 1 ) ;
95 else if ( i-ids < b ) {
96 Px = ( a / ( (idim / npx) + 1 ) ) + (i-a-ids) / ( ( b - a ) / ( npx - rem ) ) ;
98 else {
99 Px = ( a / ( (idim / npx) + 1 ) ) + (b-a-ids) / ( ( b - a ) / ( npx - rem ) ) +
100 (i-b-ids) / ( ( idim / npx ) + 1 ) ;
103 j = j >= jds ? j : jds ; j = j <= jde ? j : jde ;
104 if ( *minx_p != -99 ) {
105 rem = jdim % npy ;
106 a = ( rem / 2 ) * ( (jdim / npy) + 1 ) ;
107 b = a + ( npy - rem ) * ( jdim / npy ) ;
108 if ( j-jds < a ) {
109 Py = (j-jds) / ( (jdim / npy) + 1 ) ;
111 else if ( j-jds < b ) {
112 Py = ( a / ( (jdim / npy) + 1 ) ) + (j-a-jds) / ( ( b - a ) / ( npy - rem ) ) ;
114 else {
115 Py = ( a / ( (jdim / npy) + 1 ) ) + (b-a-jds) / ( ( b - a ) / ( npy - rem ) ) +
116 (j-b-jds) / ( ( jdim / npy ) + 1 ) ;
118 } else {
119 Py = 1 ;
120 if ( j <= jde-miny ) Py = 0 ;
123 *Px_p = Px ;
124 *Py_p = Py ;
127 TASK_FOR_POINT_MESSAGE()
129 fprintf(stderr,"%s\n",tfpmess) ;
#if 0
/* Stand-alone test driver, compiled out by default.
   Reads "i j ids ide jds jde npx npy" from stdin, then calls TASK_FOR_POINT
   for every point of the domain (widened by shw on each side) in MIC/HOST
   mode (minx = -99, miny = 180) and prints each i with its Px.
   Returns 1 if the eight input values cannot be read, 0 otherwise. */
int
main ( void )
{
  int minx, miny, ierr ;
  int ips[100], ipe[100] ;
  int jps[100], jpe[100] ;
  int shw, i , j , ids, ide, jds, jde, npx, npy ;  /* inputs */
  int Px, Py ;   /* output */

  printf("i, j, ids, ide, jds, jde, npx, npy\n") ;
  /* without all eight values the loop bounds below would be uninitialised */
  if ( scanf("%d %d %d %d %d %d %d %d",&i, &j, &ids,&ide,&jds,&jde,&npx,&npy ) != 8 ) {
    return 1 ;
  }
  shw =0 ;
  minx = -99 ;
  miny = 180 ;
  /* per-process patch extents; only read by the disabled report at the end */
  for ( i = 0 ; i < 100 ; i++ ) { ips[i] = 9999999 ; ipe[i] = -99999999 ; }
  for ( i = 0 ; i < 100 ; i++ ) { jps[i] = 9999999 ; jpe[i] = -99999999 ; }
#if 1
  for ( j = jds-shw ; j <= jde+shw ; j++ )
  {
  for ( i = ids-shw ; i <= ide+shw ; i++ )
  {
#endif
  TASK_FOR_POINT ( &i , &j ,
                   &ids, &ide, &jds, &jde , &npx , &npy ,
                   &Px, &Py, &minx, &miny, &ierr ) ;
//  printf("(%3d %3d) ",Px,Py) ;
  printf("%d %3d\n ",i, Px) ;
#if 1
  }
  printf("\n") ;
  }
/* for ( i = 0 ; i < npx*npy ; i++ ) { */
/*   fprintf(stderr,"%3d. ips %d ipe %d (%d) jps %d jpe %d (%d)\n", i, ips[i], ipe[i], ipe[i]-ips[i]+1, jps[i], jpe[i], jpe[i]-jps[i]+1 ) ; */
/* } */
#endif
  return 0 ;
}
#endif