Update version info for release v4.6.1 (#2122)
[WRF.git] / external / RSL_LITE / task_for_point.c
blob6da4cd5aa54150ad0fd33ac746b3ae8043161c01
1 #ifndef MS_SUA
2 # include <stdio.h>
3 #endif
4 #include "rsl_lite.h"
/* updated 20051021: the new algorithm distributes the remainder, if any, at both ends of the
   dimension rather than giving it to the first "remainder" processes in the dimension. The idea
   is that the processes on the ends have less work because they are boundary processes. The new
   algorithm works like this:
                     a                         b
         + + + + + + o o o o o o o o o o o o o + + + + + +

   + represents a process with an extra point (npoints is n/p+1), o a process without one (n/p).
   In the code, a and b are the zero-based point offsets within the dimension at which the
   section of o processes and the trailing section of + processes begin.
*/
17 /* experimental for running some tasks on host and some on MIC
18 if minx = -99 then miny is the number of grid points I want in the Y dimension.
19 Otherwise both are set to 1 and it works normally 20121018 JM */
/* Text of the most recent diagnostic produced by TASK_FOR_POINT; printed by
   TASK_FOR_POINT_MESSAGE. Empty when the last call raised no diagnostic. */
static char tfpmess[1024] ;

/* Return the zero-based index of the process that owns point offset `off`
   (0 <= off < dim) when `dim` points are divided among `np` processes.

   Every process gets dim/np points; the dim%np left-over points are handed out
   one each to processes at the two ends of the dimension: rem/2 processes at
   the low end, the rest at the high end.

   Returns 0 for a degenerate dimension or process count so that the caller
   never divides by zero. */
static int tfp_owner_1d ( int off , int dim , int np )
{
  int base, rem, nlow, a, b ;

  if ( dim < 1 || np < 1 ) { return 0 ; }

  base = dim / np ;              /* points on a process without an extra point */
  rem  = dim % np ;              /* number of processes that get one extra point */
  nlow = rem / 2 ;               /* ... of which this many sit at the low end */
  a = nlow * ( base + 1 ) ;      /* first offset owned by a "base" process */
  b = a + ( np - rem ) * base ;  /* first offset owned by a high-end "+1" process */

  if ( off < a ) { return off / ( base + 1 ) ; }
  /* a <= off < b implies b > a, hence base >= 1 here */
  if ( off < b ) { return nlow + ( off - a ) / base ; }
  return nlow + ( np - rem ) + ( off - b ) / ( base + 1 ) ;
}

/* Compute which process (Px,Py) of an npx-by-npy decomposition owns grid
   point (i,j).

   i_p, j_p          point to look up; 1 is subtracted from each before use and
                     the result is clamped into [ids,ide] / [jds,jde]
   ids_p .. jde_p    start/end of the domain in each dimension (1 is subtracted)
   npx_p, npy_p      requested number of processes in X and Y
   Px_p, Py_p        outputs: zero-based process index in X and Y
   minx_p, miny_p    if *minx_p == -99 the experimental MIC/host split is used:
                     X is decomposed over half of the npx*npy tasks and Y is
                     split in two, with the upper part *miny_p points tall.
                     Any other values are ignored (both minimums are taken
                     as 1).
   ierr_p            output: 0 on success, 1 if a diagnostic was stored for
                     TASK_FOR_POINT_MESSAGE (process count had to be limited,
                     or an odd task count was given for the MIC/host split)

   Px and Py are still computed when *ierr_p is set to 1.
   Always returns 0. */
int TASK_FOR_POINT ( int* i_p , int* j_p , int* ids_p, int* ide_p , int* jds_p, int* jde_p , int* npx_p , int* npy_p , int* Px_p, int* Py_p , int* minx_p, int* miny_p, int* ierr_p )
{
  int i , j , ids, ide, jds, jde, npx, npy, minx, miny ;  /* inputs */
  int Px, Py ;                                            /* output */
  int idim, jdim ;
  int mic_split, ntasks ;

  mic_split = ( *minx_p == -99 ) ;

  i = *i_p - 1 ;
  j = *j_p - 1 ;
  ids = *ids_p - 1 ; ide = *ide_p - 1 ;
  jds = *jds_p - 1 ; jde = *jde_p - 1 ;
  idim = ide - ids + 1 ;
  jdim = jde - jds + 1 ;
  npx = *npx_p ;
  npy = *npy_p ;
  minx = 1 ;
  miny = mic_split ? *miny_p : 1 ;

  /* Reset the status before any check so that no diagnostic below is lost. */
  *ierr_p = 0 ;
  tfpmess[0] = '\0' ;

  if ( mic_split ) {
    ntasks = *npx_p * *npy_p ;
    npx = ntasks / 2 ;  /* x dim gets half the tasks , only decompose Y by 2 */
    if ( npx * 2 != ntasks ) {
      *ierr_p = 1 ;
      snprintf(tfpmess, sizeof tfpmess, "%d by %d decomp will not work for MIC/HOST splitting. Need even number of tasks\n", *npx_p, *npy_p) ;
    }
    if ( npx < 1 ) { npx = 1 ; }
  } else {
    /* jm for Peter Johnsen -- noticed problem with polar filters in gwrf
       if the number of processors exceeds number of vertical levels */
    if ( npx > idim ) { npx = idim ; }
    if ( npy > jdim ) { npy = jdim ; }
    /* keep the divisions below well defined for empty or invalid input */
    if ( npx < 1 ) { npx = 1 ; }
    if ( npy < 1 ) { npy = 1 ; }

    /* wig; 10-Mar-2008
       Check that the number of processors is not so high that the halos begin to overlap.
       If they do, then reduce the number of processors allowed for that dimension. */
    if ( idim / npx < minx ) {
      npx = idim/minx ;
      if (npx < 1) { npx = 1 ;}
      if (npx != *npx_p) {
        snprintf(tfpmess, sizeof tfpmess, "RSL_LITE: TASK_FOR_POINT LIMITING PROCESSOR COUNT IN X-DIRECTION TO %d %d\n", npx,*npx_p) ;
        *ierr_p = 1 ;
      }
    }
    if ( jdim / npy < miny ) {
      npy = jdim/miny ;
      if (npy < 1) { npy = 1 ;}
      if (npy != *npy_p) {
        snprintf(tfpmess, sizeof tfpmess, "RSL_LITE: TASK_FOR_POINT LIMITING PROCESSOR COUNT IN Y-DIRECTION TO %d %d\n", npy,*npy_p) ;
        *ierr_p = 1 ;
      }
    }
  }

  /* X: clamp the point into the domain, then find its owner */
  i = i >= ids ? i : ids ; i = i <= ide ? i : ide ;
  Px = tfp_owner_1d ( i - ids , idim , npx ) ;

  /* Y: same scheme, except for the MIC/host split where the top miny rows
     go to process row 1 and everything below to process row 0 */
  j = j >= jds ? j : jds ; j = j <= jde ? j : jde ;
  if ( mic_split ) {
    Py = ( j <= jde-miny ) ? 0 : 1 ;
  } else {
    Py = tfp_owner_1d ( j - jds , jdim , npy ) ;
  }

  *Px_p = Px ;
  *Py_p = Py ;
  return 0 ;
}
126 void TASK_FOR_POINT_MESSAGE()
128 fprintf(stderr,"%s\n",tfpmess) ;
#if 0
/* Stand-alone test driver, compiled out by default.

   Reads "i j ids ide jds jde npx npy" from stdin, then calls TASK_FOR_POINT
   for every point of the domain (widened by shw points on each side) using the
   MIC/host split (minx = -99, miny = 180) and prints i and the resulting Px.
   Change the two "#if 1" guards to "#if 0" to look up only the single (i,j)
   point that was read. */
int main(void)
{
  int minx, miny, ierr ;
  int shw, i , j , ids, ide, jds, jde, npx, npy ;  /* inputs */
  int Px, Py ;                                     /* output */

  printf("i, j, ids, ide, jds, jde, npx, npy\n") ;
  if ( scanf("%d %d %d %d %d %d %d %d",&i, &j, &ids,&ide,&jds,&jde,&npx,&npy ) != 8 ) {
    fprintf(stderr,"expected 8 integers: i j ids ide jds jde npx npy\n") ;
    return 1 ;
  }
  shw = 0 ;
  minx = -99 ;
  miny = 180 ;
#if 1
  for ( j = jds-shw ; j <= jde+shw ; j++ )
  {
  for ( i = ids-shw ; i <= ide+shw ; i++ )
  {
#endif
  TASK_FOR_POINT ( &i , &j ,
                   &ids, &ide, &jds, &jde , &npx , &npy ,
                   &Px, &Py, &minx, &miny, &ierr ) ;
    printf("%d %3d\n  ",i, Px) ;
#if 1
  }
  printf("\n") ;
  }
#endif
  return 0 ;
}
#endif