// Per-loop type aliases for the three nested loops (suffix 0, 1, 2).
// Every alias currently expands to LOOP_TYPES, which is not defined in this
// section -- presumably provided by the file that includes this one
// (TODO confirm).
//   LOOP_IV_TYPEn : type of the i/j/k values passed to Set()
//   LOOP_TYPEn    : use is not visible in this section
//   LOOP_STYPEn   : type of the per-loop A1/B1/Step globals (e.g. jA1, jStep)
6 #define LOOP_IV_TYPE0 LOOP_TYPES
7 #define LOOP_TYPE0 LOOP_TYPES
8 #define LOOP_STYPE0 LOOP_TYPES
10 #define LOOP_IV_TYPE1 LOOP_TYPES
11 #define LOOP_TYPE1 LOOP_TYPES
12 #define LOOP_STYPE1 LOOP_TYPES
14 #define LOOP_IV_TYPE2 LOOP_TYPES
15 #define LOOP_TYPE2 LOOP_TYPES
16 #define LOOP_STYPE2 LOOP_TYPES
// Upper bound applied to the number of threads requested for the parallel
// region (num_threads is clamped to this value).
18 #define MAX_THREADS 256
// Output hook for all diagnostics; forwards its arguments to printf unchanged.
21 #define PRINTF(...) printf(__VA_ARGS__)
31 LOOP_STYPE1 jA1, jB1, jStep;
32 LOOP_STYPE2 kA1, kB1, kStep;
34 // Conditions may use <=, <, >=, or > (!= follows a different pattern).
35 // Additionally defining LOOP_LEi, LOOP_LTi, LOOP_GEi, or LOOP_GTi helps to
36 // build the calls of the test from main.
40 #elif defined LOOP_LT0
42 #elif defined LOOP_GE0
44 #elif defined LOOP_GT0
50 #elif defined LOOP_LT1
52 #elif defined LOOP_GE1
54 #elif defined LOOP_GT1
60 #elif defined LOOP_LT2
62 #elif defined LOOP_GE2
64 #elif defined LOOP_GT2
// Allocate room for `size` spaceType records and zero-fill it.
// NOTE(review): the malloc result is handed to memset without a NULL check,
// so a failed allocation would be written through. calloc plus an explicit
// check would be safer.
74 spaceType *AllocSpace(unsigned size) {
76 spaceType *p = (spaceType *)malloc(size * sizeof(spaceType));
77 memset(p, 0, size * sizeof(spaceType));
// Release a record buffer (the test passes buffers obtained from AllocSpace).
81 void FreeSpace(spaceType *space) { free(space); }
83 // record an iteration
// Stores the tuple (i, j, k) in slot `count - 1` of `space`; `count` is the
// 1-based ordinal of the iteration being recorded. The callers size `space`
// to `trueCount` records, so a count above trueCount has no slot of its own.
// NOTE(review): k is declared as LOOP_IV_TYPE0 while i and j use their own
// loop's alias; LOOP_IV_TYPE2 looks intended. No effect today because every
// alias expands to LOOP_TYPES -- confirm before the aliases diverge.
84 void Set(spaceType *space, unsigned count, unsigned trueCount, LOOP_IV_TYPE0 i,
85 LOOP_IV_TYPE1 j, LOOP_IV_TYPE0 k) {
86 if (count > trueCount) {
87 // number of iterations exceeded
88 // will be reported with checks
91 space[count - 1].i = i;
92 space[count - 1].j = j;
93 space[count - 1].k = k;
101 spaceType *openmpSpace;
102 spaceType *scalarSpace;
104 unsigned trueCount = 0;
105 unsigned openmpCount = 0;
106 unsigned scalarCount = 0;
107 unsigned uselessThreadsOpenMP = 0;
108 unsigned usefulThreadsOpenMP = 0;
110 // Use half of the available threads/logical processors.
111 unsigned num_threads = omp_get_max_threads() / 2;
113 // Make sure num_threads is not 0 after the division in case
114 // omp_get_max_threads() returns 1.
115 if (num_threads == 0)
118 if (num_threads > MAX_THREADS)
119 num_threads = MAX_THREADS;
121 unsigned long *chunkSizesOpenmp =
122 (unsigned long *)malloc(sizeof(unsigned long) * num_threads);
123 memset(chunkSizesOpenmp, 0, sizeof(unsigned long) * num_threads);
125 // count iterations and allocate space
126 LOOP { ++trueCount; }
128 openmpSpace = AllocSpace(trueCount);
129 scalarSpace = AllocSpace(trueCount);
131 // fill the scalar (compare) space
134 Set(scalarSpace, scalarCount, trueCount, i, j, k);
138 // perform and record OpenMP iterations and thread use
139 #pragma omp parallel num_threads(num_threads)
141 unsigned gtid = omp_get_thread_num();
142 #pragma omp for collapse(3) private(i, j, k)
145 #pragma omp atomic update
146 ++chunkSizesOpenmp[gtid];
147 #pragma omp atomic capture
148 count = ++openmpCount;
149 Set(openmpSpace, count, trueCount, i, j, k);
153 // check for the right number of iterations processed
154 // (Set leaves an excess to be reported here, so both directions are checked)
155 if (openmpCount < trueCount) {
156 PRINTF("OpenMP FAILURE: Openmp processed fewer iterations: %d vs %d\n",
157 openmpCount, trueCount);
159 } else if (openmpCount > trueCount) {
160 PRINTF("OpenMP FAILURE: Openmp processed more iterations: %d vs %d\n",
161 openmpCount, trueCount);
165 // check OpenMP for iteration correctness against scalar
166 for (unsigned i = 0; i < trueCount; i++) {
168 for (j = 0; j < openmpCount; j++) {
169 if ((scalarSpace[i].i == openmpSpace[j].i) &&
170 (scalarSpace[i].j == openmpSpace[j].j) &&
171 (scalarSpace[i].k == openmpSpace[j].k)) {
175 if (j == openmpCount) {
176 PRINTF("OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[i].i,
177 scalarSpace[i].j, scalarSpace[i].k);
182 // check for efficient thread use
183 for (unsigned i = 0; i < num_threads; ++i) {
184 if (chunkSizesOpenmp[i] == 0) {
185 ++uselessThreadsOpenMP;
189 // a check that more than one thread was used (a weak check)
190 if ((uselessThreadsOpenMP == num_threads - 1) && (trueCount > 1)) {
191 PRINTF("OpenMP FAILURE: threads are not used\n");
196 // a check to see if the load was spread more or less evenly so that
197 // when there was more work than threads each one got at least something
198 // (stronger, but may currently fail for a general collapse case)
199 if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) {
200 PRINTF("OpenMP FAILURE: %d threads not used with %d iterations\n",
201 uselessThreadsOpenMP, openmpCount);
207 FreeSpace(openmpSpace);
208 FreeSpace(scalarSpace);
209 free(chunkSizesOpenmp);