AMDGPU: Allow f16/bf16 for DS_READ_TR16_B64 gfx950 builtins (#118297)
[llvm-project.git] / openmp / runtime / test / affinity / libomp_test_topology.h
blob410103d80924992d851e00a41b7f7b58d07de64f
1 #ifndef LIBOMP_TEST_TOPOLOGY_H
2 #define LIBOMP_TEST_TOPOLOGY_H
4 #include "libomp_test_affinity.h"
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <dirent.h>
8 #include <errno.h>
9 #include <ctype.h>
10 #include <omp.h>
11 #include <stdarg.h>
// Kinds of topology object that a place list can be built from.
enum topology_obj_type_t {
  TOPOLOGY_OBJ_THREAD = 0, // one place per logical cpu
  TOPOLOGY_OBJ_CORE = 1,   // places taken from thread_siblings_list
  TOPOLOGY_OBJ_SOCKET = 2, // places taken from core_siblings_list
  TOPOLOGY_OBJ_MAX = 3     // one past the last object type
};
typedef enum topology_obj_type_t topology_obj_type_t;
20 typedef struct place_list_t {
21 int num_places;
22 int current_place;
23 int *place_nums;
24 affinity_mask_t **masks;
25 } place_list_t;
// Skip over whitespace in file 'f' (newlines and carriage returns included)
// and return the first character that is not whitespace.
// Returns EOF if the file ends before such a character is found.
static int get_first_nonspace_from_file(FILE *f) {
  for (;;) {
    int ch = fgetc(f);
    if (ch == EOF)
      return ch;
    if (!isspace(ch) && ch != '\n' && ch != '\r')
      return ch;
  }
}
// Read an integer from file 'f' into 'number'
// Return 1 on successful read of integer,
//        0 on unsuccessful read of integer,
//        EOF if the end of file was reached before an integer could be read.
static int get_integer_from_file(FILE *f, int *number) {
  int n = fscanf(f, "%d", number);
  // Look at the conversion result before the end-of-file indicator: when the
  // integer is the very last thing in the file (no trailing newline), fscanf
  // stores the value AND sets the end-of-file indicator. That is a successful
  // read and must not be reported as EOF.
  if (n == 1)
    return 1;
  if (feof(f))
    return EOF;
  return 0;
}
51 // Read a siblings list file from Linux /sys/devices/system/cpu/cpu?/topology/*
52 static affinity_mask_t *topology_get_mask_from_file(const char *filename) {
53 int status = EXIT_SUCCESS;
54 FILE *f = fopen(filename, "r");
55 if (!f) {
56 perror(filename);
57 exit(EXIT_FAILURE);
59 affinity_mask_t *mask = affinity_mask_alloc();
60 while (1) {
61 int c, i, n, lower, upper;
62 // Read the first integer
63 n = get_integer_from_file(f, &lower);
64 if (n == EOF) {
65 break;
66 } else if (n == 0) {
67 fprintf(stderr, "syntax error: expected integer\n");
68 status = EXIT_FAILURE;
69 break;
72 // Now either a , or -
73 c = get_first_nonspace_from_file(f);
74 if (c == EOF || c == ',') {
75 affinity_mask_set(mask, lower);
76 if (c == EOF)
77 break;
78 } else if (c == '-') {
79 n = get_integer_from_file(f, &upper);
80 if (n == EOF || n == 0) {
81 fprintf(stderr, "syntax error: expected integer\n");
82 status = EXIT_FAILURE;
83 break;
85 for (i = lower; i <= upper; ++i)
86 affinity_mask_set(mask, i);
87 c = get_first_nonspace_from_file(f);
88 if (c == EOF) {
89 break;
90 } else if (c == ',') {
91 continue;
92 } else {
93 fprintf(stderr, "syntax error: unexpected character: '%c (%d)'\n", c,
94 c);
95 status = EXIT_FAILURE;
96 break;
98 } else {
99 fprintf(stderr, "syntax error: unexpected character: '%c (%d)'\n", c, c);
100 status = EXIT_FAILURE;
101 break;
104 fclose(f);
105 if (status == EXIT_FAILURE) {
106 affinity_mask_free(mask);
107 mask = NULL;
109 return mask;
// Count the consecutive /sys/devices/system/cpu/cpu<N> directories starting
// at N = 0. Returns at least 1, even when no such directory exists.
static int topology_get_num_cpus() {
  char path[1024];
  int count = 0;
  for (;;) {
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d", count);
    DIR *d = opendir(path);
    if (!d)
      break;
    closedir(d);
    ++count;
  }
  return count == 0 ? 1 : count;
}
131 // Return whether the current thread has access to all logical processors
132 static int topology_using_full_mask() {
133 int cpu;
134 int has_all = 1;
135 int num_cpus = topology_get_num_cpus();
136 affinity_mask_t *mask = affinity_mask_alloc();
137 get_thread_affinity(mask);
138 for (cpu = 0; cpu < num_cpus; ++cpu) {
139 if (!affinity_mask_isset(mask, cpu)) {
140 has_all = 0;
141 break;
144 affinity_mask_free(mask);
145 return has_all;
148 // Return array of masks representing OMP_PLACES keyword (e.g., sockets, cores,
149 // threads)
150 static place_list_t *topology_alloc_type_places(topology_obj_type_t type) {
151 char buf[1024];
152 int i, cpu, num_places, num_unique;
153 int *place_nums;
154 int num_cpus = topology_get_num_cpus();
155 place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
156 affinity_mask_t **masks =
157 (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_cpus);
158 num_unique = 0;
159 for (cpu = 0; cpu < num_cpus; ++cpu) {
160 affinity_mask_t *mask;
161 if (type == TOPOLOGY_OBJ_CORE) {
162 snprintf(buf, sizeof(buf),
163 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
164 cpu);
165 mask = topology_get_mask_from_file(buf);
166 } else if (type == TOPOLOGY_OBJ_SOCKET) {
167 snprintf(buf, sizeof(buf),
168 "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list",
169 cpu);
170 mask = topology_get_mask_from_file(buf);
171 } else if (type == TOPOLOGY_OBJ_THREAD) {
172 mask = affinity_mask_alloc();
173 affinity_mask_set(mask, cpu);
174 } else {
175 fprintf(stderr, "Unknown topology type (%d)\n", (int)type);
176 exit(EXIT_FAILURE);
178 // Check for unique topology objects above the thread level
179 if (type != TOPOLOGY_OBJ_THREAD) {
180 for (i = 0; i < num_unique; ++i) {
181 if (affinity_mask_equal(masks[i], mask)) {
182 affinity_mask_free(mask);
183 mask = NULL;
184 break;
188 if (mask)
189 masks[num_unique++] = mask;
191 place_nums = (int *)malloc(sizeof(int) * num_unique);
192 for (i = 0; i < num_unique; ++i)
193 place_nums[i] = i;
194 places->num_places = num_unique;
195 places->masks = masks;
196 places->place_nums = place_nums;
197 places->current_place = -1;
198 return places;
201 static place_list_t *topology_alloc_openmp_places() {
202 int place, i;
203 int num_places = omp_get_num_places();
204 place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
205 affinity_mask_t **masks =
206 (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places);
207 int *place_nums = (int *)malloc(sizeof(int) * num_places);
208 for (place = 0; place < num_places; ++place) {
209 int num_procs = omp_get_place_num_procs(place);
210 int *ids = (int *)malloc(sizeof(int) * num_procs);
211 omp_get_place_proc_ids(place, ids);
212 affinity_mask_t *mask = affinity_mask_alloc();
213 for (i = 0; i < num_procs; ++i)
214 affinity_mask_set(mask, ids[i]);
215 masks[place] = mask;
216 place_nums[place] = place;
218 places->num_places = num_places;
219 places->place_nums = place_nums;
220 places->masks = masks;
221 places->current_place = omp_get_place_num();
222 return places;
225 static place_list_t *topology_alloc_openmp_partition() {
226 int p, i;
227 int num_places = omp_get_partition_num_places();
228 place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
229 int *place_nums = (int *)malloc(sizeof(int) * num_places);
230 affinity_mask_t **masks =
231 (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places);
232 omp_get_partition_place_nums(place_nums);
233 for (p = 0; p < num_places; ++p) {
234 int place = place_nums[p];
235 int num_procs = omp_get_place_num_procs(place);
236 int *ids = (int *)malloc(sizeof(int) * num_procs);
237 if (num_procs == 0) {
238 fprintf(stderr, "place %d has 0 procs?\n", place);
239 exit(EXIT_FAILURE);
241 omp_get_place_proc_ids(place, ids);
242 affinity_mask_t *mask = affinity_mask_alloc();
243 for (i = 0; i < num_procs; ++i)
244 affinity_mask_set(mask, ids[i]);
245 if (affinity_mask_count(mask) == 0) {
246 fprintf(stderr, "place %d has 0 procs set?\n", place);
247 exit(EXIT_FAILURE);
249 masks[p] = mask;
251 places->num_places = num_places;
252 places->place_nums = place_nums;
253 places->masks = masks;
254 places->current_place = omp_get_place_num();
255 return places;
258 // Free the array of masks from one of: topology_alloc_type_masks()
259 // or topology_alloc_openmp_masks()
260 static void topology_free_places(place_list_t *places) {
261 int i;
262 for (i = 0; i < places->num_places; ++i)
263 affinity_mask_free(places->masks[i]);
264 free(places->masks);
265 free(places->place_nums);
266 free(places);
269 static void topology_print_places(const place_list_t *p) {
270 int i;
271 char buf[1024];
272 for (i = 0; i < p->num_places; ++i) {
273 affinity_mask_snprintf(buf, sizeof(buf), p->masks[i]);
274 printf("Place %d: %s\n", p->place_nums[i], buf);
278 // Print out an error message, possibly with two problem place lists,
279 // and then exit with failure
280 static void proc_bind_die(omp_proc_bind_t proc_bind, int T, int P,
281 const char *format, ...) {
282 va_list args;
283 va_start(args, format);
284 const char *pb;
285 switch (proc_bind) {
286 case omp_proc_bind_false:
287 pb = "False";
288 break;
289 case omp_proc_bind_true:
290 pb = "True";
291 break;
292 case omp_proc_bind_master:
293 pb = "Master (Primary)";
294 break;
295 case omp_proc_bind_close:
296 pb = "Close";
297 break;
298 case omp_proc_bind_spread:
299 pb = "Spread";
300 break;
301 default:
302 pb = "(Unknown Proc Bind Type)";
303 break;
305 if (proc_bind == omp_proc_bind_spread || proc_bind == omp_proc_bind_close) {
306 if (T <= P) {
307 fprintf(stderr, "%s : (T(%d) <= P(%d)) : ", pb, T, P);
308 } else {
309 fprintf(stderr, "%s : (T(%d) > P(%d)) : ", pb, T, P);
311 } else {
312 fprintf(stderr, "%s : T = %d, P = %d : ", pb, T, P);
314 vfprintf(stderr, format, args);
315 va_end(args);
317 exit(EXIT_FAILURE);
320 // Return 1 on failure, 0 on success.
321 static void proc_bind_check(omp_proc_bind_t proc_bind,
322 const place_list_t *parent, place_list_t **children,
323 int nchildren) {
324 place_list_t *partition;
325 int T, i, j, place, low, high, first, last, count, current_place, num_places;
326 const int *place_nums;
327 int P = parent->num_places;
329 // Find the correct T (there could be null entries in children)
330 place_list_t **partitions =
331 (place_list_t **)malloc(sizeof(place_list_t *) * nchildren);
332 T = 0;
333 for (i = 0; i < nchildren; ++i)
334 if (children[i])
335 partitions[T++] = children[i];
336 // Only able to check spread, close, master (primary)
337 if (proc_bind != omp_proc_bind_spread && proc_bind != omp_proc_bind_close &&
338 proc_bind != omp_proc_bind_master)
339 proc_bind_die(proc_bind, T, P, NULL, NULL,
340 "Cannot check this proc bind type\n");
342 if (proc_bind == omp_proc_bind_spread) {
343 if (T <= P) {
344 // Run through each subpartition
345 for (i = 0; i < T; ++i) {
346 partition = partitions[i];
347 place_nums = partition->place_nums;
348 num_places = partition->num_places;
349 current_place = partition->current_place;
350 // Correct count?
351 low = P / T;
352 high = P / T + (P % T ? 1 : 0);
353 if (num_places != low && num_places != high) {
354 proc_bind_die(proc_bind, T, P,
355 "Incorrect number of places for thread %d: %d. "
356 "Expecting between %d and %d\n",
357 i, num_places, low, high);
359 // Consecutive places?
360 for (j = 1; j < num_places; ++j) {
361 if (place_nums[j] != (place_nums[j - 1] + 1) % P) {
362 proc_bind_die(proc_bind, T, P,
363 "Not consecutive places: %d, %d in partition\n",
364 place_nums[j - 1], place_nums[j]);
367 first = place_nums[0];
368 last = place_nums[num_places - 1];
369 // Primary thread executes on place of the parent thread?
370 if (i == 0) {
371 if (current_place != parent->current_place) {
372 proc_bind_die(
373 proc_bind, T, P,
374 "Primary thread not on same place (%d) as parent thread (%d)\n",
375 current_place, parent->current_place);
377 } else {
378 // Thread's current place is first place within it's partition?
379 if (current_place != first) {
380 proc_bind_die(proc_bind, T, P,
381 "Thread's current place (%d) is not the first place "
382 "in its partition [%d, %d]\n",
383 current_place, first, last);
386 // Partitions don't have intersections?
387 int f1 = first;
388 int l1 = last;
389 for (j = 0; j < i; ++j) {
390 int f2 = partitions[j]->place_nums[0];
391 int l2 = partitions[j]->place_nums[partitions[j]->num_places - 1];
392 if (f1 > l1 && f2 > l2) {
393 proc_bind_die(proc_bind, T, P,
394 "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
395 l1, f2, l2);
397 if (f1 > l1 && f2 <= l2)
398 if (f1 < l2 || l1 > f2) {
399 proc_bind_die(proc_bind, T, P,
400 "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
401 l1, f2, l2);
403 if (f1 <= l1 && f2 > l2)
404 if (f2 < l1 || l2 > f1) {
405 proc_bind_die(proc_bind, T, P,
406 "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
407 l1, f2, l2);
409 if (f1 <= l1 && f2 <= l2)
410 if (!(f2 > l1 || l2 < f1)) {
411 proc_bind_die(proc_bind, T, P,
412 "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
413 l1, f2, l2);
417 } else {
418 // T > P
419 // Each partition has only one place?
420 for (i = 0; i < T; ++i) {
421 if (partitions[i]->num_places != 1) {
422 proc_bind_die(
423 proc_bind, T, P,
424 "Incorrect number of places for thread %d: %d. Expecting 1\n", i,
425 partitions[i]->num_places);
428 // Correct number of consecutive threads per partition?
429 low = T / P;
430 high = T / P + (T % P ? 1 : 0);
431 for (i = 1, count = 1; i < T; ++i) {
432 if (partitions[i]->place_nums[0] == partitions[i - 1]->place_nums[0]) {
433 count++;
434 if (count > high) {
435 proc_bind_die(
436 proc_bind, T, P,
437 "Too many threads have place %d for their partition\n",
438 partitions[i]->place_nums[0]);
440 } else {
441 if (count < low) {
442 proc_bind_die(
443 proc_bind, T, P,
444 "Not enough threads have place %d for their partition\n",
445 partitions[i]->place_nums[0]);
447 count = 1;
450 // Primary thread executes on place of the parent thread?
451 current_place = partitions[0]->place_nums[0];
452 if (parent->current_place != -1 &&
453 current_place != parent->current_place) {
454 proc_bind_die(
455 proc_bind, T, P,
456 "Primary thread not on same place (%d) as parent thread (%d)\n",
457 current_place, parent->current_place);
460 } else if (proc_bind == omp_proc_bind_close ||
461 proc_bind == omp_proc_bind_master) {
462 // Check that each subpartition is the same as the parent
463 for (i = 0; i < T; ++i) {
464 partition = partitions[i];
465 place_nums = partition->place_nums;
466 num_places = partition->num_places;
467 current_place = partition->current_place;
468 if (parent->num_places != num_places) {
469 proc_bind_die(proc_bind, T, P,
470 "Number of places in subpartition (%d) does not match "
471 "parent (%d)\n",
472 num_places, parent->num_places);
474 for (j = 0; j < num_places; ++j) {
475 if (parent->place_nums[j] != place_nums[j]) {
476 proc_bind_die(proc_bind, T, P,
477 "Subpartition place (%d) does not match "
478 "parent partition place (%d)\n",
479 place_nums[j], parent->place_nums[j]);
483 // Find index into place_nums of current place for parent
484 for (j = 0; j < parent->num_places; ++j)
485 if (parent->place_nums[j] == parent->current_place)
486 break;
487 if (proc_bind == omp_proc_bind_close) {
488 if (T <= P) {
489 // close T <= P
490 // check place assignment for each thread
491 for (i = 0; i < T; ++i) {
492 partition = partitions[i];
493 current_place = partition->current_place;
494 if (current_place != parent->place_nums[j]) {
495 proc_bind_die(
496 proc_bind, T, P,
497 "Thread %d's current place (%d) is incorrect. expected %d\n", i,
498 current_place, parent->place_nums[j]);
500 j = (j + 1) % parent->num_places;
502 } else {
503 // close T > P
504 // check place assignment for each thread
505 low = T / P;
506 high = T / P + (T % P ? 1 : 0);
507 count = 1;
508 if (partitions[0]->current_place != parent->current_place) {
509 proc_bind_die(
510 proc_bind, T, P,
511 "Primary thread's place (%d) is not parent thread's place (%d)\n",
512 partitions[0]->current_place, parent->current_place);
514 for (i = 1; i < T; ++i) {
515 current_place = partitions[i]->current_place;
516 if (current_place == parent->place_nums[j]) {
517 count++;
518 if (count > high) {
519 proc_bind_die(
520 proc_bind, T, P,
521 "Too many threads have place %d for their current place\n",
522 current_place);
524 } else {
525 if (count < low) {
526 proc_bind_die(
527 proc_bind, T, P,
528 "Not enough threads have place %d for their current place\n",
529 parent->place_nums[j]);
531 j = (j + 1) % parent->num_places;
532 if (current_place != parent->place_nums[j]) {
533 proc_bind_die(
534 proc_bind, T, P,
535 "Thread %d's place (%d) is not corret. Expected %d\n", i,
536 partitions[i]->current_place, parent->place_nums[j]);
538 count = 1;
542 } else {
543 // proc_bind_primary
544 // Every thread should be assigned to the primary thread's place
545 for (i = 0; i < T; ++i) {
546 if (partitions[i]->current_place != parent->current_place) {
547 proc_bind_die(
548 proc_bind, T, P,
549 "Thread %d's place (%d) is not the primary thread's place (%d)\n",
550 i, partitions[i]->current_place, parent->current_place);
556 // Check that each partition's current place is within the partition
557 for (i = 0; i < T; ++i) {
558 current_place = partitions[i]->current_place;
559 num_places = partitions[i]->num_places;
560 first = partitions[i]->place_nums[0];
561 last = partitions[i]->place_nums[num_places - 1];
562 for (j = 0; j < num_places; ++j)
563 if (partitions[i]->place_nums[j] == current_place)
564 break;
565 if (j == num_places) {
566 proc_bind_die(proc_bind, T, P,
567 "Thread %d's current place (%d) is not within its "
568 "partition [%d, %d]\n",
569 i, current_place, first, last);
573 free(partitions);
576 #endif