2 * Copyright (C) 2018 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Measure primitive rate under various circumstances.
28 * - rasterizer discard
31 * - degenerate primitives
32 * - subpixel primitives
39 #include "piglit-util-gl.h"
41 /* this must be a power of two to prevent precision issues */
42 #define WINDOW_SIZE 1024
/*
 * Piglit test configuration.
 *
 * Sets the compatibility-profile version field to 10 and asks for a square
 * WINDOW_SIZE x WINDOW_SIZE window with an RGBA, double-buffered visual.
 * The window is square so that the same pixel-to-clip-space scale
 * (2.0 / WINDOW_SIZE) can be used for both axes by the geometry generators.
 */
44 PIGLIT_GL_TEST_CONFIG_BEGIN
46 config
.supports_gl_compat_version
= 10;
47 config
.window_width
= WINDOW_SIZE
;
48 config
.window_height
= WINDOW_SIZE
;
49 config
.window_visual
= PIGLIT_GL_VISUAL_RGBA
| PIGLIT_GL_VISUAL_DOUBLE
;
51 PIGLIT_GL_TEST_CONFIG_END
/*
 * GPU clock in MHz. Zero unless a "-freq=<n>" argument is parsed in
 * piglit_init. The reporting code divides rates by gpu_freq_mhz * 1000000
 * and labels the table "Prims/clock," when it is non-zero,
 * "GPrims/second," otherwise.
 */
53 static unsigned gpu_freq_mhz
;
/*
 * Program objects built in piglit_init. The shader sources visible there
 * declare no varying array for progs[0], "varying vec4 v[4]" for progs[1]
 * and "varying vec4 v[8]" for progs[2].
 */
54 static GLint progs
[3];
/*
 * One-time setup: parse command-line options, check the GL version, build
 * the three shader programs and set the fixed GL state used by every test.
 *
 * argc/argv - command line; every argument starting with "-freq=" has the
 *             text after the '=' parsed as an unsigned GPU clock in MHz.
 *
 * NOTE(review): several lines of this function (return type, braces, parts
 * of the shader strings) are missing from this excerpt.
 */
57 piglit_init(int argc
, char **argv
)
/*
 * NOTE(review): `i` is unsigned while argc is int, so the comparison is
 * done in unsigned arithmetic. The sscanf result is not checked, so a
 * malformed "-freq=" value leaves gpu_freq_mhz unchanged.
 */
59 for (unsigned i
= 1; i
< argc
; i
++) {
60 if (strncmp(argv
[i
], "-freq=", 6) == 0)
61 sscanf(argv
[i
] + 6, "%u", &gpu_freq_mhz
);
64 piglit_require_gl_version(32);
/* progs[0]: position pass-through, constant white fragment colour. */
66 progs
[0] = piglit_build_simple_program(
69 " gl_Position = gl_Vertex; \n"
74 " gl_FragColor = vec4(1.0); \n"
/*
 * progs[1]: same, plus four vec4 attributes copied to four varyings. The
 * fragment shader sums them so that every varying is actually consumed.
 */
77 progs
[1] = piglit_build_simple_program(
78 "#version 150 compatibility \n"
79 "varying vec4 v[4]; \n"
80 "attribute vec4 a[4]; \n"
82 " for (int i = 0; i < 4; i++) v[i] = a[i]; \n"
83 " gl_Position = gl_Vertex; \n"
86 "#version 150 compatibility \n"
87 "varying vec4 v[4]; \n"
89 " gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3], vec4(1.0)) == 1.0 ? 0.0 : 1.0); \n"
/* progs[2]: as progs[1], but with eight attributes/varyings. */
92 progs
[2] = piglit_build_simple_program(
93 "#version 150 compatibility \n"
94 "varying vec4 v[8]; \n"
95 "attribute vec4 a[8]; \n"
97 " for (int i = 0; i < 8; i++) v[i] = a[i]; \n"
98 " gl_Position = gl_Vertex; \n"
101 "#version 150 compatibility \n"
102 "varying vec4 v[8]; \n"
104 " gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3] + v[4] + v[5] + v[6] + v[7], vec4(1.0)) == 1.0 ? 0.0 : 1.0); \n"
/*
 * Fixed state for all tests: positions come from the legacy vertex array,
 * face culling is always on, and the restart index is the largest 32-bit
 * value (GL_PRIMITIVE_RESTART itself is only enabled per test in run_test).
 */
107 glEnableClientState(GL_VERTEX_ARRAY
);
108 glEnable(GL_CULL_FACE
);
109 glPrimitiveRestartIndex(UINT32_MAX
);
/*
 * Generate a num_quads_per_dim x num_quads_per_dim grid of quads (two
 * triangles each) as xyz float vertices, optionally with an index list.
 *
 * num_quads_per_dim   - number of quads along each axis
 * prim_size_in_pixels - quad edge length in pixels; converted to clip space
 *                       as size * 2.0 / WINDOW_SIZE
 * cull_percentage     - must be 0, 25, 50, 75 or 100 (asserted)
 * back_face_culling,
 * view_culling,
 * degenerate_prims    - select how quads flagged by `cull` are altered:
 *                       reversed winding, moved by the full clip-space
 *                       extent, or collapsed to zero-area triangles
 * max_vertices,
 * max_indices         - capacities, used only in the bounds assertions
 * num_vertices,
 * num_indices         - in/out counters; *num_vertices must be 0 on entry
 * vertices, indices   - output arrays
 *
 * NOTE(review): this excerpt is missing lines, including the return type,
 * the declarations/assignments of `cull`, `x`, `y` and `old`, the counter
 * increments and the condition that chooses between the two emission
 * paths below. Comments here describe only what the visible lines show.
 */
113 gen_triangle_tile(unsigned num_quads_per_dim
, double prim_size_in_pixels
,
114 unsigned cull_percentage
,
115 bool back_face_culling
, bool view_culling
, bool degenerate_prims
,
116 unsigned max_vertices
, unsigned *num_vertices
, float *vertices
,
117 unsigned max_indices
, unsigned *num_indices
, unsigned *indices
)
119 /* clip space coordinates in both X and Y directions: */
120 const double first
= -1;
121 const double max_length
= 2;
122 const double d
= prim_size_in_pixels
* 2.0 / WINDOW_SIZE
;
/* The whole grid must fit inside the clip-space extent, and the output
 * must start empty. */
124 assert(d
* num_quads_per_dim
<= max_length
);
125 assert(*num_vertices
== 0);
127 /* the vertex ordering is counter-clockwise */
128 for (unsigned ty
= 0; ty
< num_quads_per_dim
; ty
++) {
/*
 * Decide per row whether its quads are to be culled. Only the listed
 * percentages are accepted; the value assigned in each branch is not
 * visible in this excerpt.
 */
131 if (cull_percentage
== 0)
133 else if (cull_percentage
== 25)
135 else if (cull_percentage
== 50)
137 else if (cull_percentage
== 75)
139 else if (cull_percentage
== 100)
142 assert(!"wrong cull_percentage");
144 for (unsigned tx
= 0; tx
< num_quads_per_dim
; tx
++) {
148 /* view culling in different directions */
149 double xoffset
= 0, yoffset
= 0, zoffset
= 0;
/*
 * The offset of +/-2 equals max_length, i.e. the quad is shifted by the
 * whole clip-space extent along one axis. The direction changes every two
 * rows and cycles through -X, +X, -Y, +Y.
 */
151 if (cull
&& view_culling
) {
152 unsigned side
= (ty
/ 2) % 4;
154 if (side
== 0) xoffset
= -2;
155 else if (side
== 1) xoffset
= 2;
156 else if (side
== 2) yoffset
= -2;
157 else if (side
== 3) yoffset
= 2;
/*
 * First emission path: vertices shared between neighbouring quads plus an
 * index list. Presumably taken for indexed draws; the selecting condition
 * is not visible here. `elem` is the float offset of the next vertex.
 */
161 unsigned elem
= *num_vertices
* 3;
163 /* generate horizontal stripes with maximum reuse */
166 assert(*num_vertices
<= max_vertices
);
168 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
169 vertices
[elem
++] = yoffset
+ first
+ d
* y
;
170 vertices
[elem
++] = zoffset
;
172 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
173 vertices
[elem
++] = yoffset
+ first
+ d
* (y
+ 1);
174 vertices
[elem
++] = zoffset
;
/* Index of the first of the two right-hand corners emitted next; the
 * left-hand corners are addressed as base_index - 2 and base_index - 1. */
177 int base_index
= *num_vertices
;
180 assert(*num_vertices
<= max_vertices
);
182 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
183 vertices
[elem
++] = yoffset
+ first
+ d
* y
;
184 vertices
[elem
++] = zoffset
;
186 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
187 vertices
[elem
++] = yoffset
+ first
+ d
* (y
+ 1);
188 vertices
[elem
++] = zoffset
;
190 /* generate indices */
191 unsigned idx
= *num_indices
;
193 assert(*num_indices
<= max_indices
);
/* Two triangles per quad: (base-2, base, base-1) and
 * (base-1, base, base+1). */
195 indices
[idx
++] = base_index
- 2;
196 indices
[idx
++] = base_index
;
197 indices
[idx
++] = base_index
- 1;
199 indices
[idx
++] = base_index
- 1;
200 indices
[idx
++] = base_index
;
201 indices
[idx
++] = base_index
+ 1;
/* Swapping the first two indices of each triangle reverses its winding. */
203 if (cull
&& back_face_culling
) {
204 /* switch the winding order */
205 unsigned tmp
= indices
[idx
- 6];
206 indices
[idx
- 6] = indices
[idx
- 5];
207 indices
[idx
- 5] = tmp
;
209 tmp
= indices
[idx
- 3];
210 indices
[idx
- 3] = indices
[idx
- 2];
211 indices
[idx
- 2] = tmp
;
/* Repeat one index inside each triangle so that both have zero area. */
214 if (cull
&& degenerate_prims
) {
215 indices
[idx
- 5] = indices
[idx
- 4];
216 indices
[idx
- 2] = indices
[idx
- 1];
/*
 * Second emission path: six stand-alone vertices (two triangles) per quad
 * with no sharing. Presumably used for non-indexed draws; the selecting
 * condition is not visible here.
 */
219 unsigned elem
= *num_vertices
* 3;
221 assert(*num_vertices
<= max_vertices
);
223 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
224 vertices
[elem
++] = yoffset
+ first
+ d
* y
;
225 vertices
[elem
++] = zoffset
;
227 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
228 vertices
[elem
++] = yoffset
+ first
+ d
* y
;
229 vertices
[elem
++] = zoffset
;
231 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
232 vertices
[elem
++] = yoffset
+ first
+ d
* (y
+ 1);
233 vertices
[elem
++] = zoffset
;
235 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
236 vertices
[elem
++] = yoffset
+ first
+ d
* (y
+ 1);
237 vertices
[elem
++] = zoffset
;
239 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
240 vertices
[elem
++] = yoffset
+ first
+ d
* y
;
241 vertices
[elem
++] = zoffset
;
243 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
244 vertices
[elem
++] = yoffset
+ first
+ d
* (y
+ 1);
245 vertices
[elem
++] = zoffset
;
247 if (cull
&& back_face_culling
) {
248 /* switch the winding order */
/*
 * 6*3*4 = 6 vertices * 3 floats * 4 bytes (assumes a 4-byte float).
 * The quad is snapshotted into `old` and written back with its six
 * vertices in reverse order.
 */
250 memcpy(old
, vertices
+ elem
- 6*3, 6*3*4);
252 for (unsigned i
= 0; i
< 6; i
++) {
253 vertices
[elem
- 6*3 + i
*3 + 0] = old
[(5 - i
)*3 + 0];
254 vertices
[elem
- 6*3 + i
*3 + 1] = old
[(5 - i
)*3 + 1];
255 vertices
[elem
- 6*3 + i
*3 + 2] = old
[(5 - i
)*3 + 2];
259 if (cull
&& degenerate_prims
) {
260 /* use any previously generated vertices */
261 unsigned v0
= rand() % *num_vertices
;
262 unsigned v1
= rand() % *num_vertices
;
/*
 * 12 bytes = one xyz vertex. Two corners of the first triangle become
 * copies of vertex v0 and two corners of the second become copies of v1,
 * so both triangles have zero area.
 */
264 memcpy(&vertices
[elem
- 5*3], &vertices
[v0
*3], 12);
265 memcpy(&vertices
[elem
- 4*3], &vertices
[v0
*3], 12);
267 memcpy(&vertices
[elem
- 2*3], &vertices
[v1
*3], 12);
268 memcpy(&vertices
[elem
- 1*3], &vertices
[v1
*3], 12);
/*
 * Triangle-strip counterpart of gen_triangle_tile: emit one horizontal
 * strip of quads per row, all rows written back to back into `vertices`.
 *
 * The parameters have the same names and order as in gen_triangle_tile.
 * In the visible code *num_vertices must be 0 on entry, max_vertices is
 * used for bounds assertions, and *num_indices is finally set equal to
 * *num_vertices.
 *
 * NOTE(review): this excerpt is missing lines, including the return type,
 * the declarations/assignments of `cull`, `x`, `y0` and `y1`, the body of
 * the back-face branch and the body of the final loop. Comments here
 * describe only what the visible lines show.
 */
276 gen_triangle_strip_tile(unsigned num_quads_per_dim
, double prim_size_in_pixels
,
277 unsigned cull_percentage
,
278 bool back_face_culling
, bool view_culling
, bool degenerate_prims
,
279 unsigned max_vertices
, unsigned *num_vertices
, float *vertices
,
280 unsigned max_indices
, unsigned *num_indices
, unsigned *indices
)
282 /* clip space coordinates in both X and Y directions: */
283 const double first
= -1;
284 const double max_length
= 2;
285 const double d
= prim_size_in_pixels
* 2.0 / WINDOW_SIZE
;
287 assert(d
* num_quads_per_dim
<= max_length
);
288 assert(*num_vertices
== 0);
290 /* the vertex ordering is counter-clockwise */
291 for (unsigned y
= 0; y
< num_quads_per_dim
; y
++) {
/*
 * Per-row cull decision. Only 0, 25, 50, 75 and 100 are accepted; the
 * value assigned in each branch is not visible in this excerpt.
 */
294 if (cull_percentage
== 0)
296 else if (cull_percentage
== 25)
298 else if (cull_percentage
== 50)
300 else if (cull_percentage
== 75)
302 else if (cull_percentage
== 100)
305 assert(!"wrong cull_percentage");
307 /* view culling in different directions */
308 double xoffset
= 0, yoffset
= 0, zoffset
= 0;
/* Shift the row by the full clip-space extent (2); the direction changes
 * every two rows and cycles through -X, +X, -Y, +Y. */
310 if (cull
&& view_culling
) {
311 unsigned side
= (y
/ 2) % 4;
313 if (side
== 0) xoffset
= -2;
314 else if (side
== 1) xoffset
= 2;
315 else if (side
== 2) yoffset
= -2;
316 else if (side
== 3) yoffset
= 2;
/*
 * Degenerate row: reserve the vertex count of a full strip
 * (2 + 2 * num_quads_per_dim) and write the origin for each vertex, so
 * every triangle of the row has zero area.
 */
319 if (cull
&& degenerate_prims
) {
320 unsigned elem
= *num_vertices
* 3;
321 *num_vertices
+= 2 + num_quads_per_dim
* 2;
322 assert(*num_vertices
<= max_vertices
);
324 for (unsigned x
= 0; x
< 2 + num_quads_per_dim
* 2; x
++) {
325 vertices
[elem
++] = 0;
326 vertices
[elem
++] = 0;
327 vertices
[elem
++] = 0;
/*
 * Regular row. Every row after the first reserves four extra vertices,
 * which are used to stitch this strip to the previous one with zero-area
 * triangles.
 */
332 unsigned elem
= *num_vertices
* 3;
333 bool add_degenerates
= y
> 0;
334 *num_vertices
+= (add_degenerates
? 4 : 0) + 2 + num_quads_per_dim
* 2;
335 assert(*num_vertices
<= max_vertices
);
/* NOTE(review): the body of this branch is missing from the excerpt. */
341 if (cull
&& back_face_culling
) {
346 /* Add degenerated triangles to connect with the previous triangle strip. */
347 if (add_degenerates
) {
348 unsigned base
= elem
;
/* Repeat the last vertex written so far (the three floats just before
 * `base`). */
350 vertices
[elem
++] = vertices
[base
- 3];
351 vertices
[elem
++] = vertices
[base
- 2];
352 vertices
[elem
++] = vertices
[base
- 1];
/* The strip's first vertex (x, y1) is written once, or four times when
 * stitching to the previous row. */
355 for (unsigned i
= 0; i
< (add_degenerates
? 4 : 1); i
++) {
356 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
357 vertices
[elem
++] = yoffset
+ first
+ d
* y1
;
358 vertices
[elem
++] = zoffset
;
361 vertices
[elem
++] = xoffset
+ first
+ d
* x
;
362 vertices
[elem
++] = yoffset
+ first
+ d
* y0
;
363 vertices
[elem
++] = zoffset
;
/* Each further quad adds two vertices: (x + 1, y1) and (x + 1, y0). */
365 for (; x
< num_quads_per_dim
; x
++) {
366 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
367 vertices
[elem
++] = yoffset
+ first
+ d
* y1
;
368 vertices
[elem
++] = zoffset
;
370 vertices
[elem
++] = xoffset
+ first
+ d
* (x
+ 1);
371 vertices
[elem
++] = yoffset
+ first
+ d
* y0
;
372 vertices
[elem
++] = zoffset
;
/*
 * NOTE(review): the body of this loop is missing from the excerpt.
 * Presumably it fills `indices` with 0..*num_vertices-1; confirm against
 * the full file.
 */
377 for (unsigned i
= 0; i
< *num_vertices
; i
++)
380 *num_indices
= *num_vertices
;
/*
 * Enumerators of the draw-method enum (its header and earlier members are
 * not visible in this excerpt). Both values below are drawn with
 * glDrawElements(GL_TRIANGLE_STRIP, ...) in run_draw; for the PRIM_RESTART
 * variant run_test additionally enables GL_PRIMITIVE_RESTART.
 */
388 INDEXED_TRIANGLE_STRIP
,
389 INDEXED_TRIANGLE_STRIP_PRIM_RESTART
,
/*
 * Draw state shared between run_test (which sets it up) and run_draw (the
 * callback that issues the draws and therefore takes no such parameters).
 */
/* Draw method of the test currently being measured. */
393 static enum draw_method global_draw_method
;
/* Number of indices (indexed draws) or vertices (array draws) per draw. */
394 static unsigned count
;
/* Number of copies of the geometry stored back to back in the buffers. */
395 static unsigned num_duplicates
;
/* Copy used by the next draw; advanced modulo num_duplicates per draw. */
396 static unsigned duplicate_index
;
/* Size in bytes of one copy of the vertex data / index data. */
397 static unsigned vb_size
, ib_size
;
/*
 * Issue `iterations` draw calls using the method in global_draw_method.
 *
 * Each draw reads from a different copy of the geometry: indexed draws
 * offset the index pointer by ib_size * duplicate_index bytes, array draws
 * offset the first vertex by (vb_size / 12) * duplicate_index, where 12 is
 * the byte size of one xyz float vertex. duplicate_index then advances
 * modulo num_duplicates.
 *
 * NOTE(review): the (void*)(long) cast narrows the byte offset on
 * platforms where long is 32 bits wide; an intptr_t/uintptr_t cast would
 * avoid that. num_duplicates must be non-zero for the modulo below.
 * NOTE(review): some lines of this function (return type, braces, the
 * index-type argument of glDrawElements) are missing from this excerpt.
 */
400 run_draw(unsigned iterations
)
402 for (unsigned i
= 0; i
< iterations
; i
++) {
403 if (global_draw_method
== INDEXED_TRIANGLES
) {
404 glDrawElements(GL_TRIANGLES
, count
,
406 (void*)(long)(ib_size
* duplicate_index
));
407 } else if (global_draw_method
== TRIANGLES
) {
408 glDrawArrays(GL_TRIANGLES
, (vb_size
/ 12) * duplicate_index
, count
);
409 } else if (global_draw_method
== TRIANGLE_STRIP
) {
410 glDrawArrays(GL_TRIANGLE_STRIP
, (vb_size
/ 12) * duplicate_index
, count
);
411 } else if (global_draw_method
== INDEXED_TRIANGLE_STRIP
||
412 global_draw_method
== INDEXED_TRIANGLE_STRIP_PRIM_RESTART
) {
413 glDrawElements(GL_TRIANGLE_STRIP
, count
,
415 (void*)(long)(ib_size
* duplicate_index
));
/* Rotate to the next copy so consecutive draws do not reuse the same
 * buffer range. */
418 duplicate_index
= (duplicate_index
+ 1) % num_duplicates
;
/*
 * Build the geometry for one configuration, upload it, and measure the
 * draw rate with perf_measure_gpu_rate(run_draw, 0.05).
 *
 * debug_num_iterations - if non-zero, run_draw is called directly with this
 *                        iteration count
 * draw_method          - how the geometry is drawn; also decides whether an
 *                        index array is allocated and which generator runs
 * cull_method          - compared against BACK_FACE_CULLING, VIEW_CULLING,
 *                        DEGENERATE_PRIMS and RASTERIZER_DISCARD below
 * num_quads_per_dim    - grid size passed to the generator
 * quad_size_in_pixels  - requested quad size; halved until the grid fits
 *                        into the window
 * cull_percentage      - presumably forwarded to the generator (the
 *                        argument line is not visible in this excerpt)
 *
 * NOTE(review): this excerpt is missing lines, including the return type,
 * the declarations of `vb`, `ib` and `rate`, and whatever frees `vertices`
 * and `indices`. The malloc results are not checked in the visible lines.
 */
433 run_test(unsigned debug_num_iterations
, enum draw_method draw_method
,
434 enum cull_method cull_method
, unsigned num_quads_per_dim
,
435 double quad_size_in_pixels
, unsigned cull_percentage
)
437 const unsigned max_indices
= 8100000 * 3;
438 const unsigned max_vertices
= max_indices
;
/* Halve the quad size until the whole grid is narrower than the window;
 * the generators assert that the grid fits into clip space. */
440 while (num_quads_per_dim
* quad_size_in_pixels
>= WINDOW_SIZE
)
441 quad_size_in_pixels
*= 0.5;
443 /* Generate vertices. */
444 float *vertices
= (float*)malloc(max_vertices
* 12);
445 unsigned *indices
= NULL
;
/* Only the indexed draw methods get an index array; `indices` stays NULL
 * otherwise and is later used to choose `count`. */
447 if (draw_method
== INDEXED_TRIANGLES
||
448 draw_method
== INDEXED_TRIANGLE_STRIP
||
449 draw_method
== INDEXED_TRIANGLE_STRIP_PRIM_RESTART
)
450 indices
= (unsigned*)malloc(max_indices
* 4);
452 unsigned num_vertices
= 0, num_indices
= 0;
/* Strip-based methods use the strip generator; the second call below is
 * presumably the else branch for the triangle-list methods. */
453 if (draw_method
== TRIANGLE_STRIP
||
454 draw_method
== INDEXED_TRIANGLE_STRIP
||
455 draw_method
== INDEXED_TRIANGLE_STRIP_PRIM_RESTART
) {
456 gen_triangle_strip_tile(num_quads_per_dim
, quad_size_in_pixels
,
458 cull_method
== BACK_FACE_CULLING
,
459 cull_method
== VIEW_CULLING
,
460 cull_method
== DEGENERATE_PRIMS
,
461 max_vertices
, &num_vertices
, vertices
,
462 max_indices
, &num_indices
, indices
);
464 gen_triangle_tile(num_quads_per_dim
, quad_size_in_pixels
,
466 cull_method
== BACK_FACE_CULLING
,
467 cull_method
== VIEW_CULLING
,
468 cull_method
== DEGENERATE_PRIMS
,
469 max_vertices
, &num_vertices
, vertices
,
470 max_indices
, &num_indices
, indices
);
/*
 * Byte sizes of one copy: 12 bytes per xyz vertex, 4 bytes per index.
 * NOTE(review): vb_size is used as a divisor a few lines below, so it
 * must be non-zero (i.e. at least one vertex must have been generated).
 */
473 vb_size
= num_vertices
* 12;
474 ib_size
= num_indices
* 4;
476 /* Duplicate buffers and switch between them, so that no data is cached
477 * between draws. 32 MB should be greater than any cache.
479 num_duplicates
= MAX2(1, 32*1024*1024 / vb_size
);
481 /* Create buffers. */
/* The vertex buffer holds num_duplicates copies of the same data, each
 * uploaded at its own vb_size-aligned offset. */
483 glGenBuffers(1, &vb
);
484 glBindBuffer(GL_ARRAY_BUFFER
, vb
);
485 glBufferData(GL_ARRAY_BUFFER
,
486 vb_size
* num_duplicates
, NULL
, GL_STATIC_DRAW
);
487 for (unsigned i
= 0; i
< num_duplicates
; i
++)
488 glBufferSubData(GL_ARRAY_BUFFER
, vb_size
* i
, vb_size
, vertices
);
/* Same layout for the index buffer. NOTE(review): the condition guarding
 * this section and the trailing call arguments are not visible here. */
492 glGenBuffers(1, &ib
);
493 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER
, ib
);
494 glBufferData(GL_ELEMENT_ARRAY_BUFFER
,
495 ib_size
* num_duplicates
, NULL
,
497 for (unsigned i
= 0; i
< num_duplicates
; i
++) {
498 glBufferSubData(GL_ELEMENT_ARRAY_BUFFER
, ib_size
* i
,
503 /* Make sure all uploads are finished. */
/* Per-test GL state; both enables are undone after the measurement. */
507 if (cull_method
== RASTERIZER_DISCARD
)
508 glEnable(GL_RASTERIZER_DISCARD
);
509 if (draw_method
== INDEXED_TRIANGLE_STRIP_PRIM_RESTART
)
510 glEnable(GL_PRIMITIVE_RESTART
);
/* Positions: three tightly packed floats per vertex, starting at offset 0
 * of the vertex buffer. */
512 glBindBuffer(GL_ARRAY_BUFFER
, vb
);
513 glVertexPointer(3, GL_FLOAT
, 0, NULL
);
516 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER
, ib
);
/* Publish the parameters that run_draw reads from file-scope state. */
518 global_draw_method
= draw_method
;
519 count
= indices
? num_indices
: num_vertices
;
/* NOTE(review): a line between these two statements is missing from the
 * excerpt, so whether they are alternatives cannot be told from here. */
524 if (debug_num_iterations
)
525 run_draw(debug_num_iterations
);
527 rate
= perf_measure_gpu_rate(run_draw
, 0.05);
529 if (cull_method
== RASTERIZER_DISCARD
)
530 glDisable(GL_RASTERIZER_DISCARD
);
531 if (draw_method
== INDEXED_TRIANGLE_STRIP_PRIM_RESTART
)
532 glDisable(GL_PRIMITIVE_RESTART
);
535 glDeleteBuffers(1, &vb
);
537 glDeleteBuffers(1, &ib
);
/*
 * Run and print one group of result rows for a draw method / cull method
 * pair.
 *
 * draw_method, cull_method - configuration under test
 * num_quads_per_dim        - grid sizes, one per column of the table
 * num_prims                - primitive count for each grid size; the
 *                            measured rate is multiplied by it
 * num_prim_sets            - number of entries in the two arrays
 *
 * One row is produced per subtest: one per entry of cull_percentages for
 * back-face and view culling, one per entry of quad_sizes_in_pixels for
 * sub-pixel primitives, and a single row otherwise. Each row holds one
 * value per program in `progs` and per grid size.
 *
 * NOTE(review): this excerpt is missing lines (return type, several printf
 * calls, else branches and braces); comments describe only what is visible.
 */
542 run(enum draw_method draw_method
, enum cull_method cull_method
,
543 const unsigned *num_quads_per_dim
, const unsigned *num_prims
,
544 unsigned num_prim_sets
)
546 unsigned num_subtests
= 1;
547 static unsigned cull_percentages
[] = {100, 75, 50, 25};
548 static double quad_sizes_in_pixels
[] = {1.0 / 7, 0.25, 0.5};
550 if (cull_method
== BACK_FACE_CULLING
||
551 cull_method
== VIEW_CULLING
) {
552 num_subtests
= ARRAY_SIZE(cull_percentages
);
553 } else if (cull_method
== SUBPIXEL_PRIMS
) {
554 num_subtests
= ARRAY_SIZE(quad_sizes_in_pixels
);
557 for (unsigned subtest
= 0; subtest
< num_subtests
; subtest
++) {
558 /* 2 is the maximum prim size when everything fits into the window */
559 double quad_size_in_pixels
;
560 unsigned cull_percentage
;
/* Sub-pixel tests vary the quad size; the remaining assignments are
 * presumably the else branch, which fixes the size at 2 pixels and varies
 * the cull percentage instead. */
562 if (cull_method
== SUBPIXEL_PRIMS
) {
563 quad_size_in_pixels
= quad_sizes_in_pixels
[subtest
];
566 quad_size_in_pixels
= 2;
567 cull_percentage
= cull_percentages
[subtest
];
/* Label of the draw call printed in the first column. */
571 draw_method
== INDEXED_TRIANGLES
? "glDrawElements" :
572 draw_method
== TRIANGLES
? "glDrawArraysT" :
573 draw_method
== TRIANGLE_STRIP
? "glDrawArraysTS" :
574 draw_method
== INDEXED_TRIANGLE_STRIP
? "glDrawElemsTS" :
/* Label of the cull method printed in the second column. */
577 if (cull_method
== NONE
||
578 cull_method
== RASTERIZER_DISCARD
) {
580 cull_method
== NONE
? "none" : "rasterizer discard");
581 } else if (cull_method
== SUBPIXEL_PRIMS
) {
/* Primitives per pixel = (1 / quad size)^2 quads * 2 triangles each. */
582 printf("%2u small prims/pixel ",
583 (unsigned)((1.0 / quad_size_in_pixels
) *
584 (1.0 / quad_size_in_pixels
) * 2));
586 printf("%3u%% %-16s", cull_percentage
,
587 cull_method
== BACK_FACE_CULLING
? "back faces" :
588 cull_method
== VIEW_CULLING
? "culled by view" :
589 cull_method
== DEGENERATE_PRIMS
? "degenerate prims" :
594 for (unsigned prog
= 0; prog
< ARRAY_SIZE(progs
); prog
++) {
595 glUseProgram(progs
[prog
]);
/* NOTE(review): `i` is int while num_prim_sets is unsigned. */
600 for (int i
= 0; i
< num_prim_sets
; i
++) {
601 double rate
= run_test(0, draw_method
, cull_method
,
602 num_quads_per_dim
[i
],
603 quad_size_in_pixels
, cull_percentage
);
/* Convert the measured draw rate into primitives per second. */
604 rate
*= num_prims
[i
];
/* Two output forms: primitives per GPU clock (rate divided by the clock
 * in Hz) or billions of primitives per second. The condition choosing
 * between them is not visible; presumably gpu_freq_mhz being non-zero. */
607 rate
/= gpu_freq_mhz
* 1000000.0;
608 printf(",%7.4f", rate
);
610 printf(",%7.4f", rate
/ 1000000000);
/*
 * Body of the benchmark driver. The function header is not visible in this
 * excerpt; presumably this is the piglit display callback.
 *
 * It builds the table of grid sizes, prints the table header, then runs
 * every cull method with INDEXED_TRIANGLES and a reduced set of cull
 * methods with the remaining draw methods.
 */
622 glClear(GL_COLOR_BUFFER_BIT
| GL_DEPTH_BUFFER_BIT
);
/* Single debug draw that is presented on screen. NOTE(review): the
 * condition guarding these three statements is missing from the excerpt. */
626 glUseProgram(progs
[0]);
627 run_test(1, INDEXED_TRIANGLE_STRIP
, BACK_FACE_CULLING
, ceil(sqrt(0.5 * 512000)), 2, 50);
628 piglit_swap_buffers();
/* Each entry is the grid edge length n chosen so that 2 * n * n is roughly
 * the primitive count written in the expression. */
632 const unsigned num_quads_per_dim
[] = {
633 /* The second number is the approx. number of primitives. */
634 ceil(sqrt(0.5 * 1000)),
635 ceil(sqrt(0.5 * 2000)),
636 ceil(sqrt(0.5 * 4000)),
637 ceil(sqrt(0.5 * 6000)),
638 ceil(sqrt(0.5 * 8000)),
639 ceil(sqrt(0.5 * 16000)),
640 ceil(sqrt(0.5 * 32000)),
641 ceil(sqrt(0.5 * 128000)),
642 ceil(sqrt(0.5 * 512000)),
643 /* 512000 is the maximum number when everything fits into the window */
644 /* After that, the prim size decreases, so you'll get subpixel prims. */
645 ceil(sqrt(0.5 * 2000000)),
646 ceil(sqrt(0.5 * 8000000)),
/* Exact primitive count per grid: n * n quads, two triangles each. */
649 unsigned num_prims
[ARRAY_SIZE(num_quads_per_dim
)];
650 for (int i
= 0; i
< ARRAY_SIZE(num_quads_per_dim
); i
++)
651 num_prims
[i
] = num_quads_per_dim
[i
] * num_quads_per_dim
[i
] * 2;
/* Table header: the unit depends on whether a GPU clock was supplied. */
653 printf(" Measuring %-27s, 0 Varying 4 Varyings 8 Varyings\n",
654 gpu_freq_mhz
? "Prims/clock," : "GPrims/second,");
655 printf(" Draw Call , Cull Method ");
/* One column heading per primitive count (in thousands), repeated for
 * each program. */
657 for (unsigned prog
= 0; prog
< ARRAY_SIZE(progs
); prog
++) {
660 for (int i
= 0; i
< ARRAY_SIZE(num_prims
); i
++)
661 printf(", %4uK", num_prims
[i
] / 1000);
/* Indexed triangle lists are measured with every cull method. */
665 for (int cull_method
= 0; cull_method
< NUM_CULL_METHODS
; cull_method
++)
666 run(INDEXED_TRIANGLES
, cull_method
, num_quads_per_dim
, num_prims
, ARRAY_SIZE(num_prims
));
/* NOTE(review): the loop below starts at TRIANGLES and runs to
 * NUM_DRAW_METHODS, so it also covers the draw methods that run_draw
 * issues through glDrawElements(GL_TRIANGLE_STRIP, ...), not only
 * glDrawArrays as the comment below says. */
668 /* glDrawArrays: Only test NONE, BACK_FACE_CULLING, and RASTERIZER_DISCARD. */
669 for (int draw_method
= TRIANGLES
; draw_method
< NUM_DRAW_METHODS
; draw_method
++) {
670 for (int cull_method
= 0; cull_method
<= RASTERIZER_DISCARD
; cull_method
++)
671 run(draw_method
, cull_method
, num_quads_per_dim
, num_prims
, ARRAY_SIZE(num_prims
));