2 * Copyright (c) 2018 Advanced Micro Devices
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
/**
 * \file gs-max-output.cpp
 *
 * Stress the limits of what a geometry shader can output using a generic
 * geometry shader with points as input and output primitives, allowing
 * the following parameters to be varied:
 * - number of input instances (instanced draws)
 * - number of input points per instance
 * - number of invocations (GS instances)
 * - number of output vertices per invocation
 * - number of output components per vertex
 */
37 #include "piglit-util-gl.h"
/* Edge length, in pixels, of the square test window. It is used for both
 * config.window_width and config.window_height, and is pasted into the shader
 * sources through STR(WINDOW_SIZE) to convert between vertex ids and pixels. */
42 #define WINDOW_SIZE 256
44 PIGLIT_GL_TEST_CONFIG_BEGIN
46 config
.supports_gl_compat_version
= 32;
47 config
.supports_gl_core_version
= 32;
48 config
.window_width
= WINDOW_SIZE
;
49 config
.window_height
= WINDOW_SIZE
;
50 config
.window_visual
= PIGLIT_GL_VISUAL_DOUBLE
| PIGLIT_GL_VISUAL_RGBA
;
51 config
.khr_no_error_support
= PIGLIT_NO_ERRORS
;
53 PIGLIT_GL_TEST_CONFIG_END
/* Forwards its argument to PASTE after macro expansion, so STR(WINDOW_SIZE)
 * can be concatenated with the adjacent string literals of the shader sources.
 * NOTE(review): PASTE is not visible in this excerpt - presumably the
 * stringifying `#x` half of the usual two-level idiom; confirm. */
56 #define STR(x) PASTE(x)
/* Parameters of one test case. They are accessed through `struct testcase`
 * pointers in print_testcase(), add_testcase() and run_testcase().
 * (The struct's opening and closing lines are not visible in this excerpt.) */
59 unsigned num_instances
; /* draw instances */
60 unsigned num_points
; /* draw size / count */
61 unsigned num_invocations
; /* GS invocations / instances */
62 unsigned num_outputs
; /* # vertex output per GS invocation */
63 unsigned num_components
; /* # extra components per GS output vertex */
/**
 * Lookup key for the cache of compiled fragment shaders.
 *
 * The fragment shader source takes a single parameter, the number of payload
 * components, so that one value is enough to identify a compiled shader in
 * the `fragmentshaders` map.
 */
struct fragmentshaderkey {
	unsigned num_components;

	/** Ordering used by std::map: keys sort by component count. */
	bool operator<(const fragmentshaderkey &o) const {
		return num_components < o.num_components;
	}
};
/**
 * Lookup key for the cache of linked test programs.
 *
 * A geometry shader is generated from three parameters (invocations, output
 * vertices per invocation, payload components per vertex), so the triple
 * identifies one entry of the `testprograms` map.
 */
struct geometryshaderkey {
	unsigned num_invocations;
	unsigned num_outputs;
	unsigned num_components;

	/**
	 * Lexicographic ordering on (num_invocations, num_outputs,
	 * num_components), as required by std::map.
	 */
	bool operator<(const geometryshaderkey &o) const {
		/* The first field that differs decides; later fields only
		 * break ties. */
		if (num_invocations != o.num_invocations)
			return num_invocations < o.num_invocations;
		if (num_outputs != o.num_outputs)
			return num_outputs < o.num_outputs;
		return num_components < o.num_components;
	}
};
94 static std::map
<fragmentshaderkey
, GLuint
> fragmentshaders
;
95 static std::map
<geometryshaderkey
, GLuint
> testprograms
;
/* Baseline test case: piglit_init() copies it into explicit_testcase before
 * applying the command-line overrides.
 * (The initializer values are not visible in this excerpt.) */
97 static const struct testcase default_testcase
= {
101 static GLuint vs_shader
;
103 static std::vector
<testcase
> testcases
;
104 static GLuint max_gs_invocations
;
105 static GLuint max_gs_out_vertices
;
106 static GLuint max_gs_total_out_components
;
107 static GLuint max_gs_out_components
;
108 static unsigned max_gs_out_vertices_real
;
/* Vertex shader source (only part of the string is visible in this excerpt).
 * It gives every input point a flat id,
 * gl_InstanceID * u_verts_per_instance + gl_VertexID, and hands it to the
 * geometry shader in vs_gs_id. */
110 static const char vs_text
[] =
113 "uniform int u_verts_per_instance;\n"
115 "out int vs_gs_id;\n"
118 " vs_gs_id = gl_InstanceID * u_verts_per_instance + gl_VertexID;\n"
/* GLSL source fragment defining seq_next(), an integer scrambling step
 * (add one, multiply, xor with a right shift). Both the geometry and the
 * fragment shader call seq_next(), so presumably this fragment is spliced into
 * each of them - confirm against the full shader strings. The rest of the
 * macro is not visible in this excerpt. */
121 /* Those numbers really don't matter much for what we're trying to do here. */
122 #define GEN_SEQUENCE \
123 "int seq_next(int x) {\n" \
124 " x = (x + 1) * 709900053;\n" \
125 " x = x ^ (x >> 17);\n" \
/* Geometry shader source, used as an asprintf() format string by
 * add_testcase(). The three %d placeholders receive, in order,
 * NUM_INVOCATIONS, NUM_OUT_VERTICES and NUM_PAYLOAD_COMPONENTS; a literal '%'
 * is therefore written as %%.
 *
 * Every output vertex gets a unique id derived from the input id, the
 * invocation and the loop index. The id selects one pixel of the window
 * (x = id % WINDOW_SIZE, y = id / WINDOW_SIZE) and the point is placed at
 * that pixel's centre. When payload components are enabled, values produced
 * by seq_next() are written to gs_ps_data[] (how `val` is seeded is not
 * visible in this excerpt). */
129 static const char gs_text
[] =
131 "#extension GL_ARB_gpu_shader5 : require\n"
133 "#define NUM_INVOCATIONS %d\n"
134 "#define NUM_OUT_VERTICES %d\n"
135 "#define NUM_PAYLOAD_COMPONENTS %d\n"
137 "layout(points, invocations = NUM_INVOCATIONS) in;\n"
138 "layout(points, max_vertices = NUM_OUT_VERTICES) out;\n"
140 "in int vs_gs_id[];\n"
141 "#if NUM_PAYLOAD_COMPONENTS\n"
142 "flat out int gs_ps_data[NUM_PAYLOAD_COMPONENTS];\n"
148 " for (int i = 0; i < NUM_OUT_VERTICES; ++i) {\n"
149 " int id = (vs_gs_id[0] * NUM_INVOCATIONS + gl_InvocationID) * NUM_OUT_VERTICES + i;\n"
150 " int x = id %% " STR(WINDOW_SIZE
) ";\n"
151 " int y = id / " STR(WINDOW_SIZE
) ";\n"
152 " gl_Position.x = (float(x) + 0.5) / " STR(WINDOW_SIZE
) " * 2.0 - 1.0;\n"
153 " gl_Position.y = (float(y) + 0.5) / " STR(WINDOW_SIZE
) " * 2.0 - 1.0;\n"
154 " gl_Position.z = 0.0;\n"
155 " gl_Position.w = 1.0;\n"
157 "#if NUM_PAYLOAD_COMPONENTS\n"
159 " for (int j = 0; j < NUM_PAYLOAD_COMPONENTS; ++j) {\n"
160 " gs_ps_data[j] = val;\n"
161 " val = seq_next(val);\n"
/* Fragment shader source, used as an asprintf() format string by
 * add_testcase(); its single %d placeholder receives NUM_PAYLOAD_COMPONENTS.
 *
 * With payload enabled, the shader recomputes the id of its pixel from
 * gl_FragCoord and compares each gs_ps_data[j] against the expected sequence
 * value. On a mismatch it writes a diagnostic colour: red = 1, green = index
 * of the failing component, blue / alpha = low byte of the expected / actual
 * value. Otherwise the output is opaque green, vec4(0, 1, 0, 1).
 * (How `val` is seeded is not visible in this excerpt.)
 *
 * NOTE(review): the diagnostic green channel divides by
 * (NUM_PAYLOAD_COMPONENTS - 1), which is zero for a single component. This
 * only affects the colour reported for a failure; confirm it is acceptable. */
169 static const char fs_text
[] =
172 "#define NUM_PAYLOAD_COMPONENTS %d\n"
174 "#if NUM_PAYLOAD_COMPONENTS\n"
175 "flat in int gs_ps_data[NUM_PAYLOAD_COMPONENTS];\n"
177 "out vec4 out_color;\n"
182 "#if NUM_PAYLOAD_COMPONENTS\n"
183 " int id = int(gl_FragCoord.y) * " STR(WINDOW_SIZE
) " + int(gl_FragCoord.x);\n"
185 " for (int j = 0; j < NUM_PAYLOAD_COMPONENTS; ++j) {\n"
186 " if (val != gs_ps_data[j]) {\n"
187 " out_color.x = 1.0;\n"
188 " out_color.y = float(j) / (NUM_PAYLOAD_COMPONENTS - 1);\n"
189 " out_color.z = float(val & 0xff) / 255;\n"
190 " out_color.w = float(gs_ps_data[j] & 0xff) / 255;\n"
193 " val = seq_next(val);\n"
196 " out_color = vec4(0, 1, 0, 1);\n"
/* Print the five parameters of a test case on one line of stdout.
 *
 * tc: test case to describe; all five members are read. */
200 print_testcase(const struct testcase
*tc
)
202 printf("Case: instances = %u points = %u invocations = %u "
203 "outputs = %u components = %u\n",
204 tc
->num_instances
, tc
->num_points
, tc
->num_invocations
,
205 tc
->num_outputs
, tc
->num_components
);
/* Validate a test case, make sure a linked program exists for it, and queue
 * it in `testcases`.
 *
 * tc: test case to add; it is copied into `testcases` at the end.
 *
 * The checks print a message to stderr. What happens after each message
 * (return value, early exit) is not visible in this excerpt, and neither are
 * the declarations of `text`, `fs_shader` and `gs_shader`. */
209 add_testcase(const struct testcase
*tc
)
/* Sanity bound: reject any parameter above 64 * 1024. */
211 if (tc
->num_instances
> 64 * 1024 ||
212 tc
->num_points
> 64 * 1024 ||
213 tc
->num_invocations
> 64 * 1024 ||
214 tc
->num_outputs
> 64 * 1024 ||
215 tc
->num_components
> 64 * 1024) {
216 fprintf(stderr
, "Excessive test case size. Are you sure?\n");
221 /* Check against implementation-defined limits. */
222 if (tc
->num_outputs
> (unsigned)max_gs_out_vertices
) {
223 fprintf(stderr
, "Too many output vertices (max: %d)\n",
224 max_gs_out_vertices
);
/* Each output vertex is charged 4 components on top of the payload -
 * presumably the vec4 gl_Position written by the geometry shader; confirm. */
228 if (tc
->num_outputs
* (4 + tc
->num_components
) > (unsigned)max_gs_total_out_components
) {
229 fprintf(stderr
, "Too many output components (max: %d)\n",
230 max_gs_total_out_components
);
234 if (tc
->num_invocations
> (unsigned)max_gs_invocations
) {
235 fprintf(stderr
, "Too many GS invocations (max: %d)\n",
241 /* Compile GS shader and link program */
242 geometryshaderkey gskey
;
243 gskey
.num_invocations
= tc
->num_invocations
;
244 gskey
.num_outputs
= tc
->num_outputs
;
245 gskey
.num_components
= tc
->num_components
;
/* Only build a program when this parameter combination has not been seen. */
246 if (testprograms
.find(gskey
) == testprograms
.end()) {
/* Fragment shaders depend on the component count only, so they are cached
 * separately and shared between programs. */
249 fragmentshaderkey fskey
;
250 fskey
.num_components
= tc
->num_components
;
251 std::map
<fragmentshaderkey
, GLuint
>::const_iterator fsit
=
252 fragmentshaders
.find(fskey
);
253 if (fsit
== fragmentshaders
.end()) {
254 if (asprintf(&text
, fs_text
, tc
->num_components
) < 0)
257 piglit_compile_shader_text(GL_FRAGMENT_SHADER
, text
);
260 fsit
= fragmentshaders
.insert(std::make_pair(fskey
, fs_shader
)).first
;
/* The geometry shader source is generated from all three key parameters. */
263 if (asprintf(&text
, gs_text
, tc
->num_invocations
, tc
->num_outputs
,
264 tc
->num_components
) < 0)
267 piglit_compile_shader_text(GL_GEOMETRY_SHADER
, text
);
270 GLuint prog
= glCreateProgram();
271 glAttachShader(prog
, vs_shader
);
272 glAttachShader(prog
, gs_shader
);
273 glAttachShader(prog
, fsit
->second
);
275 if (!piglit_link_check_status(prog
))
276 piglit_report_result(PIGLIT_FAIL
);
/* Unlike the fragment shader, the geometry shader object is not cached. */
278 glDeleteShader(gs_shader
);
280 testprograms
.insert(std::make_pair(gskey
, prog
));
283 testcases
.push_back(*tc
);
/* Draw one test case and compare the window contents with the expected image.
 *
 * tc: test case to run; a program for its (invocations, outputs, components)
 *     key must already be present in `testprograms` (asserted).
 *
 * The window is cleared to opaque black, then num_instances instances of
 * num_points points are drawn with the cached program. The expected image has
 * alpha 1.0 in every pixel and green 1.0 in the pixels that should be covered.
 * The condition selecting those pixels is not visible in this excerpt; it
 * presumably uses the product computed below. The return statement is not
 * visible either.
 *
 * NOTE(review): `expected` is allocated with new[]; the matching delete[] is
 * not visible in this excerpt - confirm it exists. */
287 run_testcase(const struct testcase
*tc
)
291 glClearColor(0, 0, 0, 1);
292 glClear(GL_COLOR_BUFFER_BIT
);
294 geometryshaderkey gskey
;
295 gskey
.num_invocations
= tc
->num_invocations
;
296 gskey
.num_outputs
= tc
->num_outputs
;
297 gskey
.num_components
= tc
->num_components
;
298 std::map
<geometryshaderkey
, GLuint
>::const_iterator progit
=
299 testprograms
.find(gskey
);
300 assert(progit
!= testprograms
.end());
302 glUseProgram(progit
->second
);
303 glUniform1i(glGetUniformLocation(progit
->second
, "u_verts_per_instance"),
306 glDrawArraysInstanced(GL_POINTS
, 0, tc
->num_points
, tc
->num_instances
);
/* One RGBA float quadruple per window pixel. */
308 float *expected
= new float[WINDOW_SIZE
* WINDOW_SIZE
* 4];
/* Total number of points the draw emits: every factor multiplies the output. */
310 tc
->num_instances
* tc
->num_points
* tc
->num_invocations
* tc
->num_outputs
;
311 memset(expected
, 0, sizeof(float) * WINDOW_SIZE
* WINDOW_SIZE
* 4);
313 for (unsigned i
= 0; i
< WINDOW_SIZE
* WINDOW_SIZE
; ++i
) {
315 expected
[4 * i
+ 1] = 1.0;
316 expected
[4 * i
+ 3] = 1.0;
319 int result
= piglit_probe_image_rgba(0, 0, WINDOW_SIZE
, WINDOW_SIZE
, expected
);
/* Choose instance and point counts for a test case whose invocation and
 * output counts are already fixed, aiming at about one output point per
 * window pixel.
 *
 * tc:                 template test case; num_invocations and num_outputs
 *                     are read.
 * explicit_instances: when true, num_instances is taken as given.
 * explicit_points:    when true, num_points is taken as given.
 *
 * The declaration of `tc1` and what is done with it after each adjustment
 * are not visible in this excerpt (presumably a copy of `tc` that is passed
 * to add_testcase(); confirm). */
325 generate_testcases_max(const testcase
&tc
, bool explicit_instances
, bool explicit_points
)
/* Number of output points produced per input point. */
327 unsigned amplify
= tc
.num_invocations
* tc
.num_outputs
;
/* Number of input points needed to produce WINDOW_SIZE * WINDOW_SIZE outputs. */
328 double target_in_points
= double(WINDOW_SIZE
* WINDOW_SIZE
) / amplify
;
/* Points kept as they are: derive the instance count, at least 1. */
330 if (!explicit_instances
) {
332 tc1
.num_instances
= MAX2(1, (unsigned)(target_in_points
/ tc1
.num_points
));
/* Instances kept as they are: derive the point count, at least 1. */
336 if (!explicit_points
) {
338 tc1
.num_points
= MAX2(1, (unsigned)(target_in_points
/ tc1
.num_instances
));
/* Both free: start from the square root so the two counts are of similar size. */
342 if (!explicit_instances
&& !explicit_points
) {
344 tc1
.num_instances
= MAX2(1, (unsigned)sqrt(target_in_points
));
345 tc1
.num_points
= MAX2(1, (unsigned)(target_in_points
/ tc1
.num_instances
));
349 if (explicit_instances
&& explicit_points
)
/* Test setup: parse the command line, compile the shared vertex shader, query
 * the geometry shader limits and build the list of test cases.
 *
 * argc, argv: command line. Recognised options, each followed by a value:
 *             -instances, -points, -invocations, -outputs, -components,
 *             and -scan followed by two values (seed and count).
 *
 * Many statements of this function are not visible in this excerpt (the
 * declarations of `i`, `vao` and `tc1`, the handling after error messages,
 * the case labels of the switch, the calls that consume the random test
 * cases). Comments below describe the visible parts only. */
354 piglit_init(int argc
, char **argv
)
356 bool explicit_instances
= false;
357 bool explicit_points
= false;
358 bool explicit_invocations
= false;
359 bool explicit_outputs
= false;
360 bool explicit_components
= false;
361 bool scan_mode
= false;
362 unsigned scan_seed
= 0;
363 unsigned scan_count
= 0;
364 struct testcase explicit_testcase
;
366 piglit_require_extension("GL_ARB_gpu_shader5");
/* Start from the defaults; options override individual members. */
367 memcpy(&explicit_testcase
, &default_testcase
, sizeof(explicit_testcase
));
/* The condition i + 1 < argc guarantees argv[i + 1] exists for every option.
 * NOTE(review): values are parsed with atoi(), which reports no errors;
 * no validation of the parsed values is visible here. */
370 for (i
= 1; i
+ 1 < argc
; ++i
) {
371 if (!strcmp(argv
[i
], "-instances")) {
372 explicit_testcase
.num_instances
= atoi(argv
[i
+ 1]);
373 explicit_instances
= true;
375 } else if (!strcmp(argv
[i
], "-points")) {
376 explicit_testcase
.num_points
= atoi(argv
[i
+ 1]);
377 explicit_points
= true;
379 } else if (!strcmp(argv
[i
], "-invocations")) {
380 explicit_testcase
.num_invocations
= atoi(argv
[i
+ 1]);
381 explicit_invocations
= true;
383 } else if (!strcmp(argv
[i
], "-outputs")) {
384 explicit_testcase
.num_outputs
= atoi(argv
[i
+ 1]);
385 explicit_outputs
= true;
387 } else if (!strcmp(argv
[i
], "-components")) {
388 explicit_testcase
.num_components
= atoi(argv
[i
+ 1]);
389 explicit_components
= true;
391 } else if (!strcmp(argv
[i
], "-scan")) {
393 fprintf(stderr
, "-scan: too few arguments\n");
/* -scan takes two values: the random seed and the number of test cases. */
396 scan_seed
= atoi(argv
[i
+ 1]);
397 scan_count
= atoi(argv
[i
+ 2]);
404 fprintf(stderr
, "Unknown argument: %s\n", argv
[i
]);
408 vs_shader
= piglit_compile_shader_text(GL_VERTEX_SHADER
, vs_text
);
409 if (!piglit_check_gl_error(GL_NO_ERROR
))
410 piglit_report_result(PIGLIT_FAIL
);
412 /* Various other GL objects needed by the test */
413 glGenVertexArrays(1, &vao
);
414 glBindVertexArray(vao
);
/* The limit variables are GLuint, hence the pointer casts for glGetIntegerv(). */
416 glGetIntegerv(GL_MAX_GEOMETRY_OUTPUT_VERTICES
, (GLint
*)&max_gs_out_vertices
);
417 glGetIntegerv(GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS
,
418 (GLint
*)&max_gs_total_out_components
);
419 glGetIntegerv(GL_MAX_GEOMETRY_OUTPUT_COMPONENTS
,
420 (GLint
*)&max_gs_out_components
);
421 glGetIntegerv(GL_MAX_GEOMETRY_SHADER_INVOCATIONS
, (GLint
*)&max_gs_invocations
);
422 if (!piglit_check_gl_error(GL_NO_ERROR
))
423 piglit_report_result(PIGLIT_FAIL
);
/* Even with no payload each vertex costs 4 components, which may limit the
 * vertex count further than GL_MAX_GEOMETRY_OUTPUT_VERTICES does. */
425 max_gs_out_vertices_real
= MIN2(max_gs_out_vertices
,
426 max_gs_total_out_components
/ 4);
431 /* First, generate test cases that max out each of the dimensions */
432 testcase tc0
= explicit_testcase
;
433 if (!explicit_invocations
)
434 tc0
.num_invocations
= max_gs_invocations
;
/* Case 1: maximise the number of output vertices; the component count is
 * either derived from it or, when given explicitly, constrains it. */
436 if (!explicit_outputs
) {
439 if (!explicit_components
) {
440 tc1
.num_outputs
= max_gs_out_vertices_real
;
441 tc1
.num_components
= MIN2(max_gs_total_out_components
/ tc1
.num_outputs
,
442 max_gs_out_components
) - 4;
444 tc1
.num_outputs
= MIN2(max_gs_total_out_components
/ (4 + tc1
.num_components
),
445 max_gs_out_vertices_real
);
448 generate_testcases_max(tc1
, explicit_instances
, explicit_points
);
/* Case 2: maximise the number of components per vertex; the vertex count is
 * either derived from it or, when given explicitly, constrains it. */
451 if (!explicit_components
) {
454 if (!explicit_outputs
) {
455 tc1
.num_components
= max_gs_out_components
- 4;
456 tc1
.num_outputs
= MIN2(max_gs_total_out_components
/ (4 + tc1
.num_components
),
457 max_gs_out_vertices_real
);
459 tc1
.num_components
= MIN2(max_gs_total_out_components
/ tc1
.num_outputs
,
460 max_gs_out_components
) - 4;
463 generate_testcases_max(tc1
, explicit_instances
, explicit_points
);
/* Both given explicitly: nothing to maximise, use tc0 directly. */
466 if (explicit_outputs
&& explicit_components
)
467 generate_testcases_max(tc0
, explicit_instances
, explicit_points
);
469 /* Generate additional tests randomly */
470 while (testcases
.size() < scan_count
) {
471 testcase tc
= explicit_testcase
;
/* Pick outputs and components in a random order (or the forced order when one
 * of them is explicit); the second pick is bounded by the first.
 * NOTE(review): the "- 4" is applied to unsigned values; with an explicit
 * num_outputs of 0, or one so large that the quotient drops below 4, the
 * bound would divide by zero or wrap around - confirm inputs are checked. */
473 if (!explicit_outputs
|| !explicit_components
) {
474 if (explicit_outputs
|| rand() & 1) {
475 unsigned max_components
=
476 MIN2(max_gs_total_out_components
/ tc
.num_outputs
,
477 max_gs_out_components
) - 4;
478 tc
.num_components
= rand() % (max_components
+ 1);
480 if (!explicit_outputs
) {
481 unsigned max_outputs
=
482 MIN2(max_gs_total_out_components
/ (4 + tc
.num_components
),
483 max_gs_out_vertices_real
);
484 tc
.num_outputs
= 1 + rand() % max_outputs
;
487 unsigned max_outputs
=
488 MIN2(max_gs_total_out_components
/ (4 + tc
.num_components
),
489 max_gs_out_vertices_real
);
490 tc
.num_outputs
= 1 + rand() % max_outputs
;
492 if (!explicit_components
) {
493 unsigned max_components
=
494 MIN2(max_gs_total_out_components
/ tc
.num_outputs
,
495 max_gs_out_components
) - 4;
496 tc
.num_components
= rand() % (max_components
+ 1);
501 if (!explicit_invocations
)
502 tc
.num_invocations
= 1 + rand() % max_gs_invocations
;
/* Input points needed to cover the window, rounded up, at least 1. */
504 unsigned amplify
= tc
.num_invocations
* tc
.num_outputs
;
505 unsigned target_in_points
=
506 MAX2(1, (WINDOW_SIZE
* WINDOW_SIZE
+ amplify
- 1) / amplify
);
/* Randomly choose one of several ways to split the input points between
 * instances and points per instance (the case labels are not visible here). */
508 switch (rand() % 4) {
510 tc
.num_points
= 1 + rand() % target_in_points
;
511 tc
.num_instances
= 1 + rand() % (1 + target_in_points
/ tc
.num_points
);
514 tc
.num_instances
= 1 + rand() % target_in_points
;
515 tc
.num_points
= 1 + rand() % (1 + target_in_points
/ tc
.num_instances
);
/* Instance count chosen near the square root of the target. */
518 unsigned min
= MAX2(1, sqrt(target_in_points
) / 2);
519 unsigned max
= MIN2(sqrt(target_in_points
) * 3 / 2,
521 tc
.num_instances
= min
+ rand() % (max
- min
+ 1);
522 tc
.num_points
= 1 + rand() % (1 + target_in_points
/ tc
.num_instances
);
530 add_testcase(&explicit_testcase
);
/* Runs every queued test case with run_testcase(), checks for GL errors,
 * presents the results, and reports PIGLIT_PASS or PIGLIT_FAIL depending on
 * `pass`. The enclosing function header, the declaration of `pass` and the
 * statements that update it are not visible in this excerpt. */
539 for (unsigned i
= 0; i
< testcases
.size(); ++i
) {
540 if (!run_testcase(&testcases
[i
]))
544 if (!piglit_check_gl_error(GL_NO_ERROR
))
547 piglit_present_results();
549 return pass
? PIGLIT_PASS
: PIGLIT_FAIL
;