# Copyright © 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from __future__ import print_function, division, absolute_import
import os
import random
import sys

import numpy as np
from mako import exceptions

from templates import template_file
from modules import utils
35 def _as_type(val
, type):
36 """Helper that casts with overflow"""
37 return np
.array([val
]).astype(type)[0]
def int32(val):
    """Cast ``val`` to a signed 32-bit integer, wrapping on overflow."""
    return _as_type(val, np.int32)
def int64(val):
    """Cast ``val`` to a signed 64-bit integer, wrapping on overflow."""
    return _as_type(val, np.int64)
def uint32(val):
    """Cast ``val`` to an unsigned 32-bit integer, wrapping on overflow."""
    return _as_type(val, np.uint32)
def uint64(val):
    """Cast ``val`` to an unsigned 64-bit integer, wrapping on overflow."""
    return _as_type(val, np.uint64)
def generate_results_commutative(srcs, operator):
    """Generate results for an operator that is commutative.

    Commutative operators will only generate an upper-right triangular
    matrix of results, and the diagonal will be missing.

    Returns a list of ``operator(srcs[i], srcs[j])`` for every ``i < j``,
    ordered by ``i`` and then by ``j``.
    """
    count = len(srcs)
    return [operator(lhs, srcs[j])
            for i, lhs in enumerate(srcs)
            for j in range(i + 1, count)]
def generate_results_commutative_with_diagonal(srcs, operator):
    """Generate results for an operator that is commutative.

    Commutative operators will only generate an upper-right triangular
    matrix of results, but the diagonal must also be explicitly stored.

    Returns a list of ``operator(srcs[i], srcs[j])`` for every ``i <= j``,
    ordered by ``i`` and then by ``j``.
    """
    count = len(srcs)
    return [operator(lhs, srcs[j])
            for i, lhs in enumerate(srcs)
            for j in range(i, count)]
def generate_results_without_diagonal(srcs, operator):
    """Generate full matrix of results without the diagonal.

    Returns a list of ``operator(srcs[i], srcs[j])`` for every ordered pair
    with ``i != j``, in row-major order.
    """
    return [operator(lhs, rhs)
            for i, lhs in enumerate(srcs)
            for j, rhs in enumerate(srcs)
            if i != j]
def generate_results_empty(unused1, unused2):
    """Some tests don't need any explicit results stored in the shader.

    Both arguments are ignored; they exist so this function can be called
    like the other result generators.
    """
    return []
def abs_isub32(_a, _b):
    """Return |a - b| of two signed 32-bit operands as an unsigned 32-bit value.

    The operands are first wrapped to uint32 and then reinterpreted as int32.
    """
    a = int32(uint32(_a))
    b = int32(uint32(_b))

    # The signed difference may not fit in int32.  The wrapped bit pattern,
    # reinterpreted as unsigned, is still the correct magnitude, so silence
    # the overflow warning just for this computation.  errstate restores the
    # previous setting even if an exception is raised.
    with np.errstate(over='ignore'):
        diff = a - b if a > b else b - a

    return diff.astype(np.uint32)
def abs_isub64(_a, _b):
    """Return |a - b| of two signed 64-bit operands as an unsigned 64-bit value.

    Both operands must be NumPy scalars (``astype`` is called on them); they
    are reinterpreted as int64 before the comparison.
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)

    # The signed difference may not fit in int64.  The wrapped bit pattern,
    # reinterpreted as unsigned, is still the correct magnitude, so silence
    # the overflow warning just for this computation.  errstate restores the
    # previous setting even if an exception is raised.
    with np.errstate(over='ignore'):
        diff = a - b if a > b else b - a

    return diff.astype(np.uint64)
def abs_usub32(_a, _b):
    """Return |a - b| of two operands wrapped to unsigned 32-bit."""
    a = uint32(_a)
    b = uint32(_b)

    # Always subtract the smaller from the larger so the result never wraps.
    return a - b if a > b else b - a
def abs_usub64(_a, _b):
    """Return |a - b| of two operands reinterpreted as unsigned 64-bit.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.uint64)
    b = _b.astype(np.uint64)

    # Always subtract the smaller from the larger so the result never wraps.
    return a - b if a > b else b - a
def iadd_sat32(_a, _b):
    """Return a + b for signed 32-bit operands, clamped to the int32 range."""
    a = int32(uint32(_a))
    b = int32(uint32(_b))

    # Compare b against the remaining headroom rather than computing a + b
    # first, so that the check itself cannot overflow.
    if a > 0:
        if b > (np.iinfo(np.int32).max - a):
            return np.iinfo(np.int32).max
    else:
        if b < (np.iinfo(np.int32).min - a):
            return np.iinfo(np.int32).min

    return a + b
def uadd_sat32(_a, _b):
    """Return a + b for unsigned 32-bit operands, clamped to the uint32 maximum."""
    a = uint32(_a)
    b = uint32(_b)

    # Compare b against the remaining headroom so the check cannot wrap.
    if b > (np.iinfo(np.uint32).max - a):
        return np.iinfo(np.uint32).max

    return a + b
def iadd_sat64(_a, _b):
    """Return a + b for signed 64-bit operands, clamped to the int64 range.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)

    # Compare b against the remaining headroom rather than computing a + b
    # first, so that the check itself cannot overflow.
    if a > 0:
        if b > (np.iinfo(np.int64).max - a):
            return np.iinfo(np.int64).max
    else:
        if b < (np.iinfo(np.int64).min - a):
            return np.iinfo(np.int64).min

    return a + b
def uadd_sat64(_a, _b):
    """Return a + b for unsigned 64-bit operands, clamped to the uint64 maximum.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.uint64)
    b = _b.astype(np.uint64)

    # Compare b against the remaining headroom so the check cannot wrap.
    if b > (np.iinfo(np.uint64).max - a):
        return np.iinfo(np.uint64).max

    return a + b
def isub_sat32(a, b):
    """Return a - b for signed 32-bit operands, clamped to the int32 range."""
    # Do the subtraction in 64 bits, where it cannot overflow, then clamp.
    r = int64(int32(a)) - int64(int32(b))

    if r > int64(0x07fffffff):
        return int32(0x7fffffff)

    if r < int64(-0x080000000):
        return int32(-0x80000000)

    return int32(r)
def usub_sat32(_a, _b):
    """Return a - b for unsigned 32-bit operands, clamped at zero."""
    a = uint32(_a)
    b = uint32(_b)

    return a - b if a > b else uint32(0)
def isub_sat64(_a, _b):
    """Return a - b for signed 64-bit operands, clamped to the int64 range.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)

    # NOTE(review): the two guard conditions are not visible in the reviewed
    # text.  They are chosen so that neither range check below can itself
    # overflow: ``a - max`` is only evaluated for a >= 0 and ``min + b`` only
    # for b >= 0.  The difference cannot overflow in the remaining cases.
    if a >= 0:
        if (a - np.iinfo(np.int64).max) > b:
            return np.iinfo(np.int64).max
    elif b >= 0:
        if a < (np.iinfo(np.int64).min + b):
            return np.iinfo(np.int64).min

    return a - b
def usub_sat64(_a, _b):
    """Return a - b for unsigned 64-bit operands, clamped at zero.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.uint64)
    b = _b.astype(np.uint64)

    return a - b if a > b else np.uint64(0)
def u_hadd32(_a, _b):
    """Return the average of two unsigned 32-bit operands, rounded down."""
    a = uint32(_a)
    b = uint32(_b)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term restores the carry lost when both low bits are set.
    return (a >> 1) + (b >> 1) + ((a & b) & 1)
def s_hadd32(_a, _b):
    """Return the average of two signed 32-bit operands, rounded down."""
    a = int32(uint32(_a))
    b = int32(uint32(_b))

    # Halve each operand before adding so the sum cannot overflow; the last
    # term restores the carry lost when both low bits are set.
    return (a >> 1) + (b >> 1) + ((a & b) & 1)
def u_hadd64(_a, _b):
    """Return the average of two unsigned 64-bit operands, rounded down.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.uint64)
    b = _b.astype(np.uint64)
    one = np.uint64(1)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term restores the carry lost when both low bits are set.
    return (a >> one) + (b >> one) + ((a & b) & one)
def s_hadd64(_a, _b):
    """Return the average of two signed 64-bit operands, rounded down.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)
    one = np.int64(1)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term restores the carry lost when both low bits are set.
    return (a >> one) + (b >> one) + ((a & b) & one)
def u_rhadd32(_a, _b):
    """Return the average of two unsigned 32-bit operands, rounded up."""
    a = uint32(_a)
    b = uint32(_b)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term rounds up whenever either low bit is set.
    return (a >> 1) + (b >> 1) + ((a | b) & 1)
def s_rhadd32(_a, _b):
    """Return the average of two signed 32-bit operands, rounded up."""
    a = int32(uint32(_a))
    b = int32(uint32(_b))

    # Halve each operand before adding so the sum cannot overflow; the last
    # term rounds up whenever either low bit is set.
    return (a >> 1) + (b >> 1) + ((a | b) & 1)
def u_rhadd64(_a, _b):
    """Return the average of two unsigned 64-bit operands, rounded up.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.uint64)
    b = _b.astype(np.uint64)
    one = np.uint64(1)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term rounds up whenever either low bit is set.
    return (a >> one) + (b >> one) + ((a | b) & one)
def s_rhadd64(_a, _b):
    """Return the average of two signed 64-bit operands, rounded up.

    Both operands must be NumPy scalars (``astype`` is called on them).
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)
    one = np.int64(1)

    # Halve each operand before adding so the sum cannot overflow; the last
    # term rounds up whenever either low bit is set.
    return (a >> one) + (b >> one) + ((a | b) & one)
def imul_32x16(a, b):
    """Multiply ``a`` (as int32) by the low 16 bits of ``b``.

    The shift-left / shift-right pair on ``b`` is the usual idiom for
    sign-extending its low 16 bits.
    """
    # NOTE(review): whether the intermediate shift stays 32 bits wide depends
    # on NumPy's scalar promotion rules for the Python-int shift counts;
    # confirm against the NumPy version in use.
    return int32(a) * ((int32(b) << 16) >> 16)
def umul_32x16(a, b):
    """Multiply ``a`` (as uint32) by the low 16 bits of ``b``.

    The product is wrapped back to an unsigned 32-bit value.
    """
    return uint32(uint32(a) * (uint32(b) & 0x0000ffff))
def absoluteDifference32_sources():
    """Build the list of 32-bit operands used as test sources.

    The list holds negated and plain powers of two (and the values one below
    them) at several bit positions, small values around zero, and a set of
    primes of increasing width.
    """
    srcs = []
    for x in range(0, 32, 4):
        srcs += [-(0x80000000 >> x), -(0x7fffffff >> x)]

    srcs += [-5, -3, -1, 0, 1, 3, 5]

    for x in range(32 - 4, 0, -4):
        srcs += [0x7fffffff >> x, 0x80000000 >> x]

    srcs.append(0x7fffffff)

    # Some prime numbers requiring from 14- to 32-bits to store.
    # NOTE(review): only the first entry of this list is visible in the
    # reviewed text.  The remaining primes (up to the 32-bit one) MUST be
    # restored from the upstream file before this is merged.
    srcs += [
        0x00002ff9,
    ]

    # NOTE(review): the return statement is not visible either.  The raw list
    # is returned because every operator that consumes these sources converts
    # its operands itself; confirm against upstream.
    return srcs
def absoluteDifference64_sources():
    """Build the list of 64 operands used as 64-bit test sources.

    Every entry is returned as an unsigned 64-bit value; negative entries
    wrap to their two's-complement bit pattern.
    """
    srcs = []
    for x in range(0, 64, 6):
        srcs += [-(0x8000000000000000 >> x), -(0x7fffffffffffffff >> x)]

    srcs += [-5, -3, -2, -1, 0, 1, 2, 3, 5]

    for x in range(64 - 4, 0, -6):
        srcs += [0x7fffffffffffffff >> x, 0x8000000000000000 >> x]

    srcs.append(0x7fffffffffffffff)

    # Some prime numbers requiring from 33- to 64-bits to store.  The last is
    # negative when interpreted as a signed 64-bit value.
    srcs += [
        0x000000017ffffffb,  # 33 bits
        0x00000017ffffffef,  # 37 bits
        0x0000017ffffffff3,  # 41 bits
        0x000017ffffffffff,  # 45 bits
        0x00017fffffffffe1,  # 49 bits
        0x0005ffffffffffdd,  # 51 bits
        0x0017fffffffffff3,  # 53 bits
        0x017fffffffffffb5,  # 57 bits
        0x037fffffffffffe5,  # 58 bits
        0x17ffffffffffffe1,  # 61 bits
        0x5fffffffffffff89,  # 63 bits
        0xbfffffffffffffe1,  # 64 bits
    ]

    # Sanity check on the hand-built list.
    assert len(srcs) == 64
    return [uint64(x) for x in srcs]
def addSaturate_int32_sources():
    """Build the list of 43 operands for the signed 32-bit addSaturate test."""
    srcs = [0, 1, -1, 2, 3, 0x40000000, 0x7fffffff, -0x7fffffff, -0x80000000]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(2, 32):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    # Pad with random negative values.
    while len(srcs) < 43:
        srcs.append(random.randint(-0x7ffffffe, -2))

    assert len(srcs) == 43
    # NOTE(review): the return statement is not visible in the reviewed text;
    # wrapping to int32 mirrors the other *_int32_sources helpers.
    return [int32(x) for x in srcs]
def addSaturate_uint32_sources():
    """Build the list of 43 operands for the unsigned 32-bit addSaturate test."""
    srcs = [0, 1, 2, 3, 0x40000000, 0x7fffffff, 0x80000000, 0xf0f0f0f0, 0xff00ff00]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(2, 32):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    # Pad with random negative values.
    for _ in range(43 - len(srcs)):
        srcs.append(random.randint(-0x7ffffffe, -2))

    assert len(srcs) == 43
    # NOTE(review): the return statement is not visible in the reviewed text;
    # wrapping to uint32 mirrors the other *_uint32_sources helpers.
    return [uint32(x) for x in srcs]
def addSaturate_int64_sources():
    """Build the list of 62 operands for the signed 64-bit addSaturate test."""
    srcs = [0, 1, -1, 2, 3, 0x4000000000000000, 0x7fffffffffffffff,
            -0x7fffffffffffffff, -0x8000000000000000]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(16, 64):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    # Pad with random negative values.
    while len(srcs) < 62:
        srcs.append(random.randint(-0x7ffffffffffffffe, -2))

    assert len(srcs) == 62
    # Go through uint64 first so values with bit 63 set wrap to negative.
    return [int64(uint64(x)) for x in srcs]
def addSaturate_uint64_sources():
    """Build the list of 62 operands for the unsigned 64-bit addSaturate test."""
    srcs = [0, 1, 2, 3, 0x4000000000000000, 0x7fffffffffffffff,
            0x8000000000000000, 0xf0f0f0f0f0f0f0f0, 0xff00ff00ff00ff00]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(16, 64):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    # Pad with arbitrary 64-bit values, leaving room for one fixed entry.
    while len(srcs) < 61:
        srcs.append(random.randint(0, 0xffffffffffffffff))

    srcs.append(uint64(0xdeadbeefdeadbeef))

    assert len(srcs) == 62
    return [uint64(x) for x in srcs]
def countLeadingZeros_sources():
    """Build 1024 operands for the countLeadingZeros test.

    Each operand is a 32-bit value shifted right by ``num_zeros`` bits.
    """
    # NOTE(review): the seeding, the definition of ``num_zeros``, the branch
    # condition and the return statement are not visible in the reviewed
    # text.  They are reconstructed here (shift counts cycling through 0..32,
    # with the first cycle using the all-ones pattern) and must be confirmed
    # against upstream.
    sources = []
    random.seed(0)

    for i in range(1024):
        num_zeros = i % 33

        if i < 33:
            sources.append(0xffffffff >> num_zeros)
        else:
            # Force bit 31 on so the shift alone decides the leading zeros.
            sources.append((random.randint(0, 0xffffffff) | (1 << 31)) >> num_zeros)

    return sources
def countTrailingZeros_sources():
    """Build 1024 operands for the countTrailingZeros test.

    Each operand is a 32-bit value shifted left by ``num_zeros`` bits.  The
    shifted values are not masked here, so they can be wider than 32 bits.
    """
    # NOTE(review): the seeding, the definition of ``num_zeros``, the branch
    # condition and the return statement are not visible in the reviewed
    # text.  They are reconstructed here (shift counts cycling through 0..32,
    # with the first cycle using the all-ones pattern) and must be confirmed
    # against upstream.
    sources = []
    random.seed(0)

    for i in range(1024):
        num_zeros = i % 33

        if i < 33:
            sources.append(0xffffffff << num_zeros)
        else:
            # Force bit 0 on so the shift alone decides the trailing zeros.
            sources.append((random.randint(0, 0xffffffff) | 1) << num_zeros)

    return sources
def multiply32x16_int32_sources():
    """Build the list of 512 operands for the multiply32x16 tests."""
    srcs = [0, 1, -1, int32(-0x80000000), -0x7fffffff, 0x7fffffff]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # Positive values with a chosen high bit set.
    for i in range(2, 32, 3):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    srcs.append(random.randint(0, 1 << 30) | (1 << 30))

    # The same shapes, negated.
    for i in range(2, 32, 3):
        srcs.append(-(random.randint(0, 1 << i) | (1 << i)))

    srcs.append(-(random.randint(0, 1 << 30) | (1 << 30)))

    # Pad with arbitrary values from the int32 range.
    while len(srcs) < 512:
        srcs.append(random.randint(-0x80000000, 0x7fffffff))

    # NOTE(review): the return statement is not visible in the reviewed text.
    # The raw list is returned because both the signed and the unsigned
    # multiply32x16 entries share this helper; confirm against upstream.
    return srcs
def subtractSaturate_int32_sources():
    """Build the list of 32 operands for the signed 32-bit subtractSaturate test."""
    srcs = [0, 1, -1, int32(-0x80000000), -0x7fffffff, 0x7fffffff]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # Positive values with a chosen high bit set.
    for i in range(2, 32, 3):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    srcs.append(random.randint(0, 1 << 30) | (1 << 30))

    # The same shapes, negated.
    for i in range(2, 32, 3):
        srcs.append(-(random.randint(0, 1 << i) | (1 << i)))

    srcs.append(-(random.randint(0, 1 << 30) | (1 << 30)))

    # Pad with arbitrary values from the int32 range.
    while len(srcs) < 32:
        srcs.append(random.randint(-0x80000000, 0x7fffffff))

    assert len(srcs) == 32
    return [int32(x) for x in srcs]
def subtractSaturate_uint32_sources():
    """Build the list of 32 operands for the unsigned 32-bit subtractSaturate test."""
    srcs = [0, 1, 0xf0f0f0f0]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(2, 31):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    assert len(srcs) == 32
    # NOTE(review): the return statement is not visible in the reviewed text;
    # wrapping to uint32 mirrors the signed helper.
    return [uint32(x) for x in srcs]
def subtractSaturate_int64_sources():
    """Build the list of 45 operands for the signed 64-bit subtractSaturate test."""
    srcs = [0, 1, -1, -0x8000000000000000, -0x7fffffffffffffff, 0x7fffffffffffffff]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # Positive values with a chosen high bit set.
    for i in range(2, 32, 3):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    srcs.append(random.randint(0, 1 << 30) | (1 << 30))

    # Negative values with a chosen high bit set in their magnitude.
    for i in range(16, 64, 3):
        srcs.append(-(random.randint(0, 1 << i) | (1 << i)))

    srcs.append(-(random.randint(0, 1 << 30) | (1 << 30)))

    # Pad with arbitrary values from the int64 range.
    while len(srcs) < 45:
        srcs.append(random.randint(-0x8000000000000000, 0x7fffffffffffffff))

    assert len(srcs) == 45
    return [int64(x) for x in srcs]
def subtractSaturate_uint64_sources():
    """Build the list of 45 operands for the unsigned 64-bit subtractSaturate test."""
    srcs = [0, 1, 0xf0f0f0f0f0f0f0f0]

    # Seed the generator so every call returns the same list.
    # NOTE(review): the seeding line is not visible in the reviewed text;
    # confirm the seed value against upstream.
    random.seed(0)

    # One random value with bit i as (at least) its highest set bit, for each
    # bit position.
    for i in range(22, 64):
        srcs.append(random.randint(0, 1 << i) | (1 << i))

    assert len(srcs) == 45
    # NOTE(review): the return statement is not visible in the reviewed text;
    # wrapping to uint64 mirrors the other *_uint64_sources helpers.
    return [uint64(x) for x in srcs]
# Table of tests to generate, keyed by the test name used in the output
# filename.  Each entry supplies:
#   input / output  - GLSL operand and result type names
#   sources         - callable returning the operand list
#   results         - callable(sources, operator) returning expected results
#   template        - Mako template used to render the test
#   func            - GLSL built-in under test
#   operator        - Python reference implementation (or None)
#   version         - GLSL version required
#   extensions      - extra extension name, list of names, or None
#
# NOTE(review): in the reviewed text the assignment header, every 'input'
# value, the 'output' / 'version' / 'extensions' values of the 32-bit
# entries, the signed 64-bit 'output' values, the keys 'addSaturate-int' and
# 'average-*', the 'average' func name and the 'operator' of the count*Zeros
# entries are not visible.  They are inferred here from the surrounding
# entries and must be confirmed against upstream before merging.
FUNCS = {
    'absoluteDifference-int': {
        'input': 'int',
        'output': 'uint',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'absoluteDifference',
        'operator': abs_isub32,
        'version': '1.30',
        'extensions': None,
    },
    'absoluteDifference-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'absoluteDifference',
        'operator': abs_usub32,
        'version': '1.30',
        'extensions': None,
    },
    'absoluteDifference-int64': {
        'input': 'int64_t',
        'output': 'uint64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'absoluteDifference',
        'operator': abs_isub64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'absoluteDifference-uint64': {
        'input': 'uint64_t',
        'output': 'uint64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'absoluteDifference',
        'operator': abs_usub64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'countLeadingZeros-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': countLeadingZeros_sources,
        'results': generate_results_empty,
        'template': 'countLeadingZeros.shader_test.mako',
        'func': 'countLeadingZeros',
        'operator': None,
        'version': '1.30',
        'extensions': None,
    },
    'countTrailingZeros-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': countTrailingZeros_sources,
        'results': generate_results_empty,
        'template': 'countLeadingZeros.shader_test.mako',
        'func': 'countTrailingZeros',
        'operator': None,
        'version': '1.30',
        'extensions': None,
    },
    'addSaturate-int': {
        'input': 'int',
        'output': 'int',
        'sources': addSaturate_int32_sources,
        'results': generate_results_commutative_with_diagonal,
        'template': 'addSaturate.shader_test.mako',
        'func': 'addSaturate',
        'operator': iadd_sat32,
        'version': '1.30',
        'extensions': None,
    },
    'addSaturate-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': addSaturate_uint32_sources,
        'results': generate_results_commutative_with_diagonal,
        'template': 'addSaturate.shader_test.mako',
        'func': 'addSaturate',
        'operator': uadd_sat32,
        'version': '1.30',
        'extensions': None,
    },
    'addSaturate-int64': {
        'input': 'int64_t',
        'output': 'int64_t',
        'sources': addSaturate_int64_sources,
        'results': generate_results_commutative_with_diagonal,
        'template': 'addSaturate.shader_test.mako',
        'func': 'addSaturate',
        'operator': iadd_sat64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'addSaturate-uint64': {
        'input': 'uint64_t',
        'output': 'uint64_t',
        'sources': addSaturate_uint64_sources,
        'results': generate_results_commutative_with_diagonal,
        'template': 'addSaturate.shader_test.mako',
        'func': 'addSaturate',
        'operator': uadd_sat64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    # The average tests reuse the absoluteDifference template and sources.
    'average-int': {
        'input': 'int',
        'output': 'int',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'average',
        'operator': s_hadd32,
        'version': '1.30',
        'extensions': None,
    },
    'average-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'average',
        'operator': u_hadd32,
        'version': '1.30',
        'extensions': None,
    },
    'average-int64': {
        'input': 'int64_t',
        'output': 'int64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'average',
        'operator': s_hadd64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'average-uint64': {
        'input': 'uint64_t',
        'output': 'uint64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'average',
        'operator': u_hadd64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'averageRounded-int': {
        'input': 'int',
        'output': 'int',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'averageRounded',
        'operator': s_rhadd32,
        'version': '1.30',
        'extensions': None,
    },
    'averageRounded-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': absoluteDifference32_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'averageRounded',
        'operator': u_rhadd32,
        'version': '1.30',
        'extensions': None,
    },
    'averageRounded-int64': {
        'input': 'int64_t',
        'output': 'int64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'averageRounded',
        'operator': s_rhadd64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'averageRounded-uint64': {
        'input': 'uint64_t',
        'output': 'uint64_t',
        'sources': absoluteDifference64_sources,
        'results': generate_results_commutative,
        'template': 'absoluteDifference.shader_test.mako',
        'func': 'averageRounded',
        'operator': u_rhadd64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'multiply32x16-int': {
        'input': 'int',
        'output': 'int',
        'sources': multiply32x16_int32_sources,
        'results': generate_results_empty,
        'template': 'multiply32x16.shader_test.mako',
        'func': 'multiply32x16',
        'operator': imul_32x16,
        'version': '1.30',
        'extensions': None,
    },
    'multiply32x16-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': multiply32x16_int32_sources,
        'results': generate_results_empty,
        'template': 'multiply32x16.shader_test.mako',
        'func': 'multiply32x16',
        'operator': umul_32x16,
        'version': '1.30',
        'extensions': None,
    },
    'subtractSaturate-int': {
        'input': 'int',
        'output': 'int',
        'sources': subtractSaturate_int32_sources,
        'results': generate_results_without_diagonal,
        'template': 'subtractSaturate.shader_test.mako',
        'func': 'subtractSaturate',
        'operator': isub_sat32,
        'version': '1.30',
        'extensions': None,
    },
    'subtractSaturate-uint': {
        'input': 'uint',
        'output': 'uint',
        'sources': subtractSaturate_uint32_sources,
        'results': generate_results_without_diagonal,
        'template': 'subtractSaturate.shader_test.mako',
        'func': 'subtractSaturate',
        'operator': usub_sat32,
        'version': '1.30',
        'extensions': None,
    },
    'subtractSaturate-int64': {
        'input': 'int64_t',
        'output': 'int64_t',
        'sources': subtractSaturate_int64_sources,
        'results': generate_results_without_diagonal,
        'template': 'subtractSaturate.shader_test.mako',
        'func': 'subtractSaturate',
        'operator': isub_sat64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
    'subtractSaturate-uint64': {
        'input': 'uint64_t',
        'output': 'uint64_t',
        'sources': subtractSaturate_uint64_sources,
        'results': generate_results_without_diagonal,
        'template': 'subtractSaturate.shader_test.mako',
        'func': 'subtractSaturate',
        'operator': usub_sat64,
        'version': '4.00',  # GL_ARB_gpu_shader_int64 requires 4.0.
        'extensions': 'GL_ARB_gpu_shader_int64',
    },
}
def main():
    """Render one shader test per FUNCS entry and execution stage.

    Files are written below
    ``spec/intel_shader_integer_functions2/execution/built-in-functions`` and
    named ``<stage>-<test name>.shader_test`` for the ``vs`` and ``fs``
    stages.  If rendering fails, Mako's text error report is printed to
    stderr and the exception is re-raised.
    """
    dirname = os.path.join('spec', 'intel_shader_integer_functions2',
                           'execution', 'built-in-functions')
    utils.safe_makedirs(dirname)

    for func, attrib in FUNCS.items():
        # NOTE(review): the second argument of template_file() is not visible
        # in the reviewed text; the entry's 'template' name is the only
        # otherwise-unused key, so it is assumed here.
        template = template_file(os.path.basename(os.path.splitext(__file__)[0]),
                                 attrib['template'])

        # Evaluate the operand generator once, so the rendered sources and
        # the expected results are guaranteed to come from the same list and
        # the work is not repeated for every stage.
        sources = attrib['sources']()
        results = attrib['results'](sources, attrib['operator'])

        extension_list = ["GL_INTEL_shader_integer_functions2"]
        if isinstance(attrib['extensions'], str):
            extension_list += [attrib['extensions']]
        elif attrib['extensions'] is not None:
            extension_list += attrib['extensions']

        for execution_stage in ('vs', 'fs'):
            filename = os.path.join(
                dirname, "{0}-{1}.shader_test".format(execution_stage, func))
            # NOTE(review): printing the generated filename is not visible in
            # the reviewed text; it is assumed so the caller can track the
            # outputs.  Confirm against upstream.
            print(filename)

            with open(filename, 'w') as f:
                try:
                    # NOTE(review): the ``func`` keyword is not visible in
                    # the reviewed text; confirm the template's parameters.
                    f.write(template.render_unicode(
                        execution_stage=execution_stage,
                        version=attrib['version'],
                        extensions=sorted(extension_list),
                        input_type=attrib['input'],
                        output_type=attrib['output'],
                        sources=sources,
                        results=results,
                        func=attrib['func']))
                except Exception:
                    # Show Mako's readable traceback, then fail loudly.
                    print(exceptions.text_error_template().render(), file=sys.stderr)
                    raise
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()