3 # Copyright © 2018 Intel Corporation
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the "Software"),
7 # to deal in the Software without restriction, including without limitation
8 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 # and/or sell copies of the Software, and to permit persons to whom the
10 # Software is furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice (including the next
13 # paragraph) shall be included in all copies or substantial portions of the
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 # DEALINGS IN THE SOFTWARE.
24 from __future__
import print_function
, division
, absolute_import
30 from templates
import template_file
31 from modules
import utils
def generate_results_commutative(srcs, operator):
    """Generate results for an operator that is commutative.

    Commutative operators only need the upper-right triangle of the result
    matrix, so each unordered pair of distinct positions is evaluated once
    and the diagonal (an element paired with itself) is omitted.

    srcs -- sequence of source values.
    operator -- callable taking two source values and returning a result.

    Returns the results as a list in row-major order.
    """
    return [operator(lhs, rhs)
            for i, lhs in enumerate(srcs)
            for rhs in srcs[i + 1:]]
def generate_results_commutative_with_diagonal(srcs, operator):
    """Generate results for an operator that is commutative.

    Commutative operators only need the upper-right triangle of the result
    matrix, but here the diagonal (an element paired with itself) is
    stored explicitly as well.

    srcs -- sequence of source values.
    operator -- callable taking two source values and returning a result.

    Returns the results as a list in row-major order.
    """
    return [operator(lhs, rhs)
            for i, lhs in enumerate(srcs)
            for rhs in srcs[i:]]
def generate_results_without_diagonal(srcs, operator):
    """Generate full matrix of results without the diagonal.

    Every ordered pair of distinct positions is evaluated, so both
    operator(x, y) and operator(y, x) appear.  Pairs are excluded by
    position, not by value, so duplicated source values are still combined
    with each other.

    srcs -- sequence of source values.
    operator -- callable taking two source values and returning a result.

    Returns the results as a list in row-major order.
    """
    return [operator(lhs, rhs)
            for i, lhs in enumerate(srcs)
            for j, rhs in enumerate(srcs)
            if i != j]
def generate_results_empty(unused1, unused2):
    """Some tests don't need any explicit results stored in the shader.

    Both arguments are ignored; they exist only so this function has the
    same call signature as the other result generators.

    Returns a new empty list.
    """
    return []
def abs_isub32(_a, _b):
    """Return |a - b| as np.uint32, treating both inputs as signed 32-bit.

    The inputs are first taken as 32-bit patterns and then reinterpreted
    as two's-complement int32 values.  The subtraction is allowed to wrap:
    the wrapped bit pattern, read back as unsigned, is the true magnitude
    of the difference.
    """
    a = np.uint32(_a).astype(np.int32)
    b = np.uint32(_b).astype(np.int32)

    # Wrap-around is intentional here, so silence the overflow warning for
    # just this computation; errstate restores the previous setting even
    # if an exception is raised.
    with np.errstate(over='ignore'):
        diff = a - b if a > b else b - a

    return diff.astype(np.uint32)
def abs_isub64(_a, _b):
    """Return |a - b| as np.uint64, treating both inputs as signed 64-bit.

    _a and _b must be NumPy scalars (they are converted with .astype), and
    are reinterpreted as two's-complement int64 values.  The subtraction
    is allowed to wrap: the wrapped bit pattern, read back as unsigned, is
    the true magnitude of the difference.
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)

    # Wrap-around is intentional here, so silence the overflow warning for
    # just this computation; errstate restores the previous setting even
    # if an exception is raised.
    with np.errstate(over='ignore'):
        diff = a - b if a > b else b - a

    return diff.astype(np.uint64)
def abs_usub32(_a, _b):
    """Return |a - b| for two values treated as unsigned 32-bit integers.

    The larger operand is always the minuend, so the unsigned subtraction
    never wraps.
    """
    a = np.uint32(_a)
    b = np.uint32(_b)

    return a - b if a > b else b - a
def abs_usub64(_a, _b):
    """Return |a - b| for two values treated as unsigned 64-bit integers.

    The larger operand is always the minuend, so the unsigned subtraction
    never wraps.
    """
    a = np.uint64(_a)
    b = np.uint64(_b)

    return a - b if a > b else b - a
def iadd_sat32(_a, _b):
    """Return a + b clamped to the signed 32-bit range.

    The inputs are taken as 32-bit patterns and reinterpreted as
    two's-complement int32 values.  When the exact sum lies outside the
    int32 range the corresponding np.iinfo(np.int32) bound is returned;
    otherwise the np.int32 sum is returned.
    """
    a = np.uint32(_a).astype(np.int32)
    b = np.uint32(_b).astype(np.int32)
    limits = np.iinfo(np.int32)

    # Decide on saturation with exact Python integers so that the range
    # check itself can never overflow.
    exact = int(a) + int(b)
    if exact > limits.max:
        return limits.max
    if exact < limits.min:
        return limits.min

    return a + b
def uadd_sat32(_a, _b):
    """Return a + b clamped to the unsigned 32-bit maximum.

    Both inputs are converted to np.uint32.  When the sum would exceed
    np.iinfo(np.uint32).max that bound is returned; otherwise the
    np.uint32 sum is returned.
    """
    a = np.uint32(_a)
    b = np.uint32(_b)
    ceiling = np.iinfo(np.uint32).max

    # ceiling - a cannot wrap, so this detects overflow without ever
    # performing the overflowing addition.
    if b > np.uint32(ceiling) - a:
        return ceiling

    return a + b
def iadd_sat64(_a, _b):
    """Return a + b clamped to the signed 64-bit range.

    Both inputs are converted to np.int64.  When the exact sum lies
    outside the int64 range the corresponding np.iinfo(np.int64) bound is
    returned; otherwise the np.int64 sum is returned.
    """
    a = np.int64(_a)
    b = np.int64(_b)
    limits = np.iinfo(np.int64)

    # Decide on saturation with exact Python integers so that the range
    # check itself can never overflow.
    exact = int(a) + int(b)
    if exact > limits.max:
        return limits.max
    if exact < limits.min:
        return limits.min

    return a + b
def uadd_sat64(_a, _b):
    """Return a + b clamped to the unsigned 64-bit maximum.

    Both inputs are converted to np.uint64.  When the sum would exceed
    np.iinfo(np.uint64).max that bound is returned; otherwise the
    np.uint64 sum is returned.
    """
    a = np.uint64(_a)
    b = np.uint64(_b)
    ceiling = np.iinfo(np.uint64).max

    # ceiling - a cannot wrap, so this detects overflow without ever
    # performing the overflowing addition.
    if b > np.uint64(ceiling) - a:
        return ceiling

    return a + b
def isub_sat32(a, b):
    """Return a - b clamped to the signed 32-bit range, as np.int32.

    Both inputs are converted to np.int32 and then widened to np.int64,
    so the difference is exact and can be compared against the int32
    bounds.
    """
    limits = np.iinfo(np.int32)
    wide = np.int64(np.int32(a)) - np.int64(np.int32(b))

    if wide > limits.max:
        return np.int32(limits.max)
    if wide < limits.min:
        return np.int32(limits.min)

    return np.int32(wide)
def usub_sat32(_a, _b):
    """Return a - b clamped at zero, as np.uint32.

    Both inputs are converted to np.uint32.  The subtraction is performed
    only when a > b, so it never wraps.
    """
    a = np.uint32(_a)
    b = np.uint32(_b)

    return a - b if a > b else np.uint32(0)
def isub_sat64(_a, _b):
    """Return a - b clamped to the signed 64-bit range.

    Both inputs are converted to np.int64.  When the exact difference
    lies outside the int64 range the corresponding np.iinfo(np.int64)
    bound is returned; otherwise the np.int64 difference is returned.
    """
    a = np.int64(_a)
    b = np.int64(_b)
    limits = np.iinfo(np.int64)

    # Decide on saturation with exact Python integers so that the range
    # check itself can never overflow.
    exact = int(a) - int(b)
    if exact > limits.max:
        return limits.max
    if exact < limits.min:
        return limits.min

    return a - b
def usub_sat64(_a, _b):
    """Return a - b clamped at zero, as np.uint64.

    Both inputs are converted to np.uint64.  The subtraction is performed
    only when a > b, so it never wraps.
    """
    a = np.uint64(_a)
    b = np.uint64(_b)

    return a - b if a > b else np.uint64(0)
def u_hadd32(_a, _b):
    """Return floor((a + b) / 2) for unsigned 32-bit a and b.

    Each operand is halved before adding so the intermediate sum cannot
    overflow; the carry lost by the two shifts is restored when both low
    bits are set.
    """
    a = np.uint32(_a)
    b = np.uint32(_b)
    one = np.uint32(1)  # typed constant keeps every step in uint32

    return (a >> one) + (b >> one) + ((a & b) & one)
def s_hadd32(_a, _b):
    """Return floor((a + b) / 2) for signed 32-bit a and b.

    The inputs are taken as 32-bit patterns and reinterpreted as
    two's-complement int32 values.  Each operand is halved (arithmetic
    shift) before adding so the intermediate sum cannot overflow; the
    carry lost by the two shifts is restored when both low bits are set.
    """
    a = np.uint32(_a).astype(np.int32)
    b = np.uint32(_b).astype(np.int32)
    one = np.int32(1)  # typed constant keeps every step in int32

    return (a >> one) + (b >> one) + ((a & b) & one)
def u_hadd64(_a, _b):
    """Return floor((a + b) / 2) for unsigned 64-bit a and b.

    Each operand is halved before adding so the intermediate sum cannot
    overflow; the carry lost by the two shifts is restored when both low
    bits are set.
    """
    a = np.uint64(_a)
    b = np.uint64(_b)
    one = np.uint64(1)  # typed constant keeps every step in uint64

    return (a >> one) + (b >> one) + ((a & b) & one)
def s_hadd64(_a, _b):
    """Return floor((a + b) / 2) for signed 64-bit a and b.

    _a and _b must be NumPy scalars (they are converted with .astype), and
    are reinterpreted as two's-complement int64 values.  Each operand is
    halved (arithmetic shift) before adding so the intermediate sum cannot
    overflow; the carry lost by the two shifts is restored when both low
    bits are set.
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)
    one = np.int64(1)  # typed constant keeps every step in int64

    return (a >> one) + (b >> one) + ((a & b) & one)
def u_rhadd32(_a, _b):
    """Return ceil((a + b) / 2) for unsigned 32-bit a and b.

    Each operand is halved before adding so the intermediate sum cannot
    overflow; one is added back when either low bit is set, which rounds
    a half-way result upwards.
    """
    a = np.uint32(_a)
    b = np.uint32(_b)
    one = np.uint32(1)  # typed constant keeps every step in uint32

    return (a >> one) + (b >> one) + ((a | b) & one)
def s_rhadd32(_a, _b):
    """Return ceil((a + b) / 2) for signed 32-bit a and b.

    The inputs are taken as 32-bit patterns and reinterpreted as
    two's-complement int32 values.  Each operand is halved (arithmetic
    shift) before adding so the intermediate sum cannot overflow; one is
    added back when either low bit is set, which rounds a half-way result
    upwards.
    """
    a = np.uint32(_a).astype(np.int32)
    b = np.uint32(_b).astype(np.int32)
    one = np.int32(1)  # typed constant keeps every step in int32

    return (a >> one) + (b >> one) + ((a | b) & one)
def u_rhadd64(_a, _b):
    """Return ceil((a + b) / 2) for unsigned 64-bit a and b.

    Each operand is halved before adding so the intermediate sum cannot
    overflow; one is added back when either low bit is set, which rounds
    a half-way result upwards.
    """
    a = np.uint64(_a)
    b = np.uint64(_b)
    one = np.uint64(1)  # typed constant keeps every step in uint64

    return (a >> one) + (b >> one) + ((a | b) & one)
def s_rhadd64(_a, _b):
    """Return ceil((a + b) / 2) for signed 64-bit a and b.

    _a and _b must be NumPy scalars (they are converted with .astype), and
    are reinterpreted as two's-complement int64 values.  Each operand is
    halved (arithmetic shift) before adding so the intermediate sum cannot
    overflow; one is added back when either low bit is set, which rounds
    a half-way result upwards.
    """
    a = _a.astype(np.int64)
    b = _b.astype(np.int64)
    one = np.int64(1)  # typed constant keeps every step in int64

    return (a >> one) + (b >> one) + ((a | b) & one)
def imul_32x16(a, b):
    """Multiply a (signed 32-bit) by the sign-extended low 16 bits of b.

    Returns the product as np.int32; a product that does not fit wraps
    around.
    """
    # Truncate b to its low 16 bits and sign-extend back to 32 bits with
    # explicit casts, so the result does not depend on how NumPy promotes
    # a Python int shift count.
    low16 = np.int32(b).astype(np.int16).astype(np.int32)

    # Wrap-around of the 32-bit product is intended.
    with np.errstate(over='ignore'):
        return np.int32(a) * low16
def umul_32x16(a, b):
    """Multiply a (unsigned 32-bit) by the low 16 bits of b.

    Returns the product as np.uint32; a product that does not fit wraps
    around modulo 2**32.
    """
    # A typed mask keeps the operand in uint32 regardless of how NumPy
    # promotes Python int constants.
    low16 = np.uint32(b) & np.uint32(0x0000ffff)

    # Wrap-around of the 32-bit product is intended.
    with np.errstate(over='ignore'):
        return np.uint32(np.uint32(a) * low16)
267 def absoluteDifference32_sources():
269 for x
in range(0, 32, 4):
270 srcs
+= [ -(0x80000000 >> x
), -(0x7fffffff >> x
) ]
272 srcs
+= [-5, -3, -1, 0, 1, 3, 5]
274 for x
in range(32 - 4, 0, -4):
275 srcs
+= [ 0x7fffffff >> x
, 0x80000000 >> x
]
277 srcs
.append(0x7fffffff)
279 # Some prime numbers requiring from 14- to 32-bits to store. The last is
281 srcs
+= [ 0x00002ff9,
def absoluteDifference64_sources():
    """Return the 64 source values for the 64-bit tests, as np.uint64.

    The list is built from signed Python integers: negated and plain
    right-shifted copies of 0x8000000000000000 and 0x7fffffffffffffff, a
    few small values around zero, and a table of large constants.
    Negative entries are stored as their 64-bit two's-complement bit
    pattern.
    """
    srcs = []

    for x in range(0, 64, 6):
        srcs += [-(0x8000000000000000 >> x), -(0x7fffffffffffffff >> x)]

    srcs += [-5, -3, -2, -1, 0, 1, 2, 3, 5]

    for x in range(64 - 4, 0, -6):
        srcs += [0x7fffffffffffffff >> x, 0x8000000000000000 >> x]

    srcs.append(0x7fffffffffffffff)

    # Some prime numbers requiring from 33- to 64-bits to store.  The last
    # entry has bit 63 set.
    srcs += [
        0x000000017ffffffb,  # 33 bits
        0x00000017ffffffef,  # 37 bits
        0x0000017ffffffff3,  # 41 bits
        0x000017ffffffffff,  # 45 bits
        0x00017fffffffffe1,  # 49 bits
        0x0005ffffffffffdd,  # 51 bits
        0x0017fffffffffff3,  # 53 bits
        0x017fffffffffffb5,  # 57 bits
        0x037fffffffffffe5,  # 58 bits
        0x17ffffffffffffe1,  # 61 bits
        0x5fffffffffffff89,  # 63 bits
        0xbfffffffffffffe1,  # 64 bits
    ]

    assert len(srcs) == 64

    # Mask to 64 bits before converting: np.uint64() rejects negative
    # Python integers on recent NumPy, while the mask yields the same
    # two's-complement pattern that older versions produced by wrapping.
    return [np.uint64(x & 0xffffffffffffffff) for x in srcs]
324 def addSaturate_int32_sources():
325 srcs
= [0, 1, -1, 2, 3, 0x40000000, 0x7fffffff, -0x7fffffff, -0x80000000 ]
328 for i
in range(2, 32):
329 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
332 srcs
.append(random
.randint(-0x7ffffffe, -2))
334 assert len(srcs
) == 43
338 def addSaturate_uint32_sources():
339 srcs
= [0, 1, 2, 3, 0x40000000, 0x7fffffff, 0x80000000, 0xf0f0f0f0, 0xff00ff00 ]
342 for i
in range(2, 32):
343 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
345 for i
in range(43 - len(srcs
)):
346 srcs
.append(random
.randint(-0x7ffffffe, -2))
348 assert len(srcs
) == 43
352 def addSaturate_int64_sources():
353 srcs
= [0, 1, -1, 2, 3, 0x4000000000000000, 0x7fffffffffffffff, -0x7fffffffffffffff, -0x8000000000000000 ]
356 for i
in range(16, 64):
357 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
359 while len(srcs
) < 62:
360 srcs
.append(random
.randint(-0x7ffffffffffffffe, -2))
362 assert len(srcs
) == 62
363 return [np
.int64(np
.uint64(x
)) for x
in srcs
]
366 def addSaturate_uint64_sources():
367 srcs
= [0, 1, 2, 3, 0x4000000000000000, 0x7fffffffffffffff, 0x8000000000000000, 0xf0f0f0f0f0f0f0f0, 0xff00ff00ff00ff00 ]
370 for i
in range(16, 64):
371 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
373 while len(srcs
) < 61:
374 srcs
.append(random
.randint(0, 0xffffffffffffffff))
376 srcs
.append(np
.uint64(0xdeadbeefdeadbeef))
378 assert len(srcs
) == 62
379 return [np
.uint64(x
) for x
in srcs
]
382 def countLeadingZeros_sources():
386 for i
in range(1024):
390 sources
.append(0xffffffff >> num_zeros
)
392 sources
.append((random
.randint(0, 0xffffffff) |
(1 << 31)) >> num_zeros
)
397 def countTrailingZeros_sources():
401 for i
in range(1024):
405 sources
.append(0xffffffff << num_zeros
)
407 sources
.append((random
.randint(0, 0xffffffff) |
1) << num_zeros
)
412 def multiply32x16_int32_sources():
413 srcs
= [0, 1, -1, np
.int32(-0x80000000), -0x7fffffff, 0x7fffffff ]
416 for i
in range(2, 32, 3):
417 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
419 srcs
.append(random
.randint(0, 1 << 30) |
(1 << 30))
421 for i
in range(2, 32, 3):
422 srcs
.append(-(random
.randint(0, 1 << i
) |
(1 << i
)))
424 srcs
.append(-(random
.randint(0, 1 << 30) |
(1 << 30)))
426 while len(srcs
) < 512:
427 srcs
.append(random
.randint(-0x80000000, 0x7fffffff))
432 def subtractSaturate_int32_sources():
433 srcs
= [0, 1, -1, np
.int32(-0x80000000), -0x7fffffff, 0x7fffffff ]
436 for i
in range(2, 32, 3):
437 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
439 srcs
.append(random
.randint(0, 1 << 30) |
(1 << 30))
441 for i
in range(2, 32, 3):
442 srcs
.append(-(random
.randint(0, 1 << i
) |
(1 << i
)))
444 srcs
.append(-(random
.randint(0, 1 << 30) |
(1 << 30)))
446 while len(srcs
) < 32:
447 srcs
.append(random
.randint(-0x80000000, 0x7fffffff))
449 assert len(srcs
) == 32
450 return [np
.int32(x
) for x
in srcs
]
453 def subtractSaturate_uint32_sources():
454 srcs
= [0, 1, 0xf0f0f0f0 ]
457 for i
in range(2, 31):
458 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
460 assert len(srcs
) == 32
464 def subtractSaturate_int64_sources():
465 srcs
= [0, 1, -1, -0x8000000000000000, -0x7fffffffffffffff, 0x7fffffffffffffff ]
468 for i
in range(2, 32, 3):
469 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
471 srcs
.append(random
.randint(0, 1 << 30) |
(1 << 30))
473 for i
in range(16, 64, 3):
474 srcs
.append(-(random
.randint(0, 1 << i
) |
(1 << i
)))
476 srcs
.append(-(random
.randint(0, 1 << 30) |
(1 << 30)))
478 while len(srcs
) < 45:
479 srcs
.append(random
.randint(-0x8000000000000000, 0x7fffffffffffffff))
481 assert len(srcs
) == 45
482 return [np
.int64(x
) for x
in srcs
]
485 def subtractSaturate_uint64_sources():
486 srcs
= [0, 1, 0xf0f0f0f0f0f0f0f0 ]
489 for i
in range(22, 64):
490 srcs
.append(random
.randint(0, 1 << i
) |
(1 << i
))
492 assert len(srcs
) == 45
497 'absoluteDifference-int': {
500 'sources': absoluteDifference32_sources
,
501 'results': generate_results_commutative
,
502 'template': 'absoluteDifference.shader_test.mako',
503 'func': 'absoluteDifference',
504 'operator': abs_isub32
,
508 'absoluteDifference-uint': {
511 'sources': absoluteDifference32_sources
,
512 'results': generate_results_commutative
,
513 'template': 'absoluteDifference.shader_test.mako',
514 'func': 'absoluteDifference',
515 'operator': abs_usub32
,
519 'absoluteDifference-int64': {
521 'output': 'uint64_t',
522 'sources': absoluteDifference64_sources
,
523 'results': generate_results_commutative
,
524 'template': 'absoluteDifference.shader_test.mako',
525 'func': 'absoluteDifference',
526 'operator': abs_isub64
,
527 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
528 'extensions': 'GL_ARB_gpu_shader_int64',
530 'absoluteDifference-uint64': {
532 'output': 'uint64_t',
533 'sources': absoluteDifference64_sources
,
534 'results': generate_results_commutative
,
535 'template': 'absoluteDifference.shader_test.mako',
536 'func': 'absoluteDifference',
537 'operator': abs_usub64
,
538 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
539 'extensions': 'GL_ARB_gpu_shader_int64',
541 'countLeadingZeros-uint': {
544 'sources': countLeadingZeros_sources
,
545 'results': generate_results_empty
,
546 'template': 'countLeadingZeros.shader_test.mako',
547 'func': 'countLeadingZeros',
552 'countTrailingZeros-uint': {
555 'sources': countTrailingZeros_sources
,
556 'results': generate_results_empty
,
557 'template': 'countLeadingZeros.shader_test.mako',
558 'func': 'countTrailingZeros',
566 'sources': addSaturate_int32_sources
,
567 'results': generate_results_commutative_with_diagonal
,
568 'template': 'addSaturate.shader_test.mako',
569 'func': 'addSaturate',
570 'operator': iadd_sat32
,
574 'addSaturate-uint': {
577 'sources': addSaturate_uint32_sources
,
578 'results': generate_results_commutative_with_diagonal
,
579 'template': 'addSaturate.shader_test.mako',
580 'func': 'addSaturate',
581 'operator': uadd_sat32
,
585 'addSaturate-int64': {
588 'sources': addSaturate_int64_sources
,
589 'results': generate_results_commutative_with_diagonal
,
590 'template': 'addSaturate.shader_test.mako',
591 'func': 'addSaturate',
592 'operator': iadd_sat64
,
593 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
594 'extensions': 'GL_ARB_gpu_shader_int64',
596 'addSaturate-uint64': {
598 'output': 'uint64_t',
599 'sources': addSaturate_uint64_sources
,
600 'results': generate_results_commutative_with_diagonal
,
601 'template': 'addSaturate.shader_test.mako',
602 'func': 'addSaturate',
603 'operator': uadd_sat64
,
604 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
605 'extensions': 'GL_ARB_gpu_shader_int64',
610 'sources': absoluteDifference32_sources
,
611 'results': generate_results_commutative
,
612 'template': 'absoluteDifference.shader_test.mako',
614 'operator': s_hadd32
,
621 'sources': absoluteDifference32_sources
,
622 'results': generate_results_commutative
,
623 'template': 'absoluteDifference.shader_test.mako',
625 'operator': u_hadd32
,
632 'sources': absoluteDifference64_sources
,
633 'results': generate_results_commutative
,
634 'template': 'absoluteDifference.shader_test.mako',
636 'operator': s_hadd64
,
637 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
638 'extensions': 'GL_ARB_gpu_shader_int64',
642 'output': 'uint64_t',
643 'sources': absoluteDifference64_sources
,
644 'results': generate_results_commutative
,
645 'template': 'absoluteDifference.shader_test.mako',
647 'operator': u_hadd64
,
648 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
649 'extensions': 'GL_ARB_gpu_shader_int64',
651 'averageRounded-int': {
654 'sources': absoluteDifference32_sources
,
655 'results': generate_results_commutative
,
656 'template': 'absoluteDifference.shader_test.mako',
657 'func': 'averageRounded',
658 'operator': s_rhadd32
,
662 'averageRounded-uint': {
665 'sources': absoluteDifference32_sources
,
666 'results': generate_results_commutative
,
667 'template': 'absoluteDifference.shader_test.mako',
668 'func': 'averageRounded',
669 'operator': u_rhadd32
,
673 'averageRounded-int64': {
676 'sources': absoluteDifference64_sources
,
677 'results': generate_results_commutative
,
678 'template': 'absoluteDifference.shader_test.mako',
679 'func': 'averageRounded',
680 'operator': s_rhadd64
,
681 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
682 'extensions': 'GL_ARB_gpu_shader_int64',
684 'averageRounded-uint64': {
686 'output': 'uint64_t',
687 'sources': absoluteDifference64_sources
,
688 'results': generate_results_commutative
,
689 'template': 'absoluteDifference.shader_test.mako',
690 'func': 'averageRounded',
691 'operator': u_rhadd64
,
692 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
693 'extensions': 'GL_ARB_gpu_shader_int64',
695 'multiply32x16-int': {
698 'sources': multiply32x16_int32_sources
,
699 'results': generate_results_empty
,
700 'template': 'multiply32x16.shader_test.mako',
701 'func': 'multiply32x16',
702 'operator': imul_32x16
,
706 'multiply32x16-uint': {
709 'sources': multiply32x16_int32_sources
,
710 'results': generate_results_empty
,
711 'template': 'multiply32x16.shader_test.mako',
712 'func': 'multiply32x16',
713 'operator': umul_32x16
,
717 'subtractSaturate-int': {
720 'sources': subtractSaturate_int32_sources
,
721 'results': generate_results_without_diagonal
,
722 'template': 'subtractSaturate.shader_test.mako',
723 'func': 'subtractSaturate',
724 'operator': isub_sat32
,
728 'subtractSaturate-uint': {
731 'sources': subtractSaturate_uint32_sources
,
732 'results': generate_results_without_diagonal
,
733 'template': 'subtractSaturate.shader_test.mako',
734 'func': 'subtractSaturate',
735 'operator': usub_sat32
,
739 'subtractSaturate-int64': {
742 'sources': subtractSaturate_int64_sources
,
743 'results': generate_results_without_diagonal
,
744 'template': 'subtractSaturate.shader_test.mako',
745 'func': 'subtractSaturate',
746 'operator': isub_sat64
,
747 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
748 'extensions': 'GL_ARB_gpu_shader_int64',
750 'subtractSaturate-uint64': {
752 'output': 'uint64_t',
753 'sources': subtractSaturate_uint64_sources
,
754 'results': generate_results_without_diagonal
,
755 'template': 'subtractSaturate.shader_test.mako',
756 'func': 'subtractSaturate',
757 'operator': usub_sat64
,
758 'version': '4.00', # GL_ARB_gpu_shader_int64 requires 4.0.
759 'extensions': 'GL_ARB_gpu_shader_int64',
766 dirname
= os
.path
.join('spec', 'intel_shader_integer_functions2',
767 'execution', 'built-in-functions')
768 utils
.safe_makedirs(dirname
)
770 for func
, attrib
in FUNCS
.items():
772 TEMPLATE
= template_file(os
.path
.basename(os
.path
.splitext(__file__
)[0]),
775 for execution_stage
in ('vs', 'fs'):
776 filename
= os
.path
.join(
777 dirname
, "{0}-{1}.shader_test".format(execution_stage
, func
))
780 extension_list
= ["GL_INTEL_shader_integer_functions2"]
781 if isinstance(attrib
['extensions'], str):
782 extension_list
+= [attrib
['extensions']]
783 elif attrib
['extensions'] is not None:
784 extension_list
+= attrib
['extensions']
786 with
open(filename
, 'w') as f
:
787 f
.write(TEMPLATE
.render_unicode(
788 execution_stage
=execution_stage
,
789 version
=attrib
['version'],
790 extensions
=sorted(extension_list
),
791 input_type
=attrib
['input'],
792 output_type
=attrib
['output'],
793 sources
=attrib
['sources'](),
794 results
=attrib
['results'](attrib
['sources'](), attrib
['operator']),
799 if __name__
== '__main__':