//===-- generic/lib/misc/shuffle.cl ------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
// Switch-case arms that map a scalar index onto the matching component of a
// 2-element vector VAR: index 0 returns VAR.s0 and index 1 returns VAR.s1.
// The expansion consists of `case` labels and `return` statements, so it is
// only meaningful inside a switch statement within a value-returning function.
#define _CLC_ELEMENT_CASES2(VAR)                                               \
  case 0:                                                                      \
    return VAR.s0;                                                             \
  case 1:                                                                      \
    return VAR.s1;
// Case arms for indices 0..3 of a 4-element vector VAR. The arms for indices
// 0 and 1 come from _CLC_ELEMENT_CASES2; this macro appends the arms that
// return VAR.s2 and VAR.s3.
#define _CLC_ELEMENT_CASES4(VAR)                                               \
  _CLC_ELEMENT_CASES2(VAR)                                                     \
  case 2:                                                                      \
    return VAR.s2;                                                             \
  case 3:                                                                      \
    return VAR.s3;
// Case arms for indices 0..7 of an 8-element vector VAR. Indices 0..3 are
// delegated to _CLC_ELEMENT_CASES4; the arms added here return VAR.s4
// through VAR.s7.
#define _CLC_ELEMENT_CASES8(VAR)                                               \
  _CLC_ELEMENT_CASES4(VAR)                                                     \
  case 4:                                                                      \
    return VAR.s4;                                                             \
  case 5:                                                                      \
    return VAR.s5;                                                             \
  case 6:                                                                      \
    return VAR.s6;                                                             \
  case 7:                                                                      \
    return VAR.s7;
// Case arms for indices 0..15 of a 16-element vector VAR. Indices 0..7 are
// delegated to _CLC_ELEMENT_CASES8; the arms added here cover indices 8..15,
// whose components are spelled s8, s9 and then sA through sF.
#define _CLC_ELEMENT_CASES16(VAR)                                              \
  _CLC_ELEMENT_CASES8(VAR)                                                     \
  case 8:                                                                      \
    return VAR.s8;                                                             \
  case 9:                                                                      \
    return VAR.s9;                                                             \
  case 10:                                                                     \
    return VAR.sA;                                                             \
  case 11:                                                                     \
    return VAR.sB;                                                             \
  case 12:                                                                     \
    return VAR.sC;                                                             \
  case 13:                                                                     \
    return VAR.sD;                                                             \
  case 14:                                                                     \
    return VAR.sE;                                                             \
  case 15:                                                                     \
    return VAR.sF;
// Generates a scalar accessor named __clc_get_el_ARGTYPE##ARGSIZE##_##IDXTYPE:
// an inline function that takes a vector `x` of type ARGTYPE##ARGSIZE and an
// index `idx` of type IDXTYPE and returns an ARGTYPE. The visible part of its
// body expands _CLC_ELEMENT_CASES##ARGSIZE(x), i.e. one `case i: return x.s_i;`
// arm per vector component.
// NOTE(review): the statement that encloses those case arms and the closing
// braces are not visible in this excerpt - presumably a switch on `idx` with a
// default arm; confirm against the full file before editing this macro.
38 #define _CLC_GET_ELEMENT_DEFINE
(ARGTYPE, ARGSIZE
, IDXTYPE
) \
39 inline ARGTYPE __clc_get_el_
##ARGTYPE
##ARGSIZE
##_
##IDXTYPE
(ARGTYPE##ARGSIZE x
, IDXTYPE idx
) {\
41 _CLC_ELEMENT_CASES
##ARGSIZE
(x) \
// Assigns a single component of the shuffle result: ret_val.s##INDEX receives
// the element of `x` selected by mask.s##INDEX, fetched through the accessor
// __clc_get_el_ARGTYPE##ARGSIZE##_##MASKTYPE. The expansion refers to `x`,
// `mask` and `ret_val` by name, so all three must be in scope where it is used.
//
// The definition ends on its last statement with no trailing line
// continuation, so it cannot absorb whatever line happens to follow it.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE)        \
  ret_val.s##INDEX =                                                           \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);
// Fills components s0 and s1 of `ret_val`. Each one is read from `x` at the
// position given by the same-named component of `mask`, via the accessor
// __clc_get_el_ARGTYPE##ARGSIZE##_##MASKTYPE. `x`, `mask` and `ret_val` are
// taken from the scope of the expansion site.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                \
  ret_val.s0 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0);                \
  ret_val.s1 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
// Fills components s0..s3 of `ret_val`. The first two are delegated to
// _CLC_SHUFFLE_SET_2_ELEMENTS; s2 and s3 are assigned here, each read from `x`
// at the position given by the same-named component of `mask`.
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      \
  ret_val.s2 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2);                \
  ret_val.s3 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
// Fills components s0..s7 of `ret_val`. The first four are delegated to
// _CLC_SHUFFLE_SET_4_ELEMENTS; s4 through s7 are assigned here, each read from
// `x` at the position given by the same-named component of `mask`.
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      \
  ret_val.s4 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4);                \
  ret_val.s5 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5);                \
  ret_val.s6 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6);                \
  ret_val.s7 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
// Fills all sixteen components of `ret_val`. Components s0..s7 are delegated
// to _CLC_SHUFFLE_SET_8_ELEMENTS; s8, s9 and sA through sF are assigned here,
// each read from `x` at the position given by the same-named component of
// `mask`, via the accessor __clc_get_el_ARGTYPE##ARGSIZE##_##MASKTYPE.
//
// The definition ends on its last statement with no trailing line
// continuation, so it cannot absorb whatever line happens to follow it.
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)               \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      \
  ret_val.s8 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8);                \
  ret_val.s9 =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9);                \
  ret_val.sA =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA);                \
  ret_val.sB =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB);                \
  ret_val.sC =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC);                \
  ret_val.sD =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD);                \
  ret_val.sE =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE);                \
  ret_val.sF =                                                                 \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF);
// Generates the `shuffle` overload that returns ARGTYPE##2 from an input vector
// `x` of type ARGTYPE##ARGSIZE and a 2-component mask of type MASKTYPE##2.
// The mask is first ANDed component-wise with ARGSIZE-1 so that only its low
// bits select an element (every ARGSIZE passed in this file is 2, 4, 8 or 16),
// then _CLC_SHUFFLE_SET_2_ELEMENTS assigns the components of `ret_val`.
// _CLC_DEF and _CLC_OVERLOAD are defined outside this file.
// NOTE(review): the declaration of `ret_val`, the return statement and the
// closing brace are not visible in this excerpt; confirm against the full file.
76 #define _CLC_SHUFFLE_DEFINE2
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
77 _CLC_DEF _CLC_OVERLOAD ARGTYPE
##2 shuffle
(ARGTYPE##ARGSIZE x
, MASKTYPE
##2 mask
){ \
79 mask
&= (MASKTYPE##2)(ARGSIZE-1); \
80 _CLC_SHUFFLE_SET_2_ELEMENTS
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
// Generates the `shuffle` overload that returns ARGTYPE##4 from an input vector
// `x` of type ARGTYPE##ARGSIZE and a 4-component mask of type MASKTYPE##4.
// The mask is first ANDed component-wise with ARGSIZE-1 so that only its low
// bits select an element (every ARGSIZE passed in this file is 2, 4, 8 or 16),
// then _CLC_SHUFFLE_SET_4_ELEMENTS assigns the components of `ret_val`.
// _CLC_DEF and _CLC_OVERLOAD are defined outside this file.
// NOTE(review): the declaration of `ret_val`, the return statement and the
// closing brace are not visible in this excerpt; confirm against the full file.
84 #define _CLC_SHUFFLE_DEFINE4
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
85 _CLC_DEF _CLC_OVERLOAD ARGTYPE
##4 shuffle
(ARGTYPE##ARGSIZE x
, MASKTYPE
##4 mask
){ \
87 mask
&= (MASKTYPE##4)(ARGSIZE-1); \
88 _CLC_SHUFFLE_SET_4_ELEMENTS
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
// Generates the `shuffle` overload that returns ARGTYPE##8 from an input vector
// `x` of type ARGTYPE##ARGSIZE and an 8-component mask of type MASKTYPE##8.
// The mask is first ANDed component-wise with ARGSIZE-1 so that only its low
// bits select an element (every ARGSIZE passed in this file is 2, 4, 8 or 16),
// then _CLC_SHUFFLE_SET_8_ELEMENTS assigns the components of `ret_val`.
// _CLC_DEF and _CLC_OVERLOAD are defined outside this file.
// NOTE(review): the declaration of `ret_val`, the return statement and the
// closing brace are not visible in this excerpt; confirm against the full file.
92 #define _CLC_SHUFFLE_DEFINE8
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
93 _CLC_DEF _CLC_OVERLOAD ARGTYPE
##8 shuffle
(ARGTYPE##ARGSIZE x
, MASKTYPE
##8 mask
){ \
95 mask
&= (MASKTYPE##8)(ARGSIZE-1); \
96 _CLC_SHUFFLE_SET_8_ELEMENTS
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
// Generates the `shuffle` overload that returns ARGTYPE##16 from an input
// vector `x` of type ARGTYPE##ARGSIZE and a 16-component mask of type
// MASKTYPE##16. It declares the result `ret_val`, ANDs the mask component-wise
// with ARGSIZE-1 so that only its low bits select an element (every ARGSIZE
// passed in this file is 2, 4, 8 or 16), then lets
// _CLC_SHUFFLE_SET_16_ELEMENTS assign the components of `ret_val`.
// _CLC_DEF and _CLC_OVERLOAD are defined outside this file.
// NOTE(review): the return statement and the closing brace are not visible in
// this excerpt; confirm against the full file.
100 #define _CLC_SHUFFLE_DEFINE16
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
101 _CLC_DEF _CLC_OVERLOAD ARGTYPE
##16 shuffle
(ARGTYPE##ARGSIZE x
, MASKTYPE
##16 mask
){ \
102 ARGTYPE
##16 ret_val
; \
103 mask
&= (MASKTYPE##16)(ARGSIZE-1); \
104 _CLC_SHUFFLE_SET_16_ELEMENTS
(ARGTYPE, ARGSIZE
, MASKTYPE
) \
// For one input vector width ARGSIZE of element type INTYPE, emits the element
// accessor (_CLC_GET_ELEMENT_DEFINE) followed by the four `shuffle` overloads
// whose result and mask have 2, 4, 8 and 16 components. MASKTYPE is the
// element type of the mask and is forwarded unchanged to every generator.
//
// The definition ends on its last invocation with no trailing line
// continuation, so it cannot absorb whatever line happens to follow it.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE)                \
  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE)                           \
  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE)                              \
  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE)                              \
  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE)                              \
  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
// Emits the complete set of `shuffle` overloads for element type TYPE with
// mask element type MASKTYPE by invoking _CLC_VECTOR_SHUFFLE_MASKSIZE once for
// each input vector width: 2, 4, 8 and 16.
//
// The definition ends on its last invocation with no trailing line
// continuation, so it cannot absorb the instantiations that follow it.
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE)                             \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE)                              \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE)                              \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE)                              \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
// Instantiate the shuffle overloads for every supported element type. Each
// invocation names the element type first and the mask element type second:
// the integer types use uchar/ushort/uint/ulong masks, float uses uint.
123 _CLC_VECTOR_SHUFFLE_INSIZE
(char, uchar
)
124 _CLC_VECTOR_SHUFFLE_INSIZE
(short, ushort
)
125 _CLC_VECTOR_SHUFFLE_INSIZE
(int, uint
)
126 _CLC_VECTOR_SHUFFLE_INSIZE
(long, ulong
)
127 _CLC_VECTOR_SHUFFLE_INSIZE
(uchar, uchar
)
128 _CLC_VECTOR_SHUFFLE_INSIZE
(ushort, ushort
)
129 _CLC_VECTOR_SHUFFLE_INSIZE
(uint, uint
)
130 _CLC_VECTOR_SHUFFLE_INSIZE
(ulong, ulong
)
131 _CLC_VECTOR_SHUFFLE_INSIZE
(float, uint
)
// double is instantiated with a ulong mask after enabling cl_khr_fp64.
// NOTE(review): no guard around this pragma is visible in this excerpt -
// confirm that the full file only enables the extension where it is supported.
133 #pragma OPENCL EXTENSION cl_khr_fp64
: enable
134 _CLC_VECTOR_SHUFFLE_INSIZE
(double, ulong
)
// half is instantiated with a ushort mask after enabling cl_khr_fp16.
// NOTE(review): no guard around this pragma is visible in this excerpt -
// confirm that the full file only enables the extension where it is supported.
137 #pragma OPENCL EXTENSION cl_khr_fp16
: enable
138 _CLC_VECTOR_SHUFFLE_INSIZE
(half, ushort
)
// Remove every helper macro this file defined, so that none of them remains
// visible to code that follows.

// Case-arm generators and the element accessor built from them.
#undef _CLC_ELEMENT_CASES2
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES16
#undef _CLC_GET_ELEMENT_DEFINE

// Per-component result assignment helpers.
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_16_ELEMENTS

// shuffle() overload generators, one per result width.
#undef _CLC_SHUFFLE_DEFINE2
#undef _CLC_SHUFFLE_DEFINE4
#undef _CLC_SHUFFLE_DEFINE8
#undef _CLC_SHUFFLE_DEFINE16

// Top-level instantiation drivers.
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
#undef _CLC_VECTOR_SHUFFLE_INSIZE