[X86] Better handling of impossibly large stack frames (#124217)
[llvm-project.git] / libclc / generic / lib / misc / shuffle.cl
blob8ecb5847ca7a5fd133af9b4b3ee6e761fe3798e4
1 //===-- generic/lib/misc/shuffle.cl ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include <clc/clc.h>
// Switch arms for indices 0 and 1: each arm returns the matching .sN
// component of VAR from the enclosing function. Meant to be expanded inside
// the body of a switch statement.
#define _CLC_ELEMENT_CASES2(VAR) \
  case 0: \
    return (VAR).s0; \
  case 1: \
    return (VAR).s1;
// Switch arms for indices 0..3: reuses the two-element arms and appends the
// arms for components .s2 and .s3 of VAR.
#define _CLC_ELEMENT_CASES4(VAR) \
  _CLC_ELEMENT_CASES2(VAR) \
  case 2: \
    return (VAR).s2; \
  case 3: \
    return (VAR).s3;
// Switch arms for indices 0..7: reuses the four-element arms and appends the
// arms for components .s4 through .s7 of VAR.
#define _CLC_ELEMENT_CASES8(VAR) \
  _CLC_ELEMENT_CASES4(VAR) \
  case 4: \
    return (VAR).s4; \
  case 5: \
    return (VAR).s5; \
  case 6: \
    return (VAR).s6; \
  case 7: \
    return (VAR).s7;
// Switch arms for indices 0..15: reuses the eight-element arms and appends the
// arms for components .s8, .s9 and .sA through .sF of VAR (indices 10..15 map
// to the lettered components).
#define _CLC_ELEMENT_CASES16(VAR) \
  _CLC_ELEMENT_CASES8(VAR) \
  case 8: \
    return (VAR).s8; \
  case 9: \
    return (VAR).s9; \
  case 10: \
    return (VAR).sA; \
  case 11: \
    return (VAR).sB; \
  case 12: \
    return (VAR).sC; \
  case 13: \
    return (VAR).sD; \
  case 14: \
    return (VAR).sE; \
  case 15: \
    return (VAR).sF;
// Defines the helper __clc_get_el_<ARGTYPE><ARGSIZE>_<IDXTYPE>(x, idx).
//   ARGTYPE - element type of the vector (also the helper's return type)
//   ARGSIZE - number of components in x; selects _CLC_ELEMENT_CASES<ARGSIZE>
//   IDXTYPE - scalar type of the index argument
// The helper returns the component of x whose number equals idx, or 0 when
// idx matches none of the generated case labels.
// The definition ends on the closing brace with no trailing line
// continuation, so it cannot absorb whatever line follows it.
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
  inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) { \
    switch (idx) { \
      _CLC_ELEMENT_CASES##ARGSIZE(x) \
      default: return 0; \
    } \
  }
// Emits one assignment: component INDEX of ret_val receives the element of x
// selected by component INDEX of mask, looked up through the matching
// __clc_get_el_<ARGTYPE><ARGSIZE>_<MASKTYPE> helper.
// Expects ret_val, x and mask to be in scope at the expansion site.
// The definition ends without a trailing line continuation so the following
// line is not pulled into the macro body.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
  ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);
// Fills components .s0 and .s1 of ret_val, one assignment per component, each
// taking the element of x chosen by the same-numbered component of mask.
// Expects ret_val, x and mask to be in scope at the expansion site.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 0, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 1, MASKTYPE)
// Fills components .s0 through .s3 of ret_val: the first two via the
// two-element macro, then .s2 and .s3 individually.
// Expects ret_val, x and mask to be in scope at the expansion site.
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 2, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 3, MASKTYPE)
// Fills components .s0 through .s7 of ret_val: the first four via the
// four-element macro, then .s4 through .s7 individually.
// Expects ret_val, x and mask to be in scope at the expansion site.
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 4, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 5, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 6, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 7, MASKTYPE)
// Fills components .s0 through .sF of ret_val: the first eight via the
// eight-element macro, then .s8, .s9 and .sA through .sF individually. Every
// assignment takes the element of x chosen by the same-named component of
// mask. Expects ret_val, x and mask to be in scope at the expansion site.
// The last line carries no line continuation, so the definition stops here
// instead of swallowing the next line of the file.
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
  ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
  ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
  ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
  ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
  ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
  ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
  ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF);
// Defines the shuffle() overload that takes an ARGSIZE-component vector x of
// ARGTYPE and a 2-component mask of MASKTYPE, and returns a 2-component
// vector of ARGTYPE.
// Each mask component is first ANDed with ARGSIZE-1, then used as the index
// of the component of x copied into the same position of the result.
// The generated function body is closed inside the macro and the definition
// ends there, without a trailing line continuation.
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 mask) { \
    ARGTYPE##2 ret_val; \
    mask &= (MASKTYPE##2)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
// Defines the shuffle() overload that takes an ARGSIZE-component vector x of
// ARGTYPE and a 4-component mask of MASKTYPE, and returns a 4-component
// vector of ARGTYPE.
// Each mask component is first ANDed with ARGSIZE-1, then used as the index
// of the component of x copied into the same position of the result.
// The generated function body is closed inside the macro and the definition
// ends there, without a trailing line continuation.
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 mask) { \
    ARGTYPE##4 ret_val; \
    mask &= (MASKTYPE##4)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
// Defines the shuffle() overload that takes an ARGSIZE-component vector x of
// ARGTYPE and an 8-component mask of MASKTYPE, and returns an 8-component
// vector of ARGTYPE.
// Each mask component is first ANDed with ARGSIZE-1, then used as the index
// of the component of x copied into the same position of the result.
// The generated function body is closed inside the macro and the definition
// ends there, without a trailing line continuation.
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 mask) { \
    ARGTYPE##8 ret_val; \
    mask &= (MASKTYPE##8)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
// Defines the shuffle() overload that takes an ARGSIZE-component vector x of
// ARGTYPE and a 16-component mask of MASKTYPE, and returns a 16-component
// vector of ARGTYPE.
// Each mask component is first ANDed with ARGSIZE-1, then used as the index
// of the component of x copied into the same position of the result.
// The generated function body is closed inside the macro and the definition
// ends there, without a trailing line continuation.
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 mask) { \
    ARGTYPE##16 ret_val; \
    mask &= (MASKTYPE##16)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
// For one input vector shape (INTYPE with ARGSIZE components) and one mask
// element type, emits the element-lookup helper followed by the shuffle()
// overloads for result/mask widths 2, 4, 8 and 16.
// The helper is emitted first because the shuffle bodies call it.
// The final line has no line continuation, so the definition ends here.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
// For one element type TYPE and one mask element type MASKTYPE, emits the
// full set of shuffle() definitions for input vectors of 2, 4, 8 and 16
// components.
// The final line has no line continuation, so the instantiations that follow
// this definition are not absorbed into the macro body.
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
// Instantiate shuffle() for every supported element type, grouped by the
// mask element type paired with it.

// uchar masks
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)

// ushort masks
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)

// uint masks (float vectors are indexed with uint masks as well)
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)

// ulong masks
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)

// double is instantiated only when cl_khr_fp64 is defined; it uses ulong masks.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
#endif

// half is instantiated only when cl_khr_fp16 is defined; it uses ushort masks.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
#endif
// Remove every helper macro defined by this file, outermost generators first.

// Top-level generators
#undef _CLC_VECTOR_SHUFFLE_INSIZE
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE

// shuffle() overload generators
#undef _CLC_SHUFFLE_DEFINE16
#undef _CLC_SHUFFLE_DEFINE8
#undef _CLC_SHUFFLE_DEFINE4
#undef _CLC_SHUFFLE_DEFINE2

// Result-component assignment helpers
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT

// Element lookup helper generator and its switch arms
#undef _CLC_GET_ELEMENT_DEFINE
#undef _CLC_ELEMENT_CASES16
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES2