Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / aarch64-sve-vls-arith-ops.c
blob a4648e9d59dcbf88f255bfe7610ead6581b1a113
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
3 // RUN: -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
4 // RUN: -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
6 // REQUIRES: aarch64-registered-target
8 #include <arm_sve.h>
// Vector width in bits handed to arm_sve_vector_bits() by every typedef below.
// NOTE(review): the RUN line passes -mvscale-min=4 -mvscale-max=4; presumably
// N must stay equal to 128 * vscale (= 512) -- confirm before changing either.
10 #define N 512
// Fixed-length signed integer vector types (N bits each).
12 typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
13 typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
14 typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
15 typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
// Fixed-length unsigned integer vector types (N bits each).
17 typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
18 typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
19 typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
20 typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
// Fixed-length floating-point vector types (N bits each).
22 typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
23 typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
24 typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
// Fixed-length predicate type; not referenced by the test cases visible in this part of the file.
26 typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
28 // ADDITION
30 // CHECK-LABEL: @add_i8(
31 // CHECK-NEXT: entry:
32 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
33 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
34 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
35 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
36 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_int8_t vectors. The FileCheck assertions above
// expect a single <64 x i8> add; keep the expression form as written.
38 fixed_int8_t add_i8(fixed_int8_t a, fixed_int8_t b) {
39 return a + b;
42 // CHECK-LABEL: @add_i16(
43 // CHECK-NEXT: entry:
44 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
45 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
46 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
47 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
48 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_int16_t vectors. The FileCheck assertions above
// expect a single <32 x i16> add.
50 fixed_int16_t add_i16(fixed_int16_t a, fixed_int16_t b) {
51 return a + b;
54 // CHECK-LABEL: @add_i32(
55 // CHECK-NEXT: entry:
56 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
57 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
58 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
59 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
60 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_int32_t vectors. The FileCheck assertions above
// expect a single <16 x i32> add.
62 fixed_int32_t add_i32(fixed_int32_t a, fixed_int32_t b) {
63 return a + b;
66 // CHECK-LABEL: @add_i64(
67 // CHECK-NEXT: entry:
68 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
69 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
70 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
71 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
72 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_int64_t vectors. The FileCheck assertions above
// expect a single <8 x i64> add.
74 fixed_int64_t add_i64(fixed_int64_t a, fixed_int64_t b) {
75 return a + b;
78 // CHECK-LABEL: @add_u8(
79 // CHECK-NEXT: entry:
80 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
81 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
82 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
83 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
84 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_uint8_t vectors. The FileCheck assertions above
// expect a single <64 x i8> add.
86 fixed_uint8_t add_u8(fixed_uint8_t a, fixed_uint8_t b) {
87 return a + b;
90 // CHECK-LABEL: @add_u16(
91 // CHECK-NEXT: entry:
92 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
93 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
94 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
95 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
96 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_uint16_t vectors. The FileCheck assertions above
// expect a single <32 x i16> add.
98 fixed_uint16_t add_u16(fixed_uint16_t a, fixed_uint16_t b) {
99 return a + b;
102 // CHECK-LABEL: @add_u32(
103 // CHECK-NEXT: entry:
104 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
105 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
106 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
107 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
108 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_uint32_t vectors. The FileCheck assertions above
// expect a single <16 x i32> add.
110 fixed_uint32_t add_u32(fixed_uint32_t a, fixed_uint32_t b) {
111 return a + b;
114 // CHECK-LABEL: @add_u64(
115 // CHECK-NEXT: entry:
116 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
117 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
118 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
119 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
120 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_uint64_t vectors. The FileCheck assertions above
// expect a single <8 x i64> add.
122 fixed_uint64_t add_u64(fixed_uint64_t a, fixed_uint64_t b) {
123 return a + b;
126 // CHECK-LABEL: @add_f16(
127 // CHECK-NEXT: entry:
128 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
129 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
130 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
131 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
132 // CHECK-NEXT: [[ADD:%.*]] = fadd <32 x float> [[CONV]], [[CONV2]]
133 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
134 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
135 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_float16_t vectors. The FileCheck assertions
// above expect both operands extended to <32 x float> (a first, then b), an
// fadd on <32 x float>, and a truncation back to <32 x half>.
137 fixed_float16_t add_f16(fixed_float16_t a, fixed_float16_t b) {
138 return a + b;
141 // CHECK-LABEL: @add_f32(
142 // CHECK-NEXT: entry:
143 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
144 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
145 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
146 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
147 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_float32_t vectors. The FileCheck assertions
// above expect a single <16 x float> fadd.
149 fixed_float32_t add_f32(fixed_float32_t a, fixed_float32_t b) {
150 return a + b;
153 // CHECK-LABEL: @add_f64(
154 // CHECK-NEXT: entry:
155 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
156 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
157 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
158 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
159 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Test case: a + b on two fixed_float64_t vectors. The FileCheck assertions
// above expect a single <8 x double> fadd.
161 fixed_float64_t add_f64(fixed_float64_t a, fixed_float64_t b) {
162 return a + b;
165 // CHECK-LABEL: @add_inplace_i8(
166 // CHECK-NEXT: entry:
167 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
168 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
169 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
170 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
171 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_int8_t, returning the updated
// a. The FileCheck assertions above expect a single <64 x i8> add.
173 fixed_int8_t add_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
174 return a += b;
177 // CHECK-LABEL: @add_inplace_i16(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
180 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
181 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
182 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
183 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_int16_t, returning the updated
// a. The FileCheck assertions above expect a single <32 x i16> add.
185 fixed_int16_t add_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
186 return a += b;
189 // CHECK-LABEL: @add_inplace_i32(
190 // CHECK-NEXT: entry:
191 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
192 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
193 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
194 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
195 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_int32_t, returning the updated
// a. The FileCheck assertions above expect a single <16 x i32> add.
197 fixed_int32_t add_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
198 return a += b;
201 // CHECK-LABEL: @add_inplace_i64(
202 // CHECK-NEXT: entry:
203 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
204 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
205 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
206 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
207 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_int64_t, returning the updated
// a. The FileCheck assertions above expect a single <8 x i64> add.
209 fixed_int64_t add_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
210 return a += b;
213 // CHECK-LABEL: @add_inplace_u8(
214 // CHECK-NEXT: entry:
215 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
216 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
217 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
218 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
219 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_uint8_t, returning the updated
// a. The FileCheck assertions above expect a single <64 x i8> add.
221 fixed_uint8_t add_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
222 return a += b;
225 // CHECK-LABEL: @add_inplace_u16(
226 // CHECK-NEXT: entry:
227 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
228 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
229 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
230 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
231 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_uint16_t, returning the
// updated a. The FileCheck assertions above expect a single <32 x i16> add.
233 fixed_uint16_t add_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
234 return a += b;
237 // CHECK-LABEL: @add_inplace_u32(
238 // CHECK-NEXT: entry:
239 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
240 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
241 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
242 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
243 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_uint32_t, returning the
// updated a. The FileCheck assertions above expect a single <16 x i32> add.
245 fixed_uint32_t add_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
246 return a += b;
249 // CHECK-LABEL: @add_inplace_u64(
250 // CHECK-NEXT: entry:
251 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
252 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
253 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
254 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
255 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_uint64_t, returning the
// updated a. The FileCheck assertions above expect a single <8 x i64> add.
257 fixed_uint64_t add_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
258 return a += b;
261 // CHECK-LABEL: @add_inplace_f16(
262 // CHECK-NEXT: entry:
263 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
264 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
265 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[B]] to <32 x float>
266 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[A]] to <32 x float>
267 // CHECK-NEXT: [[ADD:%.*]] = fadd <32 x float> [[CONV2]], [[CONV]]
268 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
269 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
270 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_float16_t, returning the
// updated a. The FileCheck assertions above expect b to be extended to
// <32 x float> before a (the reverse of the plain a + b case), then an fadd
// with a's extension as the left operand and a truncation back to <32 x half>.
272 fixed_float16_t add_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
273 return a += b;
276 // CHECK-LABEL: @add_inplace_f32(
277 // CHECK-NEXT: entry:
278 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
279 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
280 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
281 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
282 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_float32_t, returning the
// updated a. The FileCheck assertions above expect a single <16 x float> fadd.
284 fixed_float32_t add_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
285 return a += b;
288 // CHECK-LABEL: @add_inplace_f64(
289 // CHECK-NEXT: entry:
290 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
291 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
292 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
293 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
294 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Test case: compound assignment a += b on fixed_float64_t, returning the
// updated a. The FileCheck assertions above expect a single <8 x double> fadd.
296 fixed_float64_t add_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
297 return a += b;
300 // CHECK-LABEL: @add_scalar_i8(
301 // CHECK-NEXT: entry:
302 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
303 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
304 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
305 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
306 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
307 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: fixed_int8_t vector plus an int8_t scalar. The FileCheck
// assertions above expect b splatted across <64 x i8> (insertelement followed
// by shufflevector) and then a single <64 x i8> add.
309 fixed_int8_t add_scalar_i8(fixed_int8_t a, int8_t b) {
310 return a + b;
313 // CHECK-LABEL: @add_scalar_i16(
314 // CHECK-NEXT: entry:
315 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
316 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
317 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
318 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
319 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
320 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: fixed_int16_t vector plus an int16_t scalar. The FileCheck
// assertions above expect b splatted across <32 x i16> (insertelement followed
// by shufflevector) and then a single <32 x i16> add.
322 fixed_int16_t add_scalar_i16(fixed_int16_t a, int16_t b) {
323 return a + b;
326 // CHECK-LABEL: @add_scalar_i32(
327 // CHECK-NEXT: entry:
328 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
329 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
330 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
331 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
332 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
333 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: fixed_int32_t vector plus an int32_t scalar. The FileCheck
// assertions above expect b splatted across <16 x i32> (insertelement followed
// by shufflevector) and then a single <16 x i32> add.
335 fixed_int32_t add_scalar_i32(fixed_int32_t a, int32_t b) {
336 return a + b;
339 // CHECK-LABEL: @add_scalar_i64(
340 // CHECK-NEXT: entry:
341 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
342 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
343 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
344 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
345 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
346 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: fixed_int64_t vector plus an int64_t scalar. The FileCheck
// assertions above expect b splatted across <8 x i64> (insertelement followed
// by shufflevector) and then a single <8 x i64> add.
348 fixed_int64_t add_scalar_i64(fixed_int64_t a, int64_t b) {
349 return a + b;
352 // CHECK-LABEL: @add_scalar_u8(
353 // CHECK-NEXT: entry:
354 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
355 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
356 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
357 // CHECK-NEXT: [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
358 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
359 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: fixed_uint8_t vector plus a uint8_t scalar. The FileCheck
// assertions above expect b splatted across <64 x i8> (insertelement followed
// by shufflevector) and then a single <64 x i8> add.
361 fixed_uint8_t add_scalar_u8(fixed_uint8_t a, uint8_t b) {
362 return a + b;
365 // CHECK-LABEL: @add_scalar_u16(
366 // CHECK-NEXT: entry:
367 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
368 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
369 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
370 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
371 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
372 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: fixed_uint16_t vector plus a uint16_t scalar. The FileCheck
// assertions above expect b splatted across <32 x i16> (insertelement followed
// by shufflevector) and then a single <32 x i16> add.
374 fixed_uint16_t add_scalar_u16(fixed_uint16_t a, uint16_t b) {
375 return a + b;
378 // CHECK-LABEL: @add_scalar_u32(
379 // CHECK-NEXT: entry:
380 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
381 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
382 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
383 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
384 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
385 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: fixed_uint32_t vector plus a uint32_t scalar. The FileCheck
// assertions above expect b splatted across <16 x i32> (insertelement followed
// by shufflevector) and then a single <16 x i32> add.
387 fixed_uint32_t add_scalar_u32(fixed_uint32_t a, uint32_t b) {
388 return a + b;
391 // CHECK-LABEL: @add_scalar_u64(
392 // CHECK-NEXT: entry:
393 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
394 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
395 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
396 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
397 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
398 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: fixed_uint64_t vector plus a uint64_t scalar. The FileCheck
// assertions above expect b splatted across <8 x i64> (insertelement followed
// by shufflevector) and then a single <8 x i64> add.
400 fixed_uint64_t add_scalar_u64(fixed_uint64_t a, uint64_t b) {
401 return a + b;
404 // CHECK-LABEL: @add_scalar_f16(
405 // CHECK-NEXT: entry:
406 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
407 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
408 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
409 // CHECK-NEXT: [[ADD:%.*]] = fadd <32 x half> [[A]], [[SPLAT_SPLAT]]
410 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[ADD]], i64 0)
411 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Test case: fixed_float16_t vector plus a __fp16 scalar. The FileCheck
// assertions above expect b splatted across <32 x half> and an fadd performed
// directly on <32 x half>; unlike the vector + vector f16 case, no extension to
// float is asserted here.
413 fixed_float16_t add_scalar_f16(fixed_float16_t a, __fp16 b) {
414 return a + b;
417 // CHECK-LABEL: @add_scalar_f32(
418 // CHECK-NEXT: entry:
419 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
420 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
421 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
422 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[A]], [[SPLAT_SPLAT]]
423 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
424 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Test case: fixed_float32_t vector plus a float scalar. The FileCheck
// assertions above expect b splatted across <16 x float> (insertelement
// followed by shufflevector) and then a single <16 x float> fadd.
426 fixed_float32_t add_scalar_f32(fixed_float32_t a, float b) {
427 return a + b;
430 // CHECK-LABEL: @add_scalar_f64(
431 // CHECK-NEXT: entry:
432 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
433 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
434 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
435 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x double> [[A]], [[SPLAT_SPLAT]]
436 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
437 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Test case: fixed_float64_t vector plus a double scalar. The FileCheck
// assertions above expect b splatted across <8 x double> (insertelement
// followed by shufflevector) and then a single <8 x double> fadd.
439 fixed_float64_t add_scalar_f64(fixed_float64_t a, double b) {
440 return a + b;
443 // SUBTRACTION
445 // CHECK-LABEL: @sub_i8(
446 // CHECK-NEXT: entry:
447 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
448 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
449 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
450 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
451 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_int8_t vectors. The FileCheck assertions above
// expect a single <64 x i8> sub with a as the left operand.
453 fixed_int8_t sub_i8(fixed_int8_t a, fixed_int8_t b) {
454 return a - b;
457 // CHECK-LABEL: @sub_i16(
458 // CHECK-NEXT: entry:
459 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
460 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
461 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
462 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
463 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_int16_t vectors. The FileCheck assertions above
// expect a single <32 x i16> sub with a as the left operand.
465 fixed_int16_t sub_i16(fixed_int16_t a, fixed_int16_t b) {
466 return a - b;
469 // CHECK-LABEL: @sub_i32(
470 // CHECK-NEXT: entry:
471 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
472 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
473 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
474 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
475 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_int32_t vectors. The FileCheck assertions above
// expect a single <16 x i32> sub with a as the left operand.
477 fixed_int32_t sub_i32(fixed_int32_t a, fixed_int32_t b) {
478 return a - b;
481 // CHECK-LABEL: @sub_i64(
482 // CHECK-NEXT: entry:
483 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
484 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
485 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
486 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
487 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_int64_t vectors. The FileCheck assertions above
// expect a single <8 x i64> sub with a as the left operand.
489 fixed_int64_t sub_i64(fixed_int64_t a, fixed_int64_t b) {
490 return a - b;
493 // CHECK-LABEL: @sub_u8(
494 // CHECK-NEXT: entry:
495 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
496 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
497 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
498 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
499 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_uint8_t vectors. The FileCheck assertions above
// expect a single <64 x i8> sub with a as the left operand.
501 fixed_uint8_t sub_u8(fixed_uint8_t a, fixed_uint8_t b) {
502 return a - b;
505 // CHECK-LABEL: @sub_u16(
506 // CHECK-NEXT: entry:
507 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
508 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
509 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
510 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
511 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_uint16_t vectors. The FileCheck assertions
// above expect a single <32 x i16> sub with a as the left operand.
513 fixed_uint16_t sub_u16(fixed_uint16_t a, fixed_uint16_t b) {
514 return a - b;
517 // CHECK-LABEL: @sub_u32(
518 // CHECK-NEXT: entry:
519 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
520 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
521 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
522 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
523 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_uint32_t vectors. The FileCheck assertions
// above expect a single <16 x i32> sub with a as the left operand.
525 fixed_uint32_t sub_u32(fixed_uint32_t a, fixed_uint32_t b) {
526 return a - b;
529 // CHECK-LABEL: @sub_u64(
530 // CHECK-NEXT: entry:
531 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
532 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
533 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
534 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
535 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_uint64_t vectors. The FileCheck assertions
// above expect a single <8 x i64> sub with a as the left operand.
537 fixed_uint64_t sub_u64(fixed_uint64_t a, fixed_uint64_t b) {
538 return a - b;
541 // CHECK-LABEL: @sub_f16(
542 // CHECK-NEXT: entry:
543 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
544 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
545 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
546 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
547 // CHECK-NEXT: [[SUB:%.*]] = fsub <32 x float> [[CONV]], [[CONV2]]
548 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
549 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
550 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Test case: a - b on two fixed_float16_t vectors. The FileCheck assertions
// above expect both operands extended to <32 x float> (a first, then b), an
// fsub on <32 x float>, and a truncation back to <32 x half>.
552 fixed_float16_t sub_f16(fixed_float16_t a, fixed_float16_t b) {
553 return a - b;
556 // CHECK-LABEL: @sub_f32(
557 // CHECK-NEXT: entry:
558 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
559 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
560 // CHECK-NEXT: [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
561 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
562 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a - b` on two fixed_float32_t vectors.
// The CHECK lines above expect a single `fsub <16 x float>` on the two
// extracted fixed-length operands.
564 fixed_float32_t sub_f32(fixed_float32_t a, fixed_float32_t b) {
565 return a - b;
568 // CHECK-LABEL: @sub_f64(
569 // CHECK-NEXT: entry:
570 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
571 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
572 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
573 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
574 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a - b` on two fixed_float64_t vectors.
// The CHECK lines above expect a single `fsub <8 x double>` on the two
// extracted fixed-length operands.
576 fixed_float64_t sub_f64(fixed_float64_t a, fixed_float64_t b) {
577 return a - b;
580 // CHECK-LABEL: @sub_inplace_i8(
581 // CHECK-NEXT: entry:
582 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
583 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
584 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
585 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
586 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_int8_t vectors; the CHECK lines
// above expect a `sub <64 x i8>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
588 fixed_int8_t sub_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
589 return a - b;
592 // CHECK-LABEL: @sub_inplace_i16(
593 // CHECK-NEXT: entry:
594 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
595 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
596 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
597 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
598 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_int16_t vectors; the CHECK lines
// above expect a `sub <32 x i16>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
600 fixed_int16_t sub_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
601 return a - b;
604 // CHECK-LABEL: @sub_inplace_i32(
605 // CHECK-NEXT: entry:
606 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
607 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
608 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
609 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
610 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_int32_t vectors; the CHECK lines
// above expect a `sub <16 x i32>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
612 fixed_int32_t sub_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
613 return a - b;
616 // CHECK-LABEL: @sub_inplace_i64(
617 // CHECK-NEXT: entry:
618 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
619 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
620 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
621 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
622 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_int64_t vectors; the CHECK lines
// above expect a `sub <8 x i64>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
624 fixed_int64_t sub_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
625 return a - b;
628 // CHECK-LABEL: @sub_inplace_u8(
629 // CHECK-NEXT: entry:
630 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
631 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
632 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
633 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
634 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_uint8_t vectors; the CHECK lines
// above expect a `sub <64 x i8>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
636 fixed_uint8_t sub_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
637 return a - b;
640 // CHECK-LABEL: @sub_inplace_u16(
641 // CHECK-NEXT: entry:
642 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
643 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
644 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
645 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
646 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_uint16_t vectors; the CHECK lines
// above expect a `sub <32 x i16>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
648 fixed_uint16_t sub_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
649 return a - b;
652 // CHECK-LABEL: @sub_inplace_u32(
653 // CHECK-NEXT: entry:
654 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
655 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
656 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
657 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
658 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_uint32_t vectors; the CHECK lines
// above expect a `sub <16 x i32>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
660 fixed_uint32_t sub_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
661 return a - b;
664 // CHECK-LABEL: @sub_inplace_u64(
665 // CHECK-NEXT: entry:
666 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
667 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
668 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
669 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
670 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_uint64_t vectors; the CHECK lines
// above expect a `sub <8 x i64>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
672 fixed_uint64_t sub_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
673 return a - b;
676 // CHECK-LABEL: @sub_inplace_f16(
677 // CHECK-NEXT: entry:
678 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
679 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
680 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
681 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
682 // CHECK-NEXT: [[SUB:%.*]] = fsub <32 x float> [[CONV]], [[CONV2]]
683 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
684 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
685 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_float16_t vectors. The CHECK
// lines above expect both operands to be fpext'ed to <32 x float>, an
// `fsub` at float width, then an fptrunc back to <32 x half>.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
687 fixed_float16_t sub_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
688 return a - b;
691 // CHECK-LABEL: @sub_inplace_f32(
692 // CHECK-NEXT: entry:
693 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
694 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
695 // CHECK-NEXT: [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
696 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
697 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_float32_t vectors; the CHECK
// lines above expect an `fsub <16 x float>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
699 fixed_float32_t sub_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
700 return a - b;
703 // CHECK-LABEL: @sub_inplace_f64(
704 // CHECK-NEXT: entry:
705 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
706 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
707 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
708 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
709 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Codegen test input: `a - b` on two fixed_float64_t vectors; the CHECK
// lines above expect an `fsub <8 x double>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `-`, not
// the compound `-=` -- confirm whether compound assignment was intended.
711 fixed_float64_t sub_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
712 return a - b;
715 // CHECK-LABEL: @sub_scalar_i8(
716 // CHECK-NEXT: entry:
717 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
718 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
719 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
720 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
721 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
722 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: fixed_int8_t vector minus an int8_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <64 x i8> before the `sub`.
724 fixed_int8_t sub_scalar_i8(fixed_int8_t a, int8_t b) {
725 return a - b;
728 // CHECK-LABEL: @sub_scalar_i16(
729 // CHECK-NEXT: entry:
730 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
731 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
732 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
733 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
734 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
735 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: fixed_int16_t vector minus an int16_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <32 x i16> before the `sub`.
737 fixed_int16_t sub_scalar_i16(fixed_int16_t a, int16_t b) {
738 return a - b;
741 // CHECK-LABEL: @sub_scalar_i32(
742 // CHECK-NEXT: entry:
743 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
744 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
745 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
746 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
747 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
748 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: fixed_int32_t vector minus an int32_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <16 x i32> before the `sub`.
750 fixed_int32_t sub_scalar_i32(fixed_int32_t a, int32_t b) {
751 return a - b;
754 // CHECK-LABEL: @sub_scalar_i64(
755 // CHECK-NEXT: entry:
756 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
757 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
758 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
759 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
760 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
761 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: fixed_int64_t vector minus an int64_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <8 x i64> before the `sub`.
763 fixed_int64_t sub_scalar_i64(fixed_int64_t a, int64_t b) {
764 return a - b;
767 // CHECK-LABEL: @sub_scalar_u8(
768 // CHECK-NEXT: entry:
769 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
770 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
771 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
772 // CHECK-NEXT: [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
773 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
774 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: fixed_uint8_t vector minus a uint8_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <64 x i8> before the `sub`.
776 fixed_uint8_t sub_scalar_u8(fixed_uint8_t a, uint8_t b) {
777 return a - b;
780 // CHECK-LABEL: @sub_scalar_u16(
781 // CHECK-NEXT: entry:
782 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
783 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
784 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
785 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
786 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
787 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: fixed_uint16_t vector minus a uint16_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <32 x i16> before the `sub`.
789 fixed_uint16_t sub_scalar_u16(fixed_uint16_t a, uint16_t b) {
790 return a - b;
793 // CHECK-LABEL: @sub_scalar_u32(
794 // CHECK-NEXT: entry:
795 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
796 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
797 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
798 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
799 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
800 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: fixed_uint32_t vector minus a uint32_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <16 x i32> before the `sub`.
802 fixed_uint32_t sub_scalar_u32(fixed_uint32_t a, uint32_t b) {
803 return a - b;
806 // CHECK-LABEL: @sub_scalar_u64(
807 // CHECK-NEXT: entry:
808 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
809 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
810 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
811 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
812 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
813 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: fixed_uint64_t vector minus a uint64_t scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <8 x i64> before the `sub`.
815 fixed_uint64_t sub_scalar_u64(fixed_uint64_t a, uint64_t b) {
816 return a - b;
819 // CHECK-LABEL: @sub_scalar_f16(
820 // CHECK-NEXT: entry:
821 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
822 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
823 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
824 // CHECK-NEXT: [[SUB:%.*]] = fsub <32 x half> [[A]], [[SPLAT_SPLAT]]
825 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[SUB]], i64 0)
826 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Codegen test input: fixed_float16_t vector minus an __fp16 scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <32 x half> and the `fsub` to be performed directly
// on <32 x half>; no fpext/fptrunc pair appears in the expected IR.
828 fixed_float16_t sub_scalar_f16(fixed_float16_t a, __fp16 b) {
829 return a - b;
832 // CHECK-LABEL: @sub_scalar_f32(
833 // CHECK-NEXT: entry:
834 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
835 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
836 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
837 // CHECK-NEXT: [[SUB:%.*]] = fsub <16 x float> [[A]], [[SPLAT_SPLAT]]
838 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
839 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Codegen test input: fixed_float32_t vector minus a float scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <16 x float> before the `fsub`.
841 fixed_float32_t sub_scalar_f32(fixed_float32_t a, float b) {
842 return a - b;
845 // CHECK-LABEL: @sub_scalar_f64(
846 // CHECK-NEXT: entry:
847 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
848 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
849 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
850 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x double> [[A]], [[SPLAT_SPLAT]]
851 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
852 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Codegen test input: fixed_float64_t vector minus a double scalar.
// The CHECK lines above expect `b` to be splatted (insertelement +
// shufflevector) into a <8 x double> before the `fsub`.
854 fixed_float64_t sub_scalar_f64(fixed_float64_t a, double b) {
855 return a - b;
858 // MULTIPLICATION
860 // CHECK-LABEL: @mul_i8(
861 // CHECK-NEXT: entry:
862 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
863 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
864 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
865 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
866 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_int8_t vectors.
// The CHECK lines above expect a single `mul <64 x i8>` on the two
// extracted fixed-length operands.
868 fixed_int8_t mul_i8(fixed_int8_t a, fixed_int8_t b) {
869 return a * b;
872 // CHECK-LABEL: @mul_i16(
873 // CHECK-NEXT: entry:
874 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
875 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
876 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
877 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
878 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_int16_t vectors.
// The CHECK lines above expect a single `mul <32 x i16>` on the two
// extracted fixed-length operands.
880 fixed_int16_t mul_i16(fixed_int16_t a, fixed_int16_t b) {
881 return a * b;
884 // CHECK-LABEL: @mul_i32(
885 // CHECK-NEXT: entry:
886 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
887 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
888 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
889 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
890 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_int32_t vectors.
// The CHECK lines above expect a single `mul <16 x i32>` on the two
// extracted fixed-length operands.
892 fixed_int32_t mul_i32(fixed_int32_t a, fixed_int32_t b) {
893 return a * b;
896 // CHECK-LABEL: @mul_i64(
897 // CHECK-NEXT: entry:
898 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
899 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
900 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
901 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
902 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_int64_t vectors.
// The CHECK lines above expect a single `mul <8 x i64>` on the two
// extracted fixed-length operands.
904 fixed_int64_t mul_i64(fixed_int64_t a, fixed_int64_t b) {
905 return a * b;
908 // CHECK-LABEL: @mul_u8(
909 // CHECK-NEXT: entry:
910 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
911 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
912 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
913 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
914 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_uint8_t vectors.
// The CHECK lines above expect a single `mul <64 x i8>` on the two
// extracted fixed-length operands.
916 fixed_uint8_t mul_u8(fixed_uint8_t a, fixed_uint8_t b) {
917 return a * b;
920 // CHECK-LABEL: @mul_u16(
921 // CHECK-NEXT: entry:
922 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
923 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
924 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
925 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
926 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_uint16_t vectors.
// The CHECK lines above expect a single `mul <32 x i16>` on the two
// extracted fixed-length operands.
928 fixed_uint16_t mul_u16(fixed_uint16_t a, fixed_uint16_t b) {
929 return a * b;
932 // CHECK-LABEL: @mul_u32(
933 // CHECK-NEXT: entry:
934 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
935 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
936 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
937 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
938 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_uint32_t vectors.
// The CHECK lines above expect a single `mul <16 x i32>` on the two
// extracted fixed-length operands.
940 fixed_uint32_t mul_u32(fixed_uint32_t a, fixed_uint32_t b) {
941 return a * b;
944 // CHECK-LABEL: @mul_u64(
945 // CHECK-NEXT: entry:
946 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
947 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
948 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
949 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
950 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_uint64_t vectors.
// The CHECK lines above expect a single `mul <8 x i64>` on the two
// extracted fixed-length operands.
952 fixed_uint64_t mul_u64(fixed_uint64_t a, fixed_uint64_t b) {
953 return a * b;
956 // CHECK-LABEL: @mul_f16(
957 // CHECK-NEXT: entry:
958 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
959 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
960 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
961 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
962 // CHECK-NEXT: [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
963 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
964 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
965 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_float16_t vectors.
// The CHECK lines above expect both operands to be fpext'ed to
// <32 x float>, an `fmul` at float width, then an fptrunc back to
// <32 x half>.
967 fixed_float16_t mul_f16(fixed_float16_t a, fixed_float16_t b) {
968 return a * b;
971 // CHECK-LABEL: @mul_f32(
972 // CHECK-NEXT: entry:
973 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
974 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
975 // CHECK-NEXT: [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
976 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
977 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_float32_t vectors.
// The CHECK lines above expect a single `fmul <16 x float>` on the two
// extracted fixed-length operands.
979 fixed_float32_t mul_f32(fixed_float32_t a, fixed_float32_t b) {
980 return a * b;
983 // CHECK-LABEL: @mul_f64(
984 // CHECK-NEXT: entry:
985 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
986 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
987 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
988 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
989 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Codegen test input: element-wise `a * b` on two fixed_float64_t vectors.
// The CHECK lines above expect a single `fmul <8 x double>` on the two
// extracted fixed-length operands.
991 fixed_float64_t mul_f64(fixed_float64_t a, fixed_float64_t b) {
992 return a * b;
995 // CHECK-LABEL: @mul_inplace_i8(
996 // CHECK-NEXT: entry:
997 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
998 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
999 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
1000 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
1001 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_int8_t vectors; the CHECK lines
// above expect a `mul <64 x i8>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1003 fixed_int8_t mul_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
1004 return a * b;
1007 // CHECK-LABEL: @mul_inplace_i16(
1008 // CHECK-NEXT: entry:
1009 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1010 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1011 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
1012 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
1013 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_int16_t vectors; the CHECK lines
// above expect a `mul <32 x i16>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1015 fixed_int16_t mul_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
1016 return a * b;
1019 // CHECK-LABEL: @mul_inplace_i32(
1020 // CHECK-NEXT: entry:
1021 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1022 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1023 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
1024 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
1025 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_int32_t vectors; the CHECK lines
// above expect a `mul <16 x i32>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1027 fixed_int32_t mul_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
1028 return a * b;
1031 // CHECK-LABEL: @mul_inplace_i64(
1032 // CHECK-NEXT: entry:
1033 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1034 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1035 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
1036 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
1037 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_int64_t vectors; the CHECK lines
// above expect a `mul <8 x i64>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1039 fixed_int64_t mul_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
1040 return a * b;
1043 // CHECK-LABEL: @mul_inplace_u8(
1044 // CHECK-NEXT: entry:
1045 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1046 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1047 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
1048 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
1049 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_uint8_t vectors; the CHECK lines
// above expect a `mul <64 x i8>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1051 fixed_uint8_t mul_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
1052 return a * b;
1055 // CHECK-LABEL: @mul_inplace_u16(
1056 // CHECK-NEXT: entry:
1057 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1058 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1059 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
1060 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
1061 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_uint16_t vectors; the CHECK lines
// above expect a `mul <32 x i16>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1063 fixed_uint16_t mul_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
1064 return a * b;
1067 // CHECK-LABEL: @mul_inplace_u32(
1068 // CHECK-NEXT: entry:
1069 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1070 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1071 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
1072 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
1073 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_uint32_t vectors; the CHECK lines
// above expect a `mul <16 x i32>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1075 fixed_uint32_t mul_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
1076 return a * b;
1079 // CHECK-LABEL: @mul_inplace_u64(
1080 // CHECK-NEXT: entry:
1081 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1082 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1083 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
1084 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
1085 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Codegen test input: `a * b` on two fixed_uint64_t vectors; the CHECK lines
// above expect a `mul <8 x i64>`.
// NOTE(review): despite the `_inplace` name, the body uses binary `*`, not
// the compound `*=` -- confirm whether compound assignment was intended.
1087 fixed_uint64_t mul_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
1088 return a * b;
1091 // CHECK-LABEL: @mul_inplace_f16(
1092 // CHECK-NEXT: entry:
1093 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
1094 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
1095 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
1096 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
1097 // CHECK-NEXT: [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
1098 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
1099 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
1100 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
1102 fixed_float16_t mul_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
1103 return a * b;
1106 // CHECK-LABEL: @mul_inplace_f32(
1107 // CHECK-NEXT: entry:
1108 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
1109 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
1110 // CHECK-NEXT: [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
1111 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
1112 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
1114 fixed_float32_t mul_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
1115 return a * b;
1118 // CHECK-LABEL: @mul_inplace_f64(
1119 // CHECK-NEXT: entry:
1120 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
1121 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
1122 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
1123 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
1124 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
1126 fixed_float64_t mul_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
1127 return a * b;
1130 // CHECK-LABEL: @mul_scalar_i8(
1131 // CHECK-NEXT: entry:
1132 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1133 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1134 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1135 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
1136 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
1137 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1139 fixed_int8_t mul_scalar_i8(fixed_int8_t a, int8_t b) {
1140 return a * b;
1143 // CHECK-LABEL: @mul_scalar_i16(
1144 // CHECK-NEXT: entry:
1145 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1146 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1147 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1148 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
1149 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
1150 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1152 fixed_int16_t mul_scalar_i16(fixed_int16_t a, int16_t b) {
1153 return a * b;
1156 // CHECK-LABEL: @mul_scalar_i32(
1157 // CHECK-NEXT: entry:
1158 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1159 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1160 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1161 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
1162 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
1163 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1165 fixed_int32_t mul_scalar_i32(fixed_int32_t a, int32_t b) {
1166 return a * b;
1169 // CHECK-LABEL: @mul_scalar_i64(
1170 // CHECK-NEXT: entry:
1171 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1172 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1173 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1174 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
1175 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
1176 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1178 fixed_int64_t mul_scalar_i64(fixed_int64_t a, int64_t b) {
1179 return a * b;
1182 // CHECK-LABEL: @mul_scalar_u8(
1183 // CHECK-NEXT: entry:
1184 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1185 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1186 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1187 // CHECK-NEXT: [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
1188 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
1189 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1191 fixed_uint8_t mul_scalar_u8(fixed_uint8_t a, uint8_t b) {
1192 return a * b;
1195 // CHECK-LABEL: @mul_scalar_u16(
1196 // CHECK-NEXT: entry:
1197 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1198 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1199 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1200 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
1201 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
1202 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1204 fixed_uint16_t mul_scalar_u16(fixed_uint16_t a, uint16_t b) {
1205 return a * b;
1208 // CHECK-LABEL: @mul_scalar_u32(
1209 // CHECK-NEXT: entry:
1210 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1211 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1212 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1213 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
1214 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
1215 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1217 fixed_uint32_t mul_scalar_u32(fixed_uint32_t a, uint32_t b) {
1218 return a * b;
1221 // CHECK-LABEL: @mul_scalar_u64(
1222 // CHECK-NEXT: entry:
1223 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1224 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1225 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1226 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
1227 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
1228 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1230 fixed_uint64_t mul_scalar_u64(fixed_uint64_t a, uint64_t b) {
1231 return a * b;
1234 // CHECK-LABEL: @mul_scalar_f16(
1235 // CHECK-NEXT: entry:
1236 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
1237 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
1238 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
1239 // CHECK-NEXT: [[MUL:%.*]] = fmul <32 x half> [[A]], [[SPLAT_SPLAT]]
1240 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[MUL]], i64 0)
1241 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
1243 fixed_float16_t mul_scalar_f16(fixed_float16_t a, __fp16 b) {
1244 return a * b;
1247 // CHECK-LABEL: @mul_scalar_f32(
1248 // CHECK-NEXT: entry:
1249 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
1250 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
1251 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
1252 // CHECK-NEXT: [[MUL:%.*]] = fmul <16 x float> [[A]], [[SPLAT_SPLAT]]
1253 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
1254 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
1256 fixed_float32_t mul_scalar_f32(fixed_float32_t a, float b) {
1257 return a * b;
1260 // CHECK-LABEL: @mul_scalar_f64(
1261 // CHECK-NEXT: entry:
1262 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
1263 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
1264 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
1265 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x double> [[A]], [[SPLAT_SPLAT]]
1266 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
1267 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
1269 fixed_float64_t mul_scalar_f64(fixed_float64_t a, double b) {
1270 return a * b;
1273 // DIVISION
1275 // CHECK-LABEL: @div_i8(
1276 // CHECK-NEXT: entry:
1277 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1278 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1279 // CHECK-NEXT: [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
1280 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1281 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1283 fixed_int8_t div_i8(fixed_int8_t a, fixed_int8_t b) {
1284 return a / b;
1287 // CHECK-LABEL: @div_i16(
1288 // CHECK-NEXT: entry:
1289 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1290 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1291 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
1292 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1293 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1295 fixed_int16_t div_i16(fixed_int16_t a, fixed_int16_t b) {
1296 return a / b;
1299 // CHECK-LABEL: @div_i32(
1300 // CHECK-NEXT: entry:
1301 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1302 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1303 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
1304 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1305 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1307 fixed_int32_t div_i32(fixed_int32_t a, fixed_int32_t b) {
1308 return a / b;
1311 // CHECK-LABEL: @div_i64(
1312 // CHECK-NEXT: entry:
1313 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1314 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1315 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
1316 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1317 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1319 fixed_int64_t div_i64(fixed_int64_t a, fixed_int64_t b) {
1320 return a / b;
1323 // CHECK-LABEL: @div_u8(
1324 // CHECK-NEXT: entry:
1325 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1326 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1327 // CHECK-NEXT: [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
1328 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1329 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1331 fixed_uint8_t div_u8(fixed_uint8_t a, fixed_uint8_t b) {
1332 return a / b;
1335 // CHECK-LABEL: @div_u16(
1336 // CHECK-NEXT: entry:
1337 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1338 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1339 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
1340 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1341 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1343 fixed_uint16_t div_u16(fixed_uint16_t a, fixed_uint16_t b) {
1344 return a / b;
1347 // CHECK-LABEL: @div_u32(
1348 // CHECK-NEXT: entry:
1349 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1350 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1351 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
1352 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1353 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1355 fixed_uint32_t div_u32(fixed_uint32_t a, fixed_uint32_t b) {
1356 return a / b;
1359 // CHECK-LABEL: @div_u64(
1360 // CHECK-NEXT: entry:
1361 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1362 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1363 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
1364 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1365 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1367 fixed_uint64_t div_u64(fixed_uint64_t a, fixed_uint64_t b) {
1368 return a / b;
1371 // CHECK-LABEL: @div_f16(
1372 // CHECK-NEXT: entry:
1373 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
1374 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
1375 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
1376 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
1377 // CHECK-NEXT: [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
1378 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
1379 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
1380 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
1382 fixed_float16_t div_f16(fixed_float16_t a, fixed_float16_t b) {
1383 return a / b;
1386 // CHECK-LABEL: @div_f32(
1387 // CHECK-NEXT: entry:
1388 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
1389 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
1390 // CHECK-NEXT: [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
1391 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
1392 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
1394 fixed_float32_t div_f32(fixed_float32_t a, fixed_float32_t b) {
1395 return a / b;
1398 // CHECK-LABEL: @div_f64(
1399 // CHECK-NEXT: entry:
1400 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
1401 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
1402 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
1403 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
1404 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
1406 fixed_float64_t div_f64(fixed_float64_t a, fixed_float64_t b) {
1407 return a / b;
1410 // CHECK-LABEL: @div_inplace_i8(
1411 // CHECK-NEXT: entry:
1412 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1413 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1414 // CHECK-NEXT: [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
1415 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1416 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1418 fixed_int8_t div_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
1419 return a / b;
1422 // CHECK-LABEL: @div_inplace_i16(
1423 // CHECK-NEXT: entry:
1424 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1425 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1426 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
1427 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1428 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1430 fixed_int16_t div_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
1431 return a / b;
1434 // CHECK-LABEL: @div_inplace_i32(
1435 // CHECK-NEXT: entry:
1436 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1437 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1438 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
1439 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1440 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1442 fixed_int32_t div_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
1443 return a / b;
1446 // CHECK-LABEL: @div_inplace_i64(
1447 // CHECK-NEXT: entry:
1448 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1449 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1450 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
1451 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1452 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1454 fixed_int64_t div_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
1455 return a / b;
1458 // CHECK-LABEL: @div_inplace_u8(
1459 // CHECK-NEXT: entry:
1460 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1461 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1462 // CHECK-NEXT: [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
1463 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1464 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1466 fixed_uint8_t div_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
1467 return a / b;
1470 // CHECK-LABEL: @div_inplace_u16(
1471 // CHECK-NEXT: entry:
1472 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1473 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1474 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
1475 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1476 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1478 fixed_uint16_t div_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
1479 return a / b;
1482 // CHECK-LABEL: @div_inplace_u32(
1483 // CHECK-NEXT: entry:
1484 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1485 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1486 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
1487 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1488 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1490 fixed_uint32_t div_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
1491 return a / b;
1494 // CHECK-LABEL: @div_inplace_u64(
1495 // CHECK-NEXT: entry:
1496 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1497 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1498 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
1499 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1500 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1502 fixed_uint64_t div_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
1503 return a / b;
1506 // CHECK-LABEL: @div_inplace_f16(
1507 // CHECK-NEXT: entry:
1508 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
1509 // CHECK-NEXT: [[B:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
1510 // CHECK-NEXT: [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
1511 // CHECK-NEXT: [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
1512 // CHECK-NEXT: [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
1513 // CHECK-NEXT: [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
1514 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
1515 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
1517 fixed_float16_t div_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
1518 return a / b;
1521 // CHECK-LABEL: @div_inplace_f32(
1522 // CHECK-NEXT: entry:
1523 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
1524 // CHECK-NEXT: [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
1525 // CHECK-NEXT: [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
1526 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
1527 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
1529 fixed_float32_t div_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
1530 return a / b;
1533 // CHECK-LABEL: @div_inplace_f64(
1534 // CHECK-NEXT: entry:
1535 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
1536 // CHECK-NEXT: [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
1537 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
1538 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
1539 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
1541 fixed_float64_t div_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
1542 return a / b;
1545 // CHECK-LABEL: @div_scalar_i8(
1546 // CHECK-NEXT: entry:
1547 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1548 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1549 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1550 // CHECK-NEXT: [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
1551 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1552 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1554 fixed_int8_t div_scalar_i8(fixed_int8_t a, int8_t b) {
1555 return a / b;
1558 // CHECK-LABEL: @div_scalar_i16(
1559 // CHECK-NEXT: entry:
1560 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1561 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1562 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1563 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
1564 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1565 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1567 fixed_int16_t div_scalar_i16(fixed_int16_t a, int16_t b) {
1568 return a / b;
1571 // CHECK-LABEL: @div_scalar_i32(
1572 // CHECK-NEXT: entry:
1573 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1574 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1575 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1576 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
1577 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1578 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1580 fixed_int32_t div_scalar_i32(fixed_int32_t a, int32_t b) {
1581 return a / b;
1584 // CHECK-LABEL: @div_scalar_i64(
1585 // CHECK-NEXT: entry:
1586 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1587 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1588 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1589 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
1590 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1591 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
1593 fixed_int64_t div_scalar_i64(fixed_int64_t a, int64_t b) {
1594 return a / b;
1597 // CHECK-LABEL: @div_scalar_u8(
1598 // CHECK-NEXT: entry:
1599 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1600 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1601 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1602 // CHECK-NEXT: [[DIV:%.*]] = udiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
1603 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
1604 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
1606 fixed_uint8_t div_scalar_u8(fixed_uint8_t a, uint8_t b) {
1607 return a / b;
1610 // CHECK-LABEL: @div_scalar_u16(
1611 // CHECK-NEXT: entry:
1612 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1613 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1614 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1615 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
1616 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
1617 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
1619 fixed_uint16_t div_scalar_u16(fixed_uint16_t a, uint16_t b) {
1620 return a / b;
1623 // CHECK-LABEL: @div_scalar_u32(
1624 // CHECK-NEXT: entry:
1625 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1626 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1627 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1628 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
1629 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
1630 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
1632 fixed_uint32_t div_scalar_u32(fixed_uint32_t a, uint32_t b) {
1633 return a / b;
1636 // CHECK-LABEL: @div_scalar_u64(
1637 // CHECK-NEXT: entry:
1638 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1639 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1640 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1641 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
1642 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
1643 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Vector / scalar division on fixed_uint64_t. Per the CHECK lines above, `b`
// is splatted to <8 x i64> and the quotient is an unsigned `udiv`.
1645 fixed_uint64_t div_scalar_u64(fixed_uint64_t a, uint64_t b) {
1646 return a / b;
1649 // CHECK-LABEL: @div_scalar_f16(
1650 // CHECK-NEXT: entry:
1651 // CHECK-NEXT: [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
1652 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
1653 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
1654 // CHECK-NEXT: [[DIV:%.*]] = fdiv <32 x half> [[A]], [[SPLAT_SPLAT]]
1655 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[DIV]], i64 0)
1656 // CHECK-NEXT: ret <vscale x 8 x half> [[CASTSCALABLESVE]]
// Vector / scalar division on fixed_float16_t with an `__fp16` divisor. Per
// the CHECK lines above, `b` is splatted to <32 x half> and the quotient is an
// `fdiv`.
1658 fixed_float16_t div_scalar_f16(fixed_float16_t a, __fp16 b) {
1659 return a / b;
1662 // CHECK-LABEL: @div_scalar_f32(
1663 // CHECK-NEXT: entry:
1664 // CHECK-NEXT: [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
1665 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
1666 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
1667 // CHECK-NEXT: [[DIV:%.*]] = fdiv <16 x float> [[A]], [[SPLAT_SPLAT]]
1668 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
1669 // CHECK-NEXT: ret <vscale x 4 x float> [[CASTSCALABLESVE]]
// Vector / scalar division on fixed_float32_t. Per the CHECK lines above, `b`
// is splatted to <16 x float> and the quotient is an `fdiv`.
1671 fixed_float32_t div_scalar_f32(fixed_float32_t a, float b) {
1672 return a / b;
1675 // CHECK-LABEL: @div_scalar_f64(
1676 // CHECK-NEXT: entry:
1677 // CHECK-NEXT: [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
1678 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
1679 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
1680 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x double> [[A]], [[SPLAT_SPLAT]]
1681 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
1682 // CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
// Vector / scalar division on fixed_float64_t. Per the CHECK lines above, `b`
// is splatted to <8 x double> and the quotient is an `fdiv`.
1684 fixed_float64_t div_scalar_f64(fixed_float64_t a, double b) {
1685 return a / b;
1688 // REMAINDER
1690 // CHECK-LABEL: @rem_i8(
1691 // CHECK-NEXT: entry:
1692 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1693 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1694 // CHECK-NEXT: [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
1695 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1696 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Vector % vector on fixed_int8_t. The CHECK lines above expect a signed
// `srem <64 x i8>` of the two extracted operands.
1698 fixed_int8_t rem_i8(fixed_int8_t a, fixed_int8_t b) {
1699 return a % b;
1702 // CHECK-LABEL: @rem_i16(
1703 // CHECK-NEXT: entry:
1704 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1705 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1706 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
1707 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1708 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Vector % vector on fixed_int16_t. The CHECK lines above expect a signed
// `srem <32 x i16>` of the two extracted operands.
1710 fixed_int16_t rem_i16(fixed_int16_t a, fixed_int16_t b) {
1711 return a % b;
1714 // CHECK-LABEL: @rem_i32(
1715 // CHECK-NEXT: entry:
1716 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1717 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1718 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
1719 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1720 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Vector % vector on fixed_int32_t. The CHECK lines above expect a signed
// `srem <16 x i32>` of the two extracted operands.
1722 fixed_int32_t rem_i32(fixed_int32_t a, fixed_int32_t b) {
1723 return a % b;
1726 // CHECK-LABEL: @rem_i64(
1727 // CHECK-NEXT: entry:
1728 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1729 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1730 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
1731 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1732 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Vector % vector on fixed_int64_t. The CHECK lines above expect a signed
// `srem <8 x i64>` of the two extracted operands.
1734 fixed_int64_t rem_i64(fixed_int64_t a, fixed_int64_t b) {
1735 return a % b;
1738 // CHECK-LABEL: @rem_u8(
1739 // CHECK-NEXT: entry:
1740 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1741 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1742 // CHECK-NEXT: [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
1743 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1744 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Vector % vector on fixed_uint8_t. The CHECK lines above expect an unsigned
// `urem <64 x i8>` of the two extracted operands.
1746 fixed_uint8_t rem_u8(fixed_uint8_t a, fixed_uint8_t b) {
1747 return a % b;
1750 // CHECK-LABEL: @rem_u16(
1751 // CHECK-NEXT: entry:
1752 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1753 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1754 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
1755 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1756 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Vector % vector on fixed_uint16_t. The CHECK lines above expect an unsigned
// `urem <32 x i16>` of the two extracted operands.
1758 fixed_uint16_t rem_u16(fixed_uint16_t a, fixed_uint16_t b) {
1759 return a % b;
1762 // CHECK-LABEL: @rem_u32(
1763 // CHECK-NEXT: entry:
1764 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1765 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1766 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
1767 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1768 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Vector % vector on fixed_uint32_t. The CHECK lines above expect an unsigned
// `urem <16 x i32>` of the two extracted operands.
1770 fixed_uint32_t rem_u32(fixed_uint32_t a, fixed_uint32_t b) {
1771 return a % b;
1774 // CHECK-LABEL: @rem_u64(
1775 // CHECK-NEXT: entry:
1776 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1777 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1778 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
1779 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1780 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Vector % vector on fixed_uint64_t. The CHECK lines above expect an unsigned
// `urem <8 x i64>` of the two extracted operands.
1782 fixed_uint64_t rem_u64(fixed_uint64_t a, fixed_uint64_t b) {
1783 return a % b;
1786 // CHECK-LABEL: @rem_inplace_i8(
1787 // CHECK-NEXT: entry:
1788 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1789 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1790 // CHECK-NEXT: [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
1791 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1792 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// In-place remainder on fixed_int8_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_i8. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `srem <64 x i8> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1794 fixed_int8_t rem_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
1795 return a %= b;
1798 // CHECK-LABEL: @rem_inplace_i16(
1799 // CHECK-NEXT: entry:
1800 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1801 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1802 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
1803 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1804 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// In-place remainder on fixed_int16_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_i16. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `srem <32 x i16> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1806 fixed_int16_t rem_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
1807 return a %= b;
1810 // CHECK-LABEL: @rem_inplace_i32(
1811 // CHECK-NEXT: entry:
1812 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1813 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1814 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
1815 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1816 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// In-place remainder on fixed_int32_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_i32. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `srem <16 x i32> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1818 fixed_int32_t rem_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
1819 return a %= b;
1822 // CHECK-LABEL: @rem_inplace_i64(
1823 // CHECK-NEXT: entry:
1824 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1825 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1826 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
1827 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1828 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// In-place remainder on fixed_int64_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_i64. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `srem <8 x i64> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1830 fixed_int64_t rem_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
1831 return a %= b;
1834 // CHECK-LABEL: @rem_inplace_u8(
1835 // CHECK-NEXT: entry:
1836 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1837 // CHECK-NEXT: [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
1838 // CHECK-NEXT: [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
1839 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1840 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// In-place remainder on fixed_uint8_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_u8. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `urem <64 x i8> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1842 fixed_uint8_t rem_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
1843 return a %= b;
1846 // CHECK-LABEL: @rem_inplace_u16(
1847 // CHECK-NEXT: entry:
1848 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1849 // CHECK-NEXT: [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
1850 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
1851 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1852 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// In-place remainder on fixed_uint16_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_u16. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `urem <32 x i16> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1854 fixed_uint16_t rem_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
1855 return a %= b;
1858 // CHECK-LABEL: @rem_inplace_u32(
1859 // CHECK-NEXT: entry:
1860 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1861 // CHECK-NEXT: [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
1862 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
1863 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1864 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// In-place remainder on fixed_uint32_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_u32. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `urem <16 x i32> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1866 fixed_uint32_t rem_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
1867 return a %= b;
1870 // CHECK-LABEL: @rem_inplace_u64(
1871 // CHECK-NEXT: entry:
1872 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1873 // CHECK-NEXT: [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
1874 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
1875 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1876 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// In-place remainder on fixed_uint64_t: uses the compound-assignment operator
// `%=` so this case covers a different codegen path from rem_u64. `a` is a
// by-value parameter, so the update is local; the function returns the
// assigned remainder. The CHECK lines above expect `urem <8 x i64> [[A]], [[B]]`.
// NOTE(review): re-run utils/update_cc_test_checks.py to confirm the
// autogenerated assertions are unchanged.
1878 fixed_uint64_t rem_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
1879 return a %= b;
1882 // CHECK-LABEL: @rem_scalar_i8(
1883 // CHECK-NEXT: entry:
1884 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1885 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1886 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1887 // CHECK-NEXT: [[REM:%.*]] = srem <64 x i8> [[A]], [[SPLAT_SPLAT]]
1888 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1889 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_int8_t. Per the CHECK lines above, `b` is splatted
// to <64 x i8> and the remainder is a signed `srem`.
1891 fixed_int8_t rem_scalar_i8(fixed_int8_t a, int8_t b) {
1892 return a % b;
1895 // CHECK-LABEL: @rem_scalar_i16(
1896 // CHECK-NEXT: entry:
1897 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1898 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1899 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1900 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i16> [[A]], [[SPLAT_SPLAT]]
1901 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1902 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_int16_t. Per the CHECK lines above, `b` is splatted
// to <32 x i16> and the remainder is a signed `srem`.
1904 fixed_int16_t rem_scalar_i16(fixed_int16_t a, int16_t b) {
1905 return a % b;
1908 // CHECK-LABEL: @rem_scalar_i32(
1909 // CHECK-NEXT: entry:
1910 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1911 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1912 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1913 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i32> [[A]], [[SPLAT_SPLAT]]
1914 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1915 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_int32_t. Per the CHECK lines above, `b` is splatted
// to <16 x i32> and the remainder is a signed `srem`.
1917 fixed_int32_t rem_scalar_i32(fixed_int32_t a, int32_t b) {
1918 return a % b;
1921 // CHECK-LABEL: @rem_scalar_i64(
1922 // CHECK-NEXT: entry:
1923 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1924 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1925 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1926 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i64> [[A]], [[SPLAT_SPLAT]]
1927 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1928 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_int64_t. Per the CHECK lines above, `b` is splatted
// to <8 x i64> and the remainder is a signed `srem`.
1930 fixed_int64_t rem_scalar_i64(fixed_int64_t a, int64_t b) {
1931 return a % b;
1934 // CHECK-LABEL: @rem_scalar_u8(
1935 // CHECK-NEXT: entry:
1936 // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
1937 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
1938 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
1939 // CHECK-NEXT: [[REM:%.*]] = urem <64 x i8> [[A]], [[SPLAT_SPLAT]]
1940 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
1941 // CHECK-NEXT: ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_uint8_t. Per the CHECK lines above, `b` is splatted
// to <64 x i8> and the remainder is an unsigned `urem`.
1943 fixed_uint8_t rem_scalar_u8(fixed_uint8_t a, uint8_t b) {
1944 return a % b;
1947 // CHECK-LABEL: @rem_scalar_u16(
1948 // CHECK-NEXT: entry:
1949 // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
1950 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
1951 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
1952 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i16> [[A]], [[SPLAT_SPLAT]]
1953 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
1954 // CHECK-NEXT: ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_uint16_t. Per the CHECK lines above, `b` is
// splatted to <32 x i16> and the remainder is an unsigned `urem`.
1956 fixed_uint16_t rem_scalar_u16(fixed_uint16_t a, uint16_t b) {
1957 return a % b;
1960 // CHECK-LABEL: @rem_scalar_u32(
1961 // CHECK-NEXT: entry:
1962 // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
1963 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
1964 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1965 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i32> [[A]], [[SPLAT_SPLAT]]
1966 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
1967 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_uint32_t. Per the CHECK lines above, `b` is
// splatted to <16 x i32> and the remainder is an unsigned `urem`.
1969 fixed_uint32_t rem_scalar_u32(fixed_uint32_t a, uint32_t b) {
1970 return a % b;
1973 // CHECK-LABEL: @rem_scalar_u64(
1974 // CHECK-NEXT: entry:
1975 // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
1976 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
1977 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1978 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i64> [[A]], [[SPLAT_SPLAT]]
1979 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
1980 // CHECK-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
// Vector % scalar on fixed_uint64_t. Per the CHECK lines above, `b` is
// splatted to <8 x i64> and the remainder is an unsigned `urem`.
1982 fixed_uint64_t rem_scalar_u64(fixed_uint64_t a, uint64_t b) {
1983 return a % b;