[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / test / CodeGen / riscv-rvv-vls-arith-ops.c
blob76fcf38a0d98f04d6671e203970055f139c02777
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
3 // RUN: -target-feature +f -target-feature +d -disable-O0-optnone \
4 // RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
5 // RUN: opt -S -passes=sroa | FileCheck %s
7 // REQUIRES: riscv-registered-target
9 #include <stdint.h>
11 typedef __rvv_int8m1_t vint8m1_t;
12 typedef __rvv_uint8m1_t vuint8m1_t;
13 typedef __rvv_int16m1_t vint16m1_t;
14 typedef __rvv_uint16m1_t vuint16m1_t;
15 typedef __rvv_int32m1_t vint32m1_t;
16 typedef __rvv_uint32m1_t vuint32m1_t;
17 typedef __rvv_int64m1_t vint64m1_t;
18 typedef __rvv_uint64m1_t vuint64m1_t;
19 typedef __rvv_float32m1_t vfloat32m1_t;
20 typedef __rvv_float64m1_t vfloat64m1_t;
22 typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
23 typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
24 typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
25 typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
27 typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
28 typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
29 typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
30 typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
32 typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
33 typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
35 // ADDITION
37 // CHECK-LABEL: @add_i8(
38 // CHECK-NEXT: entry:
39 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
40 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
41 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
42 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
43 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
45 fixed_int8m1_t add_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
46 return a + b;
49 // CHECK-LABEL: @add_i16(
50 // CHECK-NEXT: entry:
51 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
52 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
53 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
54 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
55 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
57 fixed_int16m1_t add_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
58 return a + b;
61 // CHECK-LABEL: @add_i32(
62 // CHECK-NEXT: entry:
63 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
64 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
65 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
66 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
67 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
69 fixed_int32m1_t add_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
70 return a + b;
73 // CHECK-LABEL: @add_i64(
74 // CHECK-NEXT: entry:
75 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
76 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
77 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
78 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
79 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
81 fixed_int64m1_t add_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
82 return a + b;
85 // CHECK-LABEL: @add_u8(
86 // CHECK-NEXT: entry:
87 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
88 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
89 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
90 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
91 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
93 fixed_uint8m1_t add_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
94 return a + b;
97 // CHECK-LABEL: @add_u16(
98 // CHECK-NEXT: entry:
99 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
100 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
101 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
102 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
103 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
105 fixed_uint16m1_t add_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
106 return a + b;
109 // CHECK-LABEL: @add_u32(
110 // CHECK-NEXT: entry:
111 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
112 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
113 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
114 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
115 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
117 fixed_uint32m1_t add_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
118 return a + b;
121 // CHECK-LABEL: @add_u64(
122 // CHECK-NEXT: entry:
123 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
124 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
125 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
126 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
127 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
129 fixed_uint64m1_t add_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
130 return a + b;
133 // CHECK-LABEL: @add_f32(
134 // CHECK-NEXT: entry:
135 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
136 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
137 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
138 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
139 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
141 fixed_float32m1_t add_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
142 return a + b;
145 // CHECK-LABEL: @add_f64(
146 // CHECK-NEXT: entry:
147 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
148 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
149 // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
150 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
151 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
153 fixed_float64m1_t add_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
154 return a + b;
157 // CHECK-LABEL: @add_inplace_i8(
158 // CHECK-NEXT: entry:
159 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
160 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
161 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
162 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
163 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
165 fixed_int8m1_t add_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
166 return a += b;
169 // CHECK-LABEL: @add_inplace_i16(
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
172 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
173 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
174 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
175 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
177 fixed_int16m1_t add_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
178 return a += b;
181 // CHECK-LABEL: @add_inplace_i32(
182 // CHECK-NEXT: entry:
183 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
184 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
185 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
186 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
187 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
189 fixed_int32m1_t add_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
190 return a += b;
193 // CHECK-LABEL: @add_inplace_i64(
194 // CHECK-NEXT: entry:
195 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
196 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
197 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
198 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
199 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
201 fixed_int64m1_t add_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
202 return a += b;
205 // CHECK-LABEL: @add_inplace_u8(
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
208 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
209 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
210 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
211 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
213 fixed_uint8m1_t add_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
214 return a += b;
217 // CHECK-LABEL: @add_inplace_u16(
218 // CHECK-NEXT: entry:
219 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
220 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
221 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
222 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
223 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
225 fixed_uint16m1_t add_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
226 return a += b;
229 // CHECK-LABEL: @add_inplace_u32(
230 // CHECK-NEXT: entry:
231 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
232 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
233 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
234 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
235 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
237 fixed_uint32m1_t add_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
238 return a += b;
241 // CHECK-LABEL: @add_inplace_u64(
242 // CHECK-NEXT: entry:
243 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
244 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
245 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
246 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
247 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
249 fixed_uint64m1_t add_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
250 return a += b;
253 // CHECK-LABEL: @add_inplace_f32(
254 // CHECK-NEXT: entry:
255 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
256 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
257 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
258 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
259 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
261 fixed_float32m1_t add_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
262 return a += b;
265 // CHECK-LABEL: @add_inplace_f64(
266 // CHECK-NEXT: entry:
267 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
268 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
269 // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
270 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
271 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
273 fixed_float64m1_t add_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
274 return a += b;
277 // CHECK-LABEL: @add_scalar_i8(
278 // CHECK-NEXT: entry:
279 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
280 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
281 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
282 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
283 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
284 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
286 fixed_int8m1_t add_scalar_i8(fixed_int8m1_t a, int8_t b) {
287 return a + b;
290 // CHECK-LABEL: @add_scalar_i16(
291 // CHECK-NEXT: entry:
292 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
293 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
294 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
295 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
296 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
297 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
299 fixed_int16m1_t add_scalar_i16(fixed_int16m1_t a, int16_t b) {
300 return a + b;
303 // CHECK-LABEL: @add_scalar_i32(
304 // CHECK-NEXT: entry:
305 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
306 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
307 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
308 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
309 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
310 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
312 fixed_int32m1_t add_scalar_i32(fixed_int32m1_t a, int32_t b) {
313 return a + b;
316 // CHECK-LABEL: @add_scalar_i64(
317 // CHECK-NEXT: entry:
318 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
319 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
320 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
321 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
322 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
323 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
325 fixed_int64m1_t add_scalar_i64(fixed_int64m1_t a, int64_t b) {
326 return a + b;
329 // CHECK-LABEL: @add_scalar_u8(
330 // CHECK-NEXT: entry:
331 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
332 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
333 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
334 // CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
335 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
336 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
338 fixed_uint8m1_t add_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
339 return a + b;
342 // CHECK-LABEL: @add_scalar_u16(
343 // CHECK-NEXT: entry:
344 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
345 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
346 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
347 // CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
348 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
349 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
351 fixed_uint16m1_t add_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
352 return a + b;
355 // CHECK-LABEL: @add_scalar_u32(
356 // CHECK-NEXT: entry:
357 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
358 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
359 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
360 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
361 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
362 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
364 fixed_uint32m1_t add_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
365 return a + b;
368 // CHECK-LABEL: @add_scalar_u64(
369 // CHECK-NEXT: entry:
370 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
371 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
372 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
373 // CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
374 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
375 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
377 fixed_uint64m1_t add_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
378 return a + b;
381 // CHECK-LABEL: @add_scalar_f32(
382 // CHECK-NEXT: entry:
383 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
384 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
385 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
386 // CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[SPLAT_SPLAT]]
387 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
388 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
390 fixed_float32m1_t add_scalar_f32(fixed_float32m1_t a, float b) {
391 return a + b;
394 // CHECK-LABEL: @add_scalar_f64(
395 // CHECK-NEXT: entry:
396 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
397 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
398 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
399 // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[SPLAT_SPLAT]]
400 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
401 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
403 fixed_float64m1_t add_scalar_f64(fixed_float64m1_t a, double b) {
404 return a + b;
407 // SUBTRACTION
409 // CHECK-LABEL: @sub_i8(
410 // CHECK-NEXT: entry:
411 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
412 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
413 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
414 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
415 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
417 fixed_int8m1_t sub_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
418 return a - b;
421 // CHECK-LABEL: @sub_i16(
422 // CHECK-NEXT: entry:
423 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
424 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
425 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
426 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
427 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
429 fixed_int16m1_t sub_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
430 return a - b;
433 // CHECK-LABEL: @sub_i32(
434 // CHECK-NEXT: entry:
435 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
436 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
437 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
438 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
439 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
441 fixed_int32m1_t sub_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
442 return a - b;
445 // CHECK-LABEL: @sub_i64(
446 // CHECK-NEXT: entry:
447 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
448 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
449 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
450 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
451 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
453 fixed_int64m1_t sub_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
454 return a - b;
457 // CHECK-LABEL: @sub_u8(
458 // CHECK-NEXT: entry:
459 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
460 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
461 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
462 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
463 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
465 fixed_uint8m1_t sub_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
466 return a - b;
469 // CHECK-LABEL: @sub_u16(
470 // CHECK-NEXT: entry:
471 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
472 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
473 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
474 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
475 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
477 fixed_uint16m1_t sub_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
478 return a - b;
481 // CHECK-LABEL: @sub_u32(
482 // CHECK-NEXT: entry:
483 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
484 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
485 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
486 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
487 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
489 fixed_uint32m1_t sub_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
490 return a - b;
493 // CHECK-LABEL: @sub_u64(
494 // CHECK-NEXT: entry:
495 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
496 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
497 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
498 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
499 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
501 fixed_uint64m1_t sub_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
502 return a - b;
505 // CHECK-LABEL: @sub_f32(
506 // CHECK-NEXT: entry:
507 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
508 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
509 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
510 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
511 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
513 fixed_float32m1_t sub_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
514 return a - b;
517 // CHECK-LABEL: @sub_f64(
518 // CHECK-NEXT: entry:
519 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
520 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
521 // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
522 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
523 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
525 fixed_float64m1_t sub_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
526 return a - b;
529 // CHECK-LABEL: @sub_inplace_i8(
530 // CHECK-NEXT: entry:
531 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
532 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
533 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
534 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
535 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
537 fixed_int8m1_t sub_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
538 return a - b;
541 // CHECK-LABEL: @sub_inplace_i16(
542 // CHECK-NEXT: entry:
543 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
544 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
545 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
546 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
547 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
549 fixed_int16m1_t sub_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
550 return a - b;
553 // CHECK-LABEL: @sub_inplace_i32(
554 // CHECK-NEXT: entry:
555 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
556 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
557 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
558 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
559 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
561 fixed_int32m1_t sub_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
562 return a - b;
565 // CHECK-LABEL: @sub_inplace_i64(
566 // CHECK-NEXT: entry:
567 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
568 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
569 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
570 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
571 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
573 fixed_int64m1_t sub_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
574 return a - b;
577 // CHECK-LABEL: @sub_inplace_u8(
578 // CHECK-NEXT: entry:
579 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
580 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
581 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
582 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
583 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
585 fixed_uint8m1_t sub_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
586 return a - b;
589 // CHECK-LABEL: @sub_inplace_u16(
590 // CHECK-NEXT: entry:
591 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
592 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
593 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
594 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
595 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
597 fixed_uint16m1_t sub_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
598 return a - b;
601 // CHECK-LABEL: @sub_inplace_u32(
602 // CHECK-NEXT: entry:
603 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
604 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
605 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
606 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
607 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
609 fixed_uint32m1_t sub_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
610 return a - b;
613 // CHECK-LABEL: @sub_inplace_u64(
614 // CHECK-NEXT: entry:
615 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
616 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
617 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
618 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
619 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
621 fixed_uint64m1_t sub_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
622 return a - b;
625 // CHECK-LABEL: @sub_inplace_f32(
626 // CHECK-NEXT: entry:
627 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
628 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
629 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
630 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
631 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
633 fixed_float32m1_t sub_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
634 return a - b;
637 // CHECK-LABEL: @sub_inplace_f64(
638 // CHECK-NEXT: entry:
639 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
640 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
641 // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
642 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
643 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
645 fixed_float64m1_t sub_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
646 return a - b;
649 // CHECK-LABEL: @sub_scalar_i8(
650 // CHECK-NEXT: entry:
651 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
652 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
653 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
654 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
655 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
656 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
658 fixed_int8m1_t sub_scalar_i8(fixed_int8m1_t a, int8_t b) {
659 return a - b;
662 // CHECK-LABEL: @sub_scalar_i16(
663 // CHECK-NEXT: entry:
664 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
665 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
666 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
667 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
668 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
669 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
671 fixed_int16m1_t sub_scalar_i16(fixed_int16m1_t a, int16_t b) {
672 return a - b;
675 // CHECK-LABEL: @sub_scalar_i32(
676 // CHECK-NEXT: entry:
677 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
678 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
679 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
680 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
681 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
682 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
684 fixed_int32m1_t sub_scalar_i32(fixed_int32m1_t a, int32_t b) {
685 return a - b;
688 // CHECK-LABEL: @sub_scalar_i64(
689 // CHECK-NEXT: entry:
690 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
691 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
692 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
693 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
694 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
695 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
697 fixed_int64m1_t sub_scalar_i64(fixed_int64m1_t a, int64_t b) {
698 return a - b;
701 // CHECK-LABEL: @sub_scalar_u8(
702 // CHECK-NEXT: entry:
703 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
704 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
705 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
706 // CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
707 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
708 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
710 fixed_uint8m1_t sub_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
711 return a - b;
714 // CHECK-LABEL: @sub_scalar_u16(
715 // CHECK-NEXT: entry:
716 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
717 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
718 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
719 // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
720 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
721 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
723 fixed_uint16m1_t sub_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
724 return a - b;
727 // CHECK-LABEL: @sub_scalar_u32(
728 // CHECK-NEXT: entry:
729 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
730 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
731 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
732 // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
733 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
734 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
736 fixed_uint32m1_t sub_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
737 return a - b;
740 // CHECK-LABEL: @sub_scalar_u64(
741 // CHECK-NEXT: entry:
742 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
743 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
744 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
745 // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
746 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
747 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
749 fixed_uint64m1_t sub_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
750 return a - b;
753 // CHECK-LABEL: @sub_scalar_f32(
754 // CHECK-NEXT: entry:
755 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
756 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
757 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
758 // CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[SPLAT_SPLAT]]
759 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
760 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
762 fixed_float32m1_t sub_scalar_f32(fixed_float32m1_t a, float b) {
763 return a - b;
766 // CHECK-LABEL: @sub_scalar_f64(
767 // CHECK-NEXT: entry:
768 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
769 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
770 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
771 // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[SPLAT_SPLAT]]
772 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
773 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
775 fixed_float64m1_t sub_scalar_f64(fixed_float64m1_t a, double b) {
776 return a - b;
779 // MULTIPLICATION
781 // CHECK-LABEL: @mul_i8(
782 // CHECK-NEXT: entry:
783 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
784 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
785 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
786 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
787 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
789 fixed_int8m1_t mul_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
790 return a * b;
793 // CHECK-LABEL: @mul_i16(
794 // CHECK-NEXT: entry:
795 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
796 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
797 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
798 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
799 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
801 fixed_int16m1_t mul_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
802 return a * b;
805 // CHECK-LABEL: @mul_i32(
806 // CHECK-NEXT: entry:
807 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
808 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
809 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
810 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
811 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
813 fixed_int32m1_t mul_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
814 return a * b;
817 // CHECK-LABEL: @mul_i64(
818 // CHECK-NEXT: entry:
819 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
820 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
821 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
822 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
823 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
825 fixed_int64m1_t mul_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
826 return a * b;
829 // CHECK-LABEL: @mul_u8(
830 // CHECK-NEXT: entry:
831 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
832 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
833 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
834 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
835 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
837 fixed_uint8m1_t mul_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
838 return a * b;
841 // CHECK-LABEL: @mul_u16(
842 // CHECK-NEXT: entry:
843 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
844 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
845 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
846 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
847 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
849 fixed_uint16m1_t mul_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
850 return a * b;
853 // CHECK-LABEL: @mul_u32(
854 // CHECK-NEXT: entry:
855 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
856 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
857 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
858 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
859 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
861 fixed_uint32m1_t mul_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
862 return a * b;
865 // CHECK-LABEL: @mul_u64(
866 // CHECK-NEXT: entry:
867 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
868 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
869 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
870 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
871 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
873 fixed_uint64m1_t mul_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
874 return a * b;
877 // CHECK-LABEL: @mul_f32(
878 // CHECK-NEXT: entry:
879 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
880 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
881 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]]
882 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
883 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
885 fixed_float32m1_t mul_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
886 return a * b;
889 // CHECK-LABEL: @mul_f64(
890 // CHECK-NEXT: entry:
891 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
892 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
893 // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]]
894 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
895 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
897 fixed_float64m1_t mul_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
898 return a * b;
901 // CHECK-LABEL: @mul_inplace_i8(
902 // CHECK-NEXT: entry:
903 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
904 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
905 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
906 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
907 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
909 fixed_int8m1_t mul_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
910 return a * b;
913 // CHECK-LABEL: @mul_inplace_i16(
914 // CHECK-NEXT: entry:
915 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
916 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
917 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
918 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
919 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
921 fixed_int16m1_t mul_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
922 return a * b;
925 // CHECK-LABEL: @mul_inplace_i32(
926 // CHECK-NEXT: entry:
927 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
928 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
929 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
930 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
931 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
933 fixed_int32m1_t mul_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
934 return a * b;
937 // CHECK-LABEL: @mul_inplace_i64(
938 // CHECK-NEXT: entry:
939 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
940 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
941 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
942 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
943 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
945 fixed_int64m1_t mul_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
946 return a * b;
949 // CHECK-LABEL: @mul_inplace_u8(
950 // CHECK-NEXT: entry:
951 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
952 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
953 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
954 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
955 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
957 fixed_uint8m1_t mul_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
958 return a * b;
961 // CHECK-LABEL: @mul_inplace_u16(
962 // CHECK-NEXT: entry:
963 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
964 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
965 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
966 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
967 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
969 fixed_uint16m1_t mul_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
970 return a * b;
973 // CHECK-LABEL: @mul_inplace_u32(
974 // CHECK-NEXT: entry:
975 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
976 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
977 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
978 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
979 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
981 fixed_uint32m1_t mul_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
982 return a * b;
985 // CHECK-LABEL: @mul_inplace_u64(
986 // CHECK-NEXT: entry:
987 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
988 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
989 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
990 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
991 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
993 fixed_uint64m1_t mul_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
994 return a * b;
997 // CHECK-LABEL: @mul_inplace_f32(
998 // CHECK-NEXT: entry:
999 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
1000 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
1001 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]]
1002 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
1003 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
1005 fixed_float32m1_t mul_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
1006 return a * b;
1009 // CHECK-LABEL: @mul_inplace_f64(
1010 // CHECK-NEXT: entry:
1011 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
1012 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
1013 // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]]
1014 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
1015 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
1017 fixed_float64m1_t mul_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
1018 return a * b;
1021 // CHECK-LABEL: @mul_scalar_i8(
1022 // CHECK-NEXT: entry:
1023 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1024 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1025 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1026 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]]
1027 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
1028 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
1030 fixed_int8m1_t mul_scalar_i8(fixed_int8m1_t a, int8_t b) {
1031 return a * b;
1034 // CHECK-LABEL: @mul_scalar_i16(
1035 // CHECK-NEXT: entry:
1036 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1037 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1038 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1039 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]]
1040 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
1041 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
1043 fixed_int16m1_t mul_scalar_i16(fixed_int16m1_t a, int16_t b) {
1044 return a * b;
1047 // CHECK-LABEL: @mul_scalar_i32(
1048 // CHECK-NEXT: entry:
1049 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1050 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1051 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1052 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]]
1053 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
1054 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
1056 fixed_int32m1_t mul_scalar_i32(fixed_int32m1_t a, int32_t b) {
1057 return a * b;
1060 // CHECK-LABEL: @mul_scalar_i64(
1061 // CHECK-NEXT: entry:
1062 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1063 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1064 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1065 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]]
1066 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
1067 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
1069 fixed_int64m1_t mul_scalar_i64(fixed_int64m1_t a, int64_t b) {
1070 return a * b;
1073 // CHECK-LABEL: @mul_scalar_u8(
1074 // CHECK-NEXT: entry:
1075 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1076 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1077 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1078 // CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]]
1079 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
1080 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
1082 fixed_uint8m1_t mul_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
1083 return a * b;
1086 // CHECK-LABEL: @mul_scalar_u16(
1087 // CHECK-NEXT: entry:
1088 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1089 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1090 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1091 // CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]]
1092 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
1093 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
1095 fixed_uint16m1_t mul_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
1096 return a * b;
1099 // CHECK-LABEL: @mul_scalar_u32(
1100 // CHECK-NEXT: entry:
1101 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1102 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1103 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1104 // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]]
1105 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
1106 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Multiplies every 32-bit unsigned lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// product is one `mul <8 x i32>`.
1108 fixed_uint32m1_t mul_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
1109 return a * b;
1112 // CHECK-LABEL: @mul_scalar_u64(
1113 // CHECK-NEXT: entry:
1114 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1115 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1116 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1117 // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]]
1118 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
1119 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Multiplies every 64-bit unsigned lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// product is one `mul <4 x i64>`.
1121 fixed_uint64m1_t mul_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
1122 return a * b;
1125 // CHECK-LABEL: @mul_scalar_f32(
1126 // CHECK-NEXT: entry:
1127 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
1128 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
1129 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1130 // CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[SPLAT_SPLAT]]
1131 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
1132 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
// Multiplies every float lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// product is one `fmul <8 x float>`.
1134 fixed_float32m1_t mul_scalar_f32(fixed_float32m1_t a, float b) {
1135 return a * b;
1138 // CHECK-LABEL: @mul_scalar_f64(
1139 // CHECK-NEXT: entry:
1140 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
1141 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
1142 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1143 // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[SPLAT_SPLAT]]
1144 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
1145 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
// Multiplies every double lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// product is one `fmul <4 x double>`.
1147 fixed_float64m1_t mul_scalar_f64(fixed_float64m1_t a, double b) {
1148 return a * b;
1151 // DIVISION
1153 // CHECK-LABEL: @div_i8(
1154 // CHECK-NEXT: entry:
1155 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1156 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1157 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]]
1158 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1159 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Lane-wise signed division of two fixed-length int8 vectors; the expected IR
// above is a single `sdiv <32 x i8>`.
1161 fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
1162 return a / b;
1165 // CHECK-LABEL: @div_i16(
1166 // CHECK-NEXT: entry:
1167 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1168 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1169 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]]
1170 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1171 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Lane-wise signed division of two fixed-length int16 vectors; the expected IR
// above is a single `sdiv <16 x i16>`.
1173 fixed_int16m1_t div_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
1174 return a / b;
1177 // CHECK-LABEL: @div_i32(
1178 // CHECK-NEXT: entry:
1179 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1180 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1181 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]]
1182 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1183 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Lane-wise signed division of two fixed-length int32 vectors; the expected IR
// above is a single `sdiv <8 x i32>`.
1185 fixed_int32m1_t div_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
1186 return a / b;
1189 // CHECK-LABEL: @div_i64(
1190 // CHECK-NEXT: entry:
1191 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1192 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1193 // CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]]
1194 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1195 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Lane-wise signed division of two fixed-length int64 vectors; the expected IR
// above is a single `sdiv <4 x i64>`.
1197 fixed_int64m1_t div_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
1198 return a / b;
1201 // CHECK-LABEL: @div_u8(
1202 // CHECK-NEXT: entry:
1203 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1204 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1205 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]]
1206 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1207 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Lane-wise unsigned division of two fixed-length uint8 vectors; the expected
// IR above is a single `udiv <32 x i8>`.
1209 fixed_uint8m1_t div_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
1210 return a / b;
1213 // CHECK-LABEL: @div_u16(
1214 // CHECK-NEXT: entry:
1215 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1216 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1217 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]]
1218 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1219 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Lane-wise unsigned division of two fixed-length uint16 vectors; the expected
// IR above is a single `udiv <16 x i16>`.
1221 fixed_uint16m1_t div_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
1222 return a / b;
1225 // CHECK-LABEL: @div_u32(
1226 // CHECK-NEXT: entry:
1227 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1228 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1229 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]]
1230 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1231 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Lane-wise unsigned division of two fixed-length uint32 vectors; the expected
// IR above is a single `udiv <8 x i32>`.
1233 fixed_uint32m1_t div_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
1234 return a / b;
1237 // CHECK-LABEL: @div_u64(
1238 // CHECK-NEXT: entry:
1239 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1240 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1241 // CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]]
1242 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1243 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Lane-wise unsigned division of two fixed-length uint64 vectors; the expected
// IR above is a single `udiv <4 x i64>`.
1245 fixed_uint64m1_t div_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
1246 return a / b;
1249 // CHECK-LABEL: @div_f32(
1250 // CHECK-NEXT: entry:
1251 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
1252 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
1253 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]]
1254 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
1255 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
// Lane-wise division of two fixed-length float vectors; the expected IR above
// is a single `fdiv <8 x float>`.
1257 fixed_float32m1_t div_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
1258 return a / b;
1261 // CHECK-LABEL: @div_f64(
1262 // CHECK-NEXT: entry:
1263 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
1264 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
1265 // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]]
1266 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
1267 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
// Lane-wise division of two fixed-length double vectors; the expected IR above
// is a single `fdiv <4 x double>`.
1269 fixed_float64m1_t div_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
1270 return a / b;
1273 // CHECK-LABEL: @div_inplace_i8(
1274 // CHECK-NEXT: entry:
1275 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1276 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1277 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]]
1278 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1279 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Signed int8 vector division; the expected IR above is `sdiv <32 x i8>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_i8 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1281 fixed_int8m1_t div_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
1282 return a / b;
1285 // CHECK-LABEL: @div_inplace_i16(
1286 // CHECK-NEXT: entry:
1287 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1288 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1289 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]]
1290 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1291 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Signed int16 vector division; the expected IR above is `sdiv <16 x i16>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_i16 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1293 fixed_int16m1_t div_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
1294 return a / b;
1297 // CHECK-LABEL: @div_inplace_i32(
1298 // CHECK-NEXT: entry:
1299 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1300 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1301 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]]
1302 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1303 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Signed int32 vector division; the expected IR above is `sdiv <8 x i32>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_i32 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1305 fixed_int32m1_t div_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
1306 return a / b;
1309 // CHECK-LABEL: @div_inplace_i64(
1310 // CHECK-NEXT: entry:
1311 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1312 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1313 // CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]]
1314 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1315 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Signed int64 vector division; the expected IR above is `sdiv <4 x i64>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_i64 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1317 fixed_int64m1_t div_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
1318 return a / b;
1321 // CHECK-LABEL: @div_inplace_u8(
1322 // CHECK-NEXT: entry:
1323 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1324 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1325 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]]
1326 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1327 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Unsigned int8 vector division; the expected IR above is `udiv <32 x i8>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_u8 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1329 fixed_uint8m1_t div_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
1330 return a / b;
1333 // CHECK-LABEL: @div_inplace_u16(
1334 // CHECK-NEXT: entry:
1335 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1336 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1337 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]]
1338 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1339 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Unsigned int16 vector division; the expected IR above is `udiv <16 x i16>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_u16 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1341 fixed_uint16m1_t div_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
1342 return a / b;
1345 // CHECK-LABEL: @div_inplace_u32(
1346 // CHECK-NEXT: entry:
1347 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1348 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1349 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]]
1350 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1351 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Unsigned int32 vector division; the expected IR above is `udiv <8 x i32>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_u32 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1353 fixed_uint32m1_t div_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
1354 return a / b;
1357 // CHECK-LABEL: @div_inplace_u64(
1358 // CHECK-NEXT: entry:
1359 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1360 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1361 // CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]]
1362 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1363 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Unsigned int64 vector division; the expected IR above is `udiv <4 x i64>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_u64 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1365 fixed_uint64m1_t div_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
1366 return a / b;
1369 // CHECK-LABEL: @div_inplace_f32(
1370 // CHECK-NEXT: entry:
1371 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
1372 // CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
1373 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]]
1374 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
1375 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
// Float vector division; the expected IR above is `fdiv <8 x float>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_f32 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1377 fixed_float32m1_t div_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
1378 return a / b;
1381 // CHECK-LABEL: @div_inplace_f64(
1382 // CHECK-NEXT: entry:
1383 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
1384 // CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
1385 // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]]
1386 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
1387 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
// Double vector division; the expected IR above is `fdiv <4 x double>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a / b`
// expression as div_f64 rather than a compound assignment (`a /= b`) -- confirm
// whether compound-assignment coverage was intended.
1389 fixed_float64m1_t div_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
1390 return a / b;
1393 // CHECK-LABEL: @div_scalar_i8(
1394 // CHECK-NEXT: entry:
1395 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1396 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1397 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1398 // CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[SPLAT_SPLAT]]
1399 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1400 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Divides every signed 8-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `sdiv <32 x i8>`.
1402 fixed_int8m1_t div_scalar_i8(fixed_int8m1_t a, int8_t b) {
1403 return a / b;
1406 // CHECK-LABEL: @div_scalar_i16(
1407 // CHECK-NEXT: entry:
1408 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1409 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1410 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1411 // CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[SPLAT_SPLAT]]
1412 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1413 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Divides every signed 16-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `sdiv <16 x i16>`.
1415 fixed_int16m1_t div_scalar_i16(fixed_int16m1_t a, int16_t b) {
1416 return a / b;
1419 // CHECK-LABEL: @div_scalar_i32(
1420 // CHECK-NEXT: entry:
1421 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1422 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1423 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1424 // CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[SPLAT_SPLAT]]
1425 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1426 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Divides every signed 32-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `sdiv <8 x i32>`.
1428 fixed_int32m1_t div_scalar_i32(fixed_int32m1_t a, int32_t b) {
1429 return a / b;
1432 // CHECK-LABEL: @div_scalar_i64(
1433 // CHECK-NEXT: entry:
1434 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1435 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1436 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1437 // CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[SPLAT_SPLAT]]
1438 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1439 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Divides every signed 64-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `sdiv <4 x i64>`.
1441 fixed_int64m1_t div_scalar_i64(fixed_int64m1_t a, int64_t b) {
1442 return a / b;
1445 // CHECK-LABEL: @div_scalar_u8(
1446 // CHECK-NEXT: entry:
1447 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1448 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1449 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1450 // CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[SPLAT_SPLAT]]
1451 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
1452 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Divides every unsigned 8-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `udiv <32 x i8>`.
1454 fixed_uint8m1_t div_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
1455 return a / b;
1458 // CHECK-LABEL: @div_scalar_u16(
1459 // CHECK-NEXT: entry:
1460 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1461 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1462 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1463 // CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[SPLAT_SPLAT]]
1464 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
1465 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Divides every unsigned 16-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `udiv <16 x i16>`.
1467 fixed_uint16m1_t div_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
1468 return a / b;
1471 // CHECK-LABEL: @div_scalar_u32(
1472 // CHECK-NEXT: entry:
1473 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1474 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1475 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1476 // CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[SPLAT_SPLAT]]
1477 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
1478 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Divides every unsigned 32-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `udiv <8 x i32>`.
1480 fixed_uint32m1_t div_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
1481 return a / b;
1484 // CHECK-LABEL: @div_scalar_u64(
1485 // CHECK-NEXT: entry:
1486 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1487 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1488 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1489 // CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[SPLAT_SPLAT]]
1490 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
1491 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Divides every unsigned 64-bit lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `udiv <4 x i64>`.
1493 fixed_uint64m1_t div_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
1494 return a / b;
1497 // CHECK-LABEL: @div_scalar_f32(
1498 // CHECK-NEXT: entry:
1499 // CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
1500 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
1501 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1502 // CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[SPLAT_SPLAT]]
1503 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
1504 // CHECK-NEXT: ret <vscale x 2 x float> [[CAST_SCALABLE]]
// Divides every float lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `fdiv <8 x float>`.
1506 fixed_float32m1_t div_scalar_f32(fixed_float32m1_t a, float b) {
1507 return a / b;
1510 // CHECK-LABEL: @div_scalar_f64(
1511 // CHECK-NEXT: entry:
1512 // CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
1513 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
1514 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1515 // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[SPLAT_SPLAT]]
1516 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
1517 // CHECK-NEXT: ret <vscale x 1 x double> [[CAST_SCALABLE]]
// Divides every double lane of `a` by the scalar `b`.
// Expected IR above: `b` is splatted (insertelement + shufflevector) and the
// quotient is one `fdiv <4 x double>`.
1519 fixed_float64m1_t div_scalar_f64(fixed_float64m1_t a, double b) {
1520 return a / b;
1523 // REMAINDER
1525 // CHECK-LABEL: @rem_i8(
1526 // CHECK-NEXT: entry:
1527 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1528 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1529 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]]
1530 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1531 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Lane-wise signed remainder of two fixed-length int8 vectors; the expected IR
// above is a single `srem <32 x i8>`.
1533 fixed_int8m1_t rem_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
1534 return a % b;
1537 // CHECK-LABEL: @rem_i16(
1538 // CHECK-NEXT: entry:
1539 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1540 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1541 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]]
1542 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1543 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Lane-wise signed remainder of two fixed-length int16 vectors; the expected
// IR above is a single `srem <16 x i16>`.
1545 fixed_int16m1_t rem_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
1546 return a % b;
1549 // CHECK-LABEL: @rem_i32(
1550 // CHECK-NEXT: entry:
1551 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1552 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1553 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]]
1554 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1555 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Lane-wise signed remainder of two fixed-length int32 vectors; the expected
// IR above is a single `srem <8 x i32>`.
1557 fixed_int32m1_t rem_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
1558 return a % b;
1561 // CHECK-LABEL: @rem_i64(
1562 // CHECK-NEXT: entry:
1563 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1564 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1565 // CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]]
1566 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1567 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Lane-wise signed remainder of two fixed-length int64 vectors; the expected
// IR above is a single `srem <4 x i64>`.
1569 fixed_int64m1_t rem_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
1570 return a % b;
1573 // CHECK-LABEL: @rem_u8(
1574 // CHECK-NEXT: entry:
1575 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1576 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1577 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]]
1578 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1579 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Lane-wise unsigned remainder of two fixed-length uint8 vectors; the expected
// IR above is a single `urem <32 x i8>`.
1581 fixed_uint8m1_t rem_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
1582 return a % b;
1585 // CHECK-LABEL: @rem_u16(
1586 // CHECK-NEXT: entry:
1587 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1588 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1589 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]]
1590 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1591 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Lane-wise unsigned remainder of two fixed-length uint16 vectors; the
// expected IR above is a single `urem <16 x i16>`.
1593 fixed_uint16m1_t rem_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
1594 return a % b;
1597 // CHECK-LABEL: @rem_u32(
1598 // CHECK-NEXT: entry:
1599 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1600 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1601 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]]
1602 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1603 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Lane-wise unsigned remainder of two fixed-length uint32 vectors; the
// expected IR above is a single `urem <8 x i32>`.
1605 fixed_uint32m1_t rem_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
1606 return a % b;
1609 // CHECK-LABEL: @rem_u64(
1610 // CHECK-NEXT: entry:
1611 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1612 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1613 // CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]]
1614 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1615 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Lane-wise unsigned remainder of two fixed-length uint64 vectors; the
// expected IR above is a single `urem <4 x i64>`.
1617 fixed_uint64m1_t rem_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
1618 return a % b;
1621 // CHECK-LABEL: @rem_inplace_i8(
1622 // CHECK-NEXT: entry:
1623 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1624 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1625 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]]
1626 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1627 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Signed int8 vector remainder; the expected IR above is `srem <32 x i8>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a % b`
// expression as rem_i8 rather than a compound assignment (`a %= b`) -- confirm
// whether compound-assignment coverage was intended.
1629 fixed_int8m1_t rem_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
1630 return a % b;
1633 // CHECK-LABEL: @rem_inplace_i16(
1634 // CHECK-NEXT: entry:
1635 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1636 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1637 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]]
1638 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1639 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Signed int16 vector remainder; the expected IR above is `srem <16 x i16>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a % b`
// expression as rem_i16 rather than a compound assignment (`a %= b`) -- confirm
// whether compound-assignment coverage was intended.
1641 fixed_int16m1_t rem_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
1642 return a % b;
1645 // CHECK-LABEL: @rem_inplace_i32(
1646 // CHECK-NEXT: entry:
1647 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1648 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1649 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]]
1650 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1651 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Signed int32 vector remainder; the expected IR above is `srem <8 x i32>`.
// NOTE(review): despite the `_inplace` name, the body is the same `a % b`
// expression as rem_i32 rather than a compound assignment (`a %= b`) -- confirm
// whether compound-assignment coverage was intended.
1653 fixed_int32m1_t rem_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
1654 return a % b;
1657 // CHECK-LABEL: @rem_inplace_i64(
1658 // CHECK-NEXT: entry:
1659 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1660 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1661 // CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]]
1662 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1663 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Test input: vector % vector on two fixed-length signed 64-bit RVV operands.
// The autogenerated assertions above expect this to lower to `srem <4 x i64>`.
1665 fixed_int64m1_t rem_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
1666 return a % b;
1669 // CHECK-LABEL: @rem_inplace_u8(
1670 // CHECK-NEXT: entry:
1671 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1672 // CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
1673 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]]
1674 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1675 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Test input: vector % vector on two fixed-length unsigned 8-bit RVV operands.
// The autogenerated assertions above expect this to lower to `urem <32 x i8>`.
1677 fixed_uint8m1_t rem_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
1678 return a % b;
1681 // CHECK-LABEL: @rem_inplace_u16(
1682 // CHECK-NEXT: entry:
1683 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1684 // CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
1685 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]]
1686 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1687 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Test input: vector % vector on two fixed-length unsigned 16-bit RVV operands.
// The autogenerated assertions above expect this to lower to `urem <16 x i16>`.
1689 fixed_uint16m1_t rem_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
1690 return a % b;
1693 // CHECK-LABEL: @rem_inplace_u32(
1694 // CHECK-NEXT: entry:
1695 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1696 // CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
1697 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]]
1698 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1699 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Test input: vector % vector on two fixed-length unsigned 32-bit RVV operands.
// The autogenerated assertions above expect this to lower to `urem <8 x i32>`.
1701 fixed_uint32m1_t rem_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
1702 return a % b;
1705 // CHECK-LABEL: @rem_inplace_u64(
1706 // CHECK-NEXT: entry:
1707 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1708 // CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
1709 // CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]]
1710 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1711 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Test input: vector % vector on two fixed-length unsigned 64-bit RVV operands.
// The autogenerated assertions above expect this to lower to `urem <4 x i64>`.
1713 fixed_uint64m1_t rem_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
1714 return a % b;
1717 // CHECK-LABEL: @rem_scalar_i8(
1718 // CHECK-NEXT: entry:
1719 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1720 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1721 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1722 // CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[SPLAT_SPLAT]]
1723 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1724 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length signed 8-bit RVV vector `a`
// and an int8_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `srem <32 x i8>`.
1726 fixed_int8m1_t rem_scalar_i8(fixed_int8m1_t a, int8_t b) {
1727 return a % b;
1730 // CHECK-LABEL: @rem_scalar_i16(
1731 // CHECK-NEXT: entry:
1732 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1733 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1734 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1735 // CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[SPLAT_SPLAT]]
1736 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1737 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length signed 16-bit RVV vector `a`
// and an int16_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `srem <16 x i16>`.
1739 fixed_int16m1_t rem_scalar_i16(fixed_int16m1_t a, int16_t b) {
1740 return a % b;
1743 // CHECK-LABEL: @rem_scalar_i32(
1744 // CHECK-NEXT: entry:
1745 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1746 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1747 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1748 // CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[SPLAT_SPLAT]]
1749 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1750 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length signed 32-bit RVV vector `a`
// and an int32_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `srem <8 x i32>`.
1752 fixed_int32m1_t rem_scalar_i32(fixed_int32m1_t a, int32_t b) {
1753 return a % b;
1756 // CHECK-LABEL: @rem_scalar_i64(
1757 // CHECK-NEXT: entry:
1758 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1759 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1760 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1761 // CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[SPLAT_SPLAT]]
1762 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1763 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length signed 64-bit RVV vector `a`
// and an int64_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `srem <4 x i64>`.
1765 fixed_int64m1_t rem_scalar_i64(fixed_int64m1_t a, int64_t b) {
1766 return a % b;
1769 // CHECK-LABEL: @rem_scalar_u8(
1770 // CHECK-NEXT: entry:
1771 // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
1772 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
1773 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
1774 // CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[SPLAT_SPLAT]]
1775 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
1776 // CHECK-NEXT: ret <vscale x 8 x i8> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length unsigned 8-bit RVV vector `a`
// and a uint8_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `urem <32 x i8>`.
1778 fixed_uint8m1_t rem_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
1779 return a % b;
1782 // CHECK-LABEL: @rem_scalar_u16(
1783 // CHECK-NEXT: entry:
1784 // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
1785 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
1786 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
1787 // CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[SPLAT_SPLAT]]
1788 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
1789 // CHECK-NEXT: ret <vscale x 4 x i16> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length unsigned 16-bit RVV vector
// `a` and a uint16_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `urem <16 x i16>`.
1791 fixed_uint16m1_t rem_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
1792 return a % b;
1795 // CHECK-LABEL: @rem_scalar_u32(
1796 // CHECK-NEXT: entry:
1797 // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
1798 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
1799 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
1800 // CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[SPLAT_SPLAT]]
1801 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
1802 // CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length unsigned 32-bit RVV vector
// `a` and a uint32_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `urem <8 x i32>`.
1804 fixed_uint32m1_t rem_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
1805 return a % b;
1808 // CHECK-LABEL: @rem_scalar_u64(
1809 // CHECK-NEXT: entry:
1810 // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
1811 // CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
1812 // CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1813 // CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[SPLAT_SPLAT]]
1814 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
1815 // CHECK-NEXT: ret <vscale x 1 x i64> [[CAST_SCALABLE]]
// Test input: vector % scalar, with a fixed-length unsigned 64-bit RVV vector
// `a` and a uint64_t `b`. The autogenerated assertions above expect `b` to be
// splatted (insertelement + shufflevector) and then used in `urem <4 x i64>`.
1817 fixed_uint64m1_t rem_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
1818 return a % b;