1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
5 // REQUIRES: aarch64-registered-target || arm-registered-target
// CHECK-LABEL: @test_vhcaddq_rot90_s8(
// CHECK-NEXT: entry:
11 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 0, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
12 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
14 int8x16_t
test_vhcaddq_rot90_s8(int8x16_t a
, int8x16_t b
)
17 return vhcaddq_rot90(a
, b
);
19 return vhcaddq_rot90_s8(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot90_s16(
// CHECK-NEXT: entry:
25 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
26 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
28 int16x8_t
test_vhcaddq_rot90_s16(int16x8_t a
, int16x8_t b
)
31 return vhcaddq_rot90(a
, b
);
33 return vhcaddq_rot90_s16(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot90_s32(
// CHECK-NEXT: entry:
39 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
40 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
42 int32x4_t
test_vhcaddq_rot90_s32(int32x4_t a
, int32x4_t b
)
45 return vhcaddq_rot90(a
, b
);
47 return vhcaddq_rot90_s32(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot270_s8(
// CHECK-NEXT: entry:
53 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 1, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
54 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
56 int8x16_t
test_vhcaddq_rot270_s8(int8x16_t a
, int8x16_t b
)
59 return vhcaddq_rot270(a
, b
);
61 return vhcaddq_rot270_s8(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot270_s16(
// CHECK-NEXT: entry:
67 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 1, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
68 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
70 int16x8_t
test_vhcaddq_rot270_s16(int16x8_t a
, int16x8_t b
)
73 return vhcaddq_rot270(a
, b
);
75 return vhcaddq_rot270_s16(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot270_s32(
// CHECK-NEXT: entry:
81 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
82 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
84 int32x4_t
test_vhcaddq_rot270_s32(int32x4_t a
, int32x4_t b
)
87 return vhcaddq_rot270(a
, b
);
89 return vhcaddq_rot270_s32(a
, b
);
// CHECK-LABEL: @test_vhcaddq_rot90_x_s8(
// CHECK-NEXT: entry:
95 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
96 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
97 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
98 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
100 int8x16_t
test_vhcaddq_rot90_x_s8(int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
103 return vhcaddq_rot90_x(a
, b
, p
);
105 return vhcaddq_rot90_x_s8(a
, b
, p
);
109 // CHECK-LABEL: @test_vhcaddq_rot90_x_s16(
110 // CHECK-NEXT: entry:
111 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
112 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
113 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
114 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
116 int16x8_t
test_vhcaddq_rot90_x_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
119 return vhcaddq_rot90_x(a
, b
, p
);
121 return vhcaddq_rot90_x_s16(a
, b
, p
);
125 // CHECK-LABEL: @test_vhcaddq_rot90_x_s32(
126 // CHECK-NEXT: entry:
127 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
128 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
129 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
130 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
132 int32x4_t
test_vhcaddq_rot90_x_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
135 return vhcaddq_rot90_x(a
, b
, p
);
137 return vhcaddq_rot90_x_s32(a
, b
, p
);
141 // CHECK-LABEL: @test_vhcaddq_rot270_x_s8(
142 // CHECK-NEXT: entry:
143 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
144 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
145 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
146 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
148 int8x16_t
test_vhcaddq_rot270_x_s8(int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
151 return vhcaddq_rot270_x(a
, b
, p
);
153 return vhcaddq_rot270_x_s8(a
, b
, p
);
157 // CHECK-LABEL: @test_vhcaddq_rot270_x_s16(
158 // CHECK-NEXT: entry:
159 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
160 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
161 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
162 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
164 int16x8_t
test_vhcaddq_rot270_x_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
167 return vhcaddq_rot270_x(a
, b
, p
);
169 return vhcaddq_rot270_x_s16(a
, b
, p
);
173 // CHECK-LABEL: @test_vhcaddq_rot270_x_s32(
174 // CHECK-NEXT: entry:
175 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
176 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
177 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
178 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
180 int32x4_t
test_vhcaddq_rot270_x_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
183 return vhcaddq_rot270_x(a
, b
, p
);
185 return vhcaddq_rot270_x_s32(a
, b
, p
);
189 // CHECK-LABEL: @test_vhcaddq_rot90_m_s8(
190 // CHECK-NEXT: entry:
191 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
192 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
193 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
194 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
196 int8x16_t
test_vhcaddq_rot90_m_s8(int8x16_t inactive
, int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
199 return vhcaddq_rot90_m(inactive
, a
, b
, p
);
201 return vhcaddq_rot90_m_s8(inactive
, a
, b
, p
);
205 // CHECK-LABEL: @test_vhcaddq_rot90_m_s16(
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
208 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
209 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
210 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
212 int16x8_t
test_vhcaddq_rot90_m_s16(int16x8_t inactive
, int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
215 return vhcaddq_rot90_m(inactive
, a
, b
, p
);
217 return vhcaddq_rot90_m_s16(inactive
, a
, b
, p
);
221 // CHECK-LABEL: @test_vhcaddq_rot90_m_s32(
222 // CHECK-NEXT: entry:
223 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
224 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
225 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
226 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
228 int32x4_t
test_vhcaddq_rot90_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
231 return vhcaddq_rot90_m(inactive
, a
, b
, p
);
233 return vhcaddq_rot90_m_s32(inactive
, a
, b
, p
);
237 // CHECK-LABEL: @test_vhcaddq_rot270_m_s8(
238 // CHECK-NEXT: entry:
239 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
240 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
241 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
242 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
244 int8x16_t
test_vhcaddq_rot270_m_s8(int8x16_t inactive
, int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
247 return vhcaddq_rot270_m(inactive
, a
, b
, p
);
249 return vhcaddq_rot270_m_s8(inactive
, a
, b
, p
);
253 // CHECK-LABEL: @test_vhcaddq_rot270_m_s16(
254 // CHECK-NEXT: entry:
255 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
256 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
257 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
258 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
260 int16x8_t
test_vhcaddq_rot270_m_s16(int16x8_t inactive
, int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
263 return vhcaddq_rot270_m(inactive
, a
, b
, p
);
265 return vhcaddq_rot270_m_s16(inactive
, a
, b
, p
);
269 // CHECK-LABEL: @test_vhcaddq_rot270_m_s32(
270 // CHECK-NEXT: entry:
271 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
272 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
273 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
274 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
276 int32x4_t
test_vhcaddq_rot270_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
279 return vhcaddq_rot270_m(inactive
, a
, b
, p
);
281 return vhcaddq_rot270_m_s32(inactive
, a
, b
, p
);