1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=armv7a-eabi -mattr=+neon -float-abi=hard %s -o - | FileCheck %s
; vsubi8: plain integer subtract of two <8 x i8> vectors.
; Expected instruction: vsub.i8 on d registers.
4 define <8 x i8> @vsubi8(<8 x i8> %A, <8 x i8> %B) {
7 ; CHECK-NEXT: vsub.i8 d0, d0, d1
9 %tmp3 = sub <8 x i8> %A, %B
; vsubi16: plain integer subtract of two <4 x i16> vectors.
; Expected instruction: vsub.i16 on d registers.
13 define <4 x i16> @vsubi16(<4 x i16> %A, <4 x i16> %B) {
14 ; CHECK-LABEL: vsubi16:
16 ; CHECK-NEXT: vsub.i16 d0, d0, d1
18 %tmp3 = sub <4 x i16> %A, %B
; vsubi32: plain integer subtract of two <2 x i32> vectors.
; Expected instruction: vsub.i32 on d registers.
22 define <2 x i32> @vsubi32(<2 x i32> %A, <2 x i32> %B) {
23 ; CHECK-LABEL: vsubi32:
25 ; CHECK-NEXT: vsub.i32 d0, d0, d1
27 %tmp3 = sub <2 x i32> %A, %B
; vsubi64: plain integer subtract of two <1 x i64> vectors.
; Expected instruction: vsub.i64 on d registers.
31 define <1 x i64> @vsubi64(<1 x i64> %A, <1 x i64> %B) {
32 ; CHECK-LABEL: vsubi64:
34 ; CHECK-NEXT: vsub.i64 d0, d0, d1
36 %tmp3 = sub <1 x i64> %A, %B
; vsubf32: floating-point subtract (fsub) of two <2 x float> vectors.
; Expected instruction: vsub.f32 on d registers.
40 define <2 x float> @vsubf32(<2 x float> %A, <2 x float> %B) {
41 ; CHECK-LABEL: vsubf32:
43 ; CHECK-NEXT: vsub.f32 d0, d0, d1
45 %tmp3 = fsub <2 x float> %A, %B
; vsubQi8: plain integer subtract of two <16 x i8> vectors.
; Expected instruction: vsub.i8 on q registers.
49 define <16 x i8> @vsubQi8(<16 x i8> %A, <16 x i8> %B) {
50 ; CHECK-LABEL: vsubQi8:
52 ; CHECK-NEXT: vsub.i8 q0, q0, q1
54 %tmp3 = sub <16 x i8> %A, %B
; vsubQi16: plain integer subtract of two <8 x i16> vectors.
; Expected instruction: vsub.i16 on q registers.
58 define <8 x i16> @vsubQi16(<8 x i16> %A, <8 x i16> %B) {
59 ; CHECK-LABEL: vsubQi16:
61 ; CHECK-NEXT: vsub.i16 q0, q0, q1
63 %tmp3 = sub <8 x i16> %A, %B
; vsubQi32: plain integer subtract of two <4 x i32> vectors.
; Expected instruction: vsub.i32 on q registers.
67 define <4 x i32> @vsubQi32(<4 x i32> %A, <4 x i32> %B) {
68 ; CHECK-LABEL: vsubQi32:
70 ; CHECK-NEXT: vsub.i32 q0, q0, q1
72 %tmp3 = sub <4 x i32> %A, %B
; vsubQi64: plain integer subtract of two <2 x i64> vectors.
; Expected instruction: vsub.i64 on q registers.
76 define <2 x i64> @vsubQi64(<2 x i64> %A, <2 x i64> %B) {
77 ; CHECK-LABEL: vsubQi64:
79 ; CHECK-NEXT: vsub.i64 q0, q0, q1
81 %tmp3 = sub <2 x i64> %A, %B
; vsubQf32: floating-point subtract (fsub) of two <4 x float> vectors.
; Expected instruction: vsub.f32 on q registers.
85 define <4 x float> @vsubQf32(<4 x float> %A, <4 x float> %B) {
86 ; CHECK-LABEL: vsubQf32:
88 ; CHECK-NEXT: vsub.f32 q0, q0, q1
90 %tmp3 = fsub <4 x float> %A, %B
; vrsubhni16: calls the llvm.arm.neon.vrsubhn.v8i8 intrinsic on two <8 x i16>
; operands, producing <8 x i8>. Expected instruction: vrsubhn.i16 (q, q -> d).
94 define <8 x i8> @vrsubhni16(<8 x i16> %A, <8 x i16> %B) {
95 ; CHECK-LABEL: vrsubhni16:
97 ; CHECK-NEXT: vrsubhn.i16 d0, q0, q1
99 %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %A, <8 x i16> %B)
; vrsubhni32: calls the llvm.arm.neon.vrsubhn.v4i16 intrinsic on two <4 x i32>
; operands, producing <4 x i16>. Expected instruction: vrsubhn.i32 (q, q -> d).
103 define <4 x i16> @vrsubhni32(<4 x i32> %A, <4 x i32> %B) {
104 ; CHECK-LABEL: vrsubhni32:
106 ; CHECK-NEXT: vrsubhn.i32 d0, q0, q1
108 %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %A, <4 x i32> %B)
; vrsubhni64: calls the llvm.arm.neon.vrsubhn.v2i32 intrinsic on two <2 x i64>
; operands, producing <2 x i32>. Expected instruction: vrsubhn.i64 (q, q -> d).
112 define <2 x i32> @vrsubhni64(<2 x i64> %A, <2 x i64> %B) {
113 ; CHECK-LABEL: vrsubhni64:
115 ; CHECK-NEXT: vrsubhn.i64 d0, q0, q1
117 %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %A, <2 x i64> %B)
; Declarations of the vrsubhn intrinsics called by the vrsubhni16/32/64 tests.
; Each takes two wide vectors and returns a vector with half-width elements;
; all three are marked readnone.
121 declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) readnone
122 declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) readnone
123 declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) readnone
; vsubhni16_natural: generic-IR form of subtract-and-narrow: sub on <8 x i16>,
; logical shift right by 8 (half the element width), then trunc to <8 x i8>.
; Expected to be selected as a single vsubhn.i16.
; Note: %sum holds the difference %A - %B despite its name.
125 define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) {
126 ; CHECK-LABEL: vsubhni16_natural:
128 ; CHECK-NEXT: vsubhn.i16 d0, q0, q1
130 %sum = sub <8 x i16> %A, %B
131 %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
132 %trunc = trunc <8 x i16> %shift to <8 x i8>
; vsubhni32_natural: generic-IR form of subtract-and-narrow: sub on <4 x i32>,
; logical shift right by 16 (half the element width), then trunc to <4 x i16>.
; Expected to be selected as a single vsubhn.i32.
; Note: %sum holds the difference %A - %B despite its name.
136 define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) {
137 ; CHECK-LABEL: vsubhni32_natural:
139 ; CHECK-NEXT: vsubhn.i32 d0, q0, q1
141 %sum = sub <4 x i32> %A, %B
142 %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
143 %trunc = trunc <4 x i32> %shift to <4 x i16>
; vsubhni64_natural: generic-IR form of subtract-and-narrow: sub on <2 x i64>,
; logical shift right by 32 (half the element width), then trunc to <2 x i32>.
; Expected to be selected as a single vsubhn.i64.
; Note: %sum holds the difference %A - %B despite its name.
147 define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) {
148 ; CHECK-LABEL: vsubhni64_natural:
150 ; CHECK-NEXT: vsubhn.i64 d0, q0, q1
152 %sum = sub <2 x i64> %A, %B
153 %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
154 %trunc = trunc <2 x i64> %shift to <2 x i32>
; vsubls8: sign-extends both <8 x i8> operands to <8 x i16> and subtracts.
; Expected to fold into a single signed widening subtract, vsubl.s8.
158 define <8 x i16> @vsubls8(<8 x i8> %A, <8 x i8> %B) {
159 ; CHECK-LABEL: vsubls8:
161 ; CHECK-NEXT: vsubl.s8 q0, d0, d1
163 %tmp3 = sext <8 x i8> %A to <8 x i16>
164 %tmp4 = sext <8 x i8> %B to <8 x i16>
165 %tmp5 = sub <8 x i16> %tmp3, %tmp4
; vsubls16: sign-extends both <4 x i16> operands to <4 x i32> and subtracts.
; Expected to fold into a single signed widening subtract, vsubl.s16.
169 define <4 x i32> @vsubls16(<4 x i16> %A, <4 x i16> %B) {
170 ; CHECK-LABEL: vsubls16:
172 ; CHECK-NEXT: vsubl.s16 q0, d0, d1
174 %tmp3 = sext <4 x i16> %A to <4 x i32>
175 %tmp4 = sext <4 x i16> %B to <4 x i32>
176 %tmp5 = sub <4 x i32> %tmp3, %tmp4
; vsubls32: sign-extends both <2 x i32> operands to <2 x i64> and subtracts.
; Expected to fold into a single signed widening subtract, vsubl.s32.
180 define <2 x i64> @vsubls32(<2 x i32> %A, <2 x i32> %B) {
181 ; CHECK-LABEL: vsubls32:
183 ; CHECK-NEXT: vsubl.s32 q0, d0, d1
185 %tmp3 = sext <2 x i32> %A to <2 x i64>
186 %tmp4 = sext <2 x i32> %B to <2 x i64>
187 %tmp5 = sub <2 x i64> %tmp3, %tmp4
; vsublu8: zero-extends both <8 x i8> operands to <8 x i16> and subtracts.
; Expected to fold into a single unsigned widening subtract, vsubl.u8.
191 define <8 x i16> @vsublu8(<8 x i8> %A, <8 x i8> %B) {
192 ; CHECK-LABEL: vsublu8:
194 ; CHECK-NEXT: vsubl.u8 q0, d0, d1
196 %tmp3 = zext <8 x i8> %A to <8 x i16>
197 %tmp4 = zext <8 x i8> %B to <8 x i16>
198 %tmp5 = sub <8 x i16> %tmp3, %tmp4
; vsublu16: zero-extends both <4 x i16> operands to <4 x i32> and subtracts.
; Expected to fold into a single unsigned widening subtract, vsubl.u16.
202 define <4 x i32> @vsublu16(<4 x i16> %A, <4 x i16> %B) {
203 ; CHECK-LABEL: vsublu16:
205 ; CHECK-NEXT: vsubl.u16 q0, d0, d1
207 %tmp3 = zext <4 x i16> %A to <4 x i32>
208 %tmp4 = zext <4 x i16> %B to <4 x i32>
209 %tmp5 = sub <4 x i32> %tmp3, %tmp4
; vsublu32: zero-extends both <2 x i32> operands to <2 x i64> and subtracts.
; Expected to fold into a single unsigned widening subtract, vsubl.u32.
213 define <2 x i64> @vsublu32(<2 x i32> %A, <2 x i32> %B) {
214 ; CHECK-LABEL: vsublu32:
216 ; CHECK-NEXT: vsubl.u32 q0, d0, d1
218 %tmp3 = zext <2 x i32> %A to <2 x i64>
219 %tmp4 = zext <2 x i32> %B to <2 x i64>
220 %tmp5 = sub <2 x i64> %tmp3, %tmp4
; vsubla8: zero-extends both <8 x i8> operands, subtracts, then masks each
; i16 lane with 255 (keeps only the low byte of the difference).
; Expected output: vsubl.u8 followed by vbic.i16 #0xff00 implementing the mask.
224 define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
225 ; CHECK-LABEL: vsubla8:
227 ; CHECK-NEXT: vsubl.u8 q0, d0, d1
228 ; CHECK-NEXT: vbic.i16 q0, #0xff00
230 %tmp3 = zext <8 x i8> %A to <8 x i16>
231 %tmp4 = zext <8 x i8> %B to <8 x i16>
232 %tmp5 = sub <8 x i16> %tmp3, %tmp4
233 %and = and <8 x i16> %tmp5, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; vsubla16: zero-extends both <4 x i16> operands, subtracts, then masks each
; i32 lane with 65535 (keeps only the low 16 bits of the difference).
; Expected output: the 0xffff mask materialized with vmov.i32, a vsubl.u16,
; and a vand combining the two.
237 define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
238 ; CHECK-LABEL: vsubla16:
240 ; CHECK-NEXT: vmov.i32 q8, #0xffff
241 ; CHECK-NEXT: vsubl.u16 q9, d0, d1
242 ; CHECK-NEXT: vand q0, q9, q8
244 %tmp3 = zext <4 x i16> %A to <4 x i32>
245 %tmp4 = zext <4 x i16> %B to <4 x i32>
246 %tmp5 = sub <4 x i32> %tmp3, %tmp4
247 %and = and <4 x i32> %tmp5, <i32 65535, i32 65535, i32 65535, i32 65535>
; vsubla32: zero-extends both <2 x i32> operands, subtracts, then masks each
; i64 lane with 4294967295 (keeps only the low 32 bits of the difference).
; Expected output: the 0xffffffff mask materialized with vmov.i64, a vsubl.u32,
; and a vand combining the two.
251 define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) {
252 ; CHECK-LABEL: vsubla32:
254 ; CHECK-NEXT: vmov.i64 q8, #0xffffffff
255 ; CHECK-NEXT: vsubl.u32 q9, d0, d1
256 ; CHECK-NEXT: vand q0, q9, q8
258 %tmp3 = zext <2 x i32> %A to <2 x i64>
259 %tmp4 = zext <2 x i32> %B to <2 x i64>
260 %tmp5 = sub <2 x i64> %tmp3, %tmp4
261 %and = and <2 x i64> %tmp5, <i64 4294967295, i64 4294967295>
; vsubws8: subtracts a sign-extended <8 x i8> %B from an already-wide
; <8 x i16> %A. Expected to fold into a single vsubw.s8 (q, d operands).
265 define <8 x i16> @vsubws8(<8 x i16> %A, <8 x i8> %B) {
266 ; CHECK-LABEL: vsubws8:
268 ; CHECK-NEXT: vsubw.s8 q0, q0, d2
270 %tmp3 = sext <8 x i8> %B to <8 x i16>
271 %tmp4 = sub <8 x i16> %A, %tmp3
; vsubws16: subtracts a sign-extended <4 x i16> %B from an already-wide
; <4 x i32> %A. Expected to fold into a single vsubw.s16 (q, d operands).
275 define <4 x i32> @vsubws16(<4 x i32> %A, <4 x i16> %B) {
276 ; CHECK-LABEL: vsubws16:
278 ; CHECK-NEXT: vsubw.s16 q0, q0, d2
280 %tmp3 = sext <4 x i16> %B to <4 x i32>
281 %tmp4 = sub <4 x i32> %A, %tmp3
; vsubws32: subtracts a sign-extended <2 x i32> %B from an already-wide
; <2 x i64> %A. Expected to fold into a single vsubw.s32 (q, d operands).
285 define <2 x i64> @vsubws32(<2 x i64> %A, <2 x i32> %B) {
286 ; CHECK-LABEL: vsubws32:
288 ; CHECK-NEXT: vsubw.s32 q0, q0, d2
290 %tmp3 = sext <2 x i32> %B to <2 x i64>
291 %tmp4 = sub <2 x i64> %A, %tmp3
; vsubwu8: subtracts a zero-extended <8 x i8> %B from an already-wide
; <8 x i16> %A. Expected to fold into a single vsubw.u8 (q, d operands).
295 define <8 x i16> @vsubwu8(<8 x i16> %A, <8 x i8> %B) {
296 ; CHECK-LABEL: vsubwu8:
298 ; CHECK-NEXT: vsubw.u8 q0, q0, d2
300 %tmp3 = zext <8 x i8> %B to <8 x i16>
301 %tmp4 = sub <8 x i16> %A, %tmp3
; vsubwu16: subtracts a zero-extended <4 x i16> %B from an already-wide
; <4 x i32> %A. Expected to fold into a single vsubw.u16 (q, d operands).
305 define <4 x i32> @vsubwu16(<4 x i32> %A, <4 x i16> %B) {
306 ; CHECK-LABEL: vsubwu16:
308 ; CHECK-NEXT: vsubw.u16 q0, q0, d2
310 %tmp3 = zext <4 x i16> %B to <4 x i32>
311 %tmp4 = sub <4 x i32> %A, %tmp3
; vsubwu32: subtracts a zero-extended <2 x i32> %B from an already-wide
; <2 x i64> %A. Expected to fold into a single vsubw.u32 (q, d operands).
315 define <2 x i64> @vsubwu32(<2 x i64> %A, <2 x i32> %B) {
316 ; CHECK-LABEL: vsubwu32:
318 ; CHECK-NEXT: vsubw.u32 q0, q0, d2
320 %tmp3 = zext <2 x i32> %B to <2 x i64>
321 %tmp4 = sub <2 x i64> %A, %tmp3
; vsubwa8: subtracts a zero-extended <8 x i8> %B from wide %A, then masks each
; i16 lane with 255 (keeps only the low byte of the difference).
; Expected output: vsubw.u8 followed by vbic.i16 #0xff00 implementing the mask.
325 define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
326 ; CHECK-LABEL: vsubwa8:
328 ; CHECK-NEXT: vsubw.u8 q0, q0, d2
329 ; CHECK-NEXT: vbic.i16 q0, #0xff00
331 %tmp3 = zext <8 x i8> %B to <8 x i16>
332 %tmp4 = sub <8 x i16> %A, %tmp3
333 %and = and <8 x i16> %tmp4, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; vsubwa16: subtracts a zero-extended <4 x i16> %B from wide %A, then masks
; each i32 lane with 65535 (keeps only the low 16 bits of the difference).
; Expected output: the 0xffff mask materialized with vmov.i32, a vsubw.u16,
; and a vand combining the two.
337 define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
338 ; CHECK-LABEL: vsubwa16:
340 ; CHECK-NEXT: vmov.i32 q8, #0xffff
341 ; CHECK-NEXT: vsubw.u16 q9, q0, d2
342 ; CHECK-NEXT: vand q0, q9, q8
344 %tmp3 = zext <4 x i16> %B to <4 x i32>
345 %tmp4 = sub <4 x i32> %A, %tmp3
346 %and = and <4 x i32> %tmp4, <i32 65535, i32 65535, i32 65535, i32 65535>
; vsubwa32: subtracts a zero-extended <2 x i32> %B from wide %A, then masks
; each i64 lane with 4294967295 (keeps only the low 32 bits of the difference).
; Expected output: the 0xffffffff mask materialized with vmov.i64, a vsubw.u32,
; and a vand combining the two.
350 define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) {
351 ; CHECK-LABEL: vsubwa32:
353 ; CHECK-NEXT: vmov.i64 q8, #0xffffffff
354 ; CHECK-NEXT: vsubw.u32 q9, q0, d2
355 ; CHECK-NEXT: vand q0, q9, q8
357 %tmp3 = zext <2 x i32> %B to <2 x i64>
358 %tmp4 = sub <2 x i64> %A, %tmp3
359 %and = and <2 x i64> %tmp4, <i64 4294967295, i64 4294967295>