1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=armv7a-eabi -mattr=+neon -float-abi=hard %s -o - | FileCheck %s
; vaddi8: lane-wise integer add of two <8 x i8> arguments.
; The assertion below expects `vadd.i8 d0, d0, d1`.
; NOTE(review): unlike the sibling tests, no label directive for this function
; is visible in this excerpt - confirm one precedes the first assertion.
4 define <8 x i8> @vaddi8(<8 x i8> %A, <8 x i8> %B) {
7 ; CHECK-NEXT: vadd.i8 d0, d0, d1
9 %tmp3 = add <8 x i8> %A, %B
; vaddi16: lane-wise integer add of two <4 x i16> arguments.
; The assertion below expects `vadd.i16 d0, d0, d1`.
13 define <4 x i16> @vaddi16(<4 x i16> %A, <4 x i16> %B) {
14 ; CHECK-LABEL: vaddi16:
16 ; CHECK-NEXT: vadd.i16 d0, d0, d1
18 %tmp3 = add <4 x i16> %A, %B
; vaddi32: lane-wise integer add of two <2 x i32> arguments.
; The assertion below expects `vadd.i32 d0, d0, d1`.
22 define <2 x i32> @vaddi32(<2 x i32> %A, <2 x i32> %B) {
23 ; CHECK-LABEL: vaddi32:
25 ; CHECK-NEXT: vadd.i32 d0, d0, d1
27 %tmp3 = add <2 x i32> %A, %B
; vaddi64: integer add of two single-lane <1 x i64> arguments.
; The assertion below expects `vadd.i64 d0, d0, d1`.
31 define <1 x i64> @vaddi64(<1 x i64> %A, <1 x i64> %B) {
32 ; CHECK-LABEL: vaddi64:
34 ; CHECK-NEXT: vadd.i64 d0, d0, d1
36 %tmp3 = add <1 x i64> %A, %B
; vaddf32: lane-wise floating-point add (fadd) of two <2 x float> arguments.
; The assertion below expects `vadd.f32 d0, d0, d1`.
40 define <2 x float> @vaddf32(<2 x float> %A, <2 x float> %B) {
41 ; CHECK-LABEL: vaddf32:
43 ; CHECK-NEXT: vadd.f32 d0, d0, d1
45 %tmp3 = fadd <2 x float> %A, %B
; vaddQi8: lane-wise integer add of two <16 x i8> arguments.
; The assertion below expects the q-register form `vadd.i8 q0, q0, q1`.
49 define <16 x i8> @vaddQi8(<16 x i8> %A, <16 x i8> %B) {
50 ; CHECK-LABEL: vaddQi8:
52 ; CHECK-NEXT: vadd.i8 q0, q0, q1
54 %tmp3 = add <16 x i8> %A, %B
; vaddQi16: lane-wise integer add of two <8 x i16> arguments.
; The assertion below expects the q-register form `vadd.i16 q0, q0, q1`.
58 define <8 x i16> @vaddQi16(<8 x i16> %A, <8 x i16> %B) {
59 ; CHECK-LABEL: vaddQi16:
61 ; CHECK-NEXT: vadd.i16 q0, q0, q1
63 %tmp3 = add <8 x i16> %A, %B
; vaddQi32: lane-wise integer add of two <4 x i32> arguments.
; The assertion below expects the q-register form `vadd.i32 q0, q0, q1`.
67 define <4 x i32> @vaddQi32(<4 x i32> %A, <4 x i32> %B) {
68 ; CHECK-LABEL: vaddQi32:
70 ; CHECK-NEXT: vadd.i32 q0, q0, q1
72 %tmp3 = add <4 x i32> %A, %B
; vaddQi64: lane-wise integer add of two <2 x i64> arguments.
; The assertion below expects the q-register form `vadd.i64 q0, q0, q1`.
76 define <2 x i64> @vaddQi64(<2 x i64> %A, <2 x i64> %B) {
77 ; CHECK-LABEL: vaddQi64:
79 ; CHECK-NEXT: vadd.i64 q0, q0, q1
81 %tmp3 = add <2 x i64> %A, %B
; vaddQf32: lane-wise floating-point add (fadd) of two <4 x float> arguments.
; The assertion below expects the q-register form `vadd.f32 q0, q0, q1`.
85 define <4 x float> @vaddQf32(<4 x float> %A, <4 x float> %B) {
86 ; CHECK-LABEL: vaddQf32:
88 ; CHECK-NEXT: vadd.f32 q0, q0, q1
90 %tmp3 = fadd <4 x float> %A, %B
; vraddhni16: calls the @llvm.arm.neon.vraddhn.v8i8 intrinsic on two <8 x i16>
; arguments, producing an <8 x i8> result.
; The assertion below expects `vraddhn.i16 d0, q0, q1`.
94 define <8 x i8> @vraddhni16(<8 x i16> %A, <8 x i16> %B) {
95 ; CHECK-LABEL: vraddhni16:
97 ; CHECK-NEXT: vraddhn.i16 d0, q0, q1
99 %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %A, <8 x i16> %B)
; vraddhni32: calls the @llvm.arm.neon.vraddhn.v4i16 intrinsic on two <4 x i32>
; arguments, producing a <4 x i16> result.
; The assertion below expects `vraddhn.i32 d0, q0, q1`.
103 define <4 x i16> @vraddhni32(<4 x i32> %A, <4 x i32> %B) {
104 ; CHECK-LABEL: vraddhni32:
106 ; CHECK-NEXT: vraddhn.i32 d0, q0, q1
108 %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %A, <4 x i32> %B)
; vraddhni64: calls the @llvm.arm.neon.vraddhn.v2i32 intrinsic on two <2 x i64>
; arguments, producing a <2 x i32> result.
; The assertion below expects `vraddhn.i64 d0, q0, q1`.
112 define <2 x i32> @vraddhni64(<2 x i64> %A, <2 x i64> %B) {
113 ; CHECK-LABEL: vraddhni64:
115 ; CHECK-NEXT: vraddhn.i64 d0, q0, q1
117 %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %A, <2 x i64> %B)
; Declarations of the vraddhn intrinsics called by vraddhni16/32/64.
; Each takes two vectors of equal type and returns a vector with the same lane
; count and half the element width; all three are marked readnone.
121 declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) readnone
122 declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) readnone
123 declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) readnone
; vaddhni16_natural: written with plain IR rather than an intrinsic - add two
; <8 x i16> vectors, shift each lane right by 8 (half the lane width), then
; truncate to <8 x i8>.
; The assertion below expects the sequence to select `vaddhn.i16 d0, q0, q1`.
125 define <8 x i8> @vaddhni16_natural(<8 x i16> %A, <8 x i16> %B) {
126 ; CHECK-LABEL: vaddhni16_natural:
128 ; CHECK-NEXT: vaddhn.i16 d0, q0, q1
130 %sum = add <8 x i16> %A, %B
131 %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
132 %trunc = trunc <8 x i16> %shift to <8 x i8>
; vaddhni32_natural: written with plain IR rather than an intrinsic - add two
; <4 x i32> vectors, shift each lane right by 16 (half the lane width), then
; truncate to <4 x i16>.
; The assertion below expects the sequence to select `vaddhn.i32 d0, q0, q1`.
136 define <4 x i16> @vaddhni32_natural(<4 x i32> %A, <4 x i32> %B) {
137 ; CHECK-LABEL: vaddhni32_natural:
139 ; CHECK-NEXT: vaddhn.i32 d0, q0, q1
141 %sum = add <4 x i32> %A, %B
142 %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
143 %trunc = trunc <4 x i32> %shift to <4 x i16>
; vaddhni64_natural: written with plain IR rather than an intrinsic - add two
; <2 x i64> vectors, shift each lane right by 32 (half the lane width), then
; truncate to <2 x i32>.
; The assertion below expects the sequence to select `vaddhn.i64 d0, q0, q1`.
147 define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) {
148 ; CHECK-LABEL: vaddhni64_natural:
150 ; CHECK-NEXT: vaddhn.i64 d0, q0, q1
152 %sum = add <2 x i64> %A, %B
153 %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
154 %trunc = trunc <2 x i64> %shift to <2 x i32>
; vaddls8: sign-extends both <8 x i8> arguments to <8 x i16> and adds them.
; The assertion below expects the pattern to select `vaddl.s8 q0, d0, d1`.
158 define <8 x i16> @vaddls8(<8 x i8> %A, <8 x i8> %B) {
159 ; CHECK-LABEL: vaddls8:
161 ; CHECK-NEXT: vaddl.s8 q0, d0, d1
163 %tmp3 = sext <8 x i8> %A to <8 x i16>
164 %tmp4 = sext <8 x i8> %B to <8 x i16>
165 %tmp5 = add <8 x i16> %tmp3, %tmp4
; vaddls16: sign-extends both <4 x i16> arguments to <4 x i32> and adds them.
; The assertion below expects the pattern to select `vaddl.s16 q0, d0, d1`.
169 define <4 x i32> @vaddls16(<4 x i16> %A, <4 x i16> %B) {
170 ; CHECK-LABEL: vaddls16:
172 ; CHECK-NEXT: vaddl.s16 q0, d0, d1
174 %tmp3 = sext <4 x i16> %A to <4 x i32>
175 %tmp4 = sext <4 x i16> %B to <4 x i32>
176 %tmp5 = add <4 x i32> %tmp3, %tmp4
; vaddls32: sign-extends both <2 x i32> arguments to <2 x i64> and adds them.
; The assertion below expects the pattern to select `vaddl.s32 q0, d0, d1`.
180 define <2 x i64> @vaddls32(<2 x i32> %A, <2 x i32> %B) {
181 ; CHECK-LABEL: vaddls32:
183 ; CHECK-NEXT: vaddl.s32 q0, d0, d1
185 %tmp3 = sext <2 x i32> %A to <2 x i64>
186 %tmp4 = sext <2 x i32> %B to <2 x i64>
187 %tmp5 = add <2 x i64> %tmp3, %tmp4
; vaddlu8: zero-extends both <8 x i8> arguments to <8 x i16> and adds them.
; The assertion below expects the pattern to select `vaddl.u8 q0, d0, d1`.
191 define <8 x i16> @vaddlu8(<8 x i8> %A, <8 x i8> %B) {
192 ; CHECK-LABEL: vaddlu8:
194 ; CHECK-NEXT: vaddl.u8 q0, d0, d1
196 %tmp3 = zext <8 x i8> %A to <8 x i16>
197 %tmp4 = zext <8 x i8> %B to <8 x i16>
198 %tmp5 = add <8 x i16> %tmp3, %tmp4
; vaddlu16: zero-extends both <4 x i16> arguments to <4 x i32> and adds them.
; The assertion below expects the pattern to select `vaddl.u16 q0, d0, d1`.
202 define <4 x i32> @vaddlu16(<4 x i16> %A, <4 x i16> %B) {
203 ; CHECK-LABEL: vaddlu16:
205 ; CHECK-NEXT: vaddl.u16 q0, d0, d1
207 %tmp3 = zext <4 x i16> %A to <4 x i32>
208 %tmp4 = zext <4 x i16> %B to <4 x i32>
209 %tmp5 = add <4 x i32> %tmp3, %tmp4
; vaddlu32: zero-extends both <2 x i32> arguments to <2 x i64> and adds them.
; The assertion below expects the pattern to select `vaddl.u32 q0, d0, d1`.
213 define <2 x i64> @vaddlu32(<2 x i32> %A, <2 x i32> %B) {
214 ; CHECK-LABEL: vaddlu32:
216 ; CHECK-NEXT: vaddl.u32 q0, d0, d1
218 %tmp3 = zext <2 x i32> %A to <2 x i64>
219 %tmp4 = zext <2 x i32> %B to <2 x i64>
220 %tmp5 = add <2 x i64> %tmp3, %tmp4
; vaddla8: zero-extends both <8 x i8> arguments to <8 x i16>, adds them, then
; masks every lane with 255 (keeps only the low 8 bits of each sum).
; The assertions below expect `vaddl.u8 q0, d0, d1` followed by
; `vbic.i16 q0, #0xff00` to apply the mask.
224 define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) {
225 ; CHECK-LABEL: vaddla8:
227 ; CHECK-NEXT: vaddl.u8 q0, d0, d1
228 ; CHECK-NEXT: vbic.i16 q0, #0xff00
230 %tmp3 = zext <8 x i8> %A to <8 x i16>
231 %tmp4 = zext <8 x i8> %B to <8 x i16>
232 %tmp5 = add <8 x i16> %tmp3, %tmp4
233 %and = and <8 x i16> %tmp5, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; vaddla16: zero-extends both <4 x i16> arguments to <4 x i32>, adds them, then
; masks every lane with 65535 (keeps only the low 16 bits of each sum).
; The assertions below expect the mask to be materialised with
; `vmov.i32 q8, #0xffff`, the add to select `vaddl.u16 q9, d0, d1`, and the
; mask to be applied with `vand q0, q9, q8`.
237 define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) {
238 ; CHECK-LABEL: vaddla16:
240 ; CHECK-NEXT: vmov.i32 q8, #0xffff
241 ; CHECK-NEXT: vaddl.u16 q9, d0, d1
242 ; CHECK-NEXT: vand q0, q9, q8
244 %tmp3 = zext <4 x i16> %A to <4 x i32>
245 %tmp4 = zext <4 x i16> %B to <4 x i32>
246 %tmp5 = add <4 x i32> %tmp3, %tmp4
247 %and = and <4 x i32> %tmp5, <i32 65535, i32 65535, i32 65535, i32 65535>
; vaddla32: zero-extends both <2 x i32> arguments to <2 x i64>, adds them, then
; masks every lane with 4294967295 (keeps only the low 32 bits of each sum).
; The assertions below expect the mask to be materialised with
; `vmov.i64 q8, #0xffffffff`, the add to select `vaddl.u32 q9, d0, d1`, and the
; mask to be applied with `vand q0, q9, q8`.
251 define <2 x i64> @vaddla32(<2 x i32> %A, <2 x i32> %B) {
252 ; CHECK-LABEL: vaddla32:
254 ; CHECK-NEXT: vmov.i64 q8, #0xffffffff
255 ; CHECK-NEXT: vaddl.u32 q9, d0, d1
256 ; CHECK-NEXT: vand q0, q9, q8
258 %tmp3 = zext <2 x i32> %A to <2 x i64>
259 %tmp4 = zext <2 x i32> %B to <2 x i64>
260 %tmp5 = add <2 x i64> %tmp3, %tmp4
261 %and = and <2 x i64> %tmp5, <i64 4294967295, i64 4294967295>
; vaddws8: sign-extends only the narrow <8 x i8> operand %B to <8 x i16> and
; adds it to the already-wide <8 x i16> operand %A.
; The assertion below expects the pattern to select `vaddw.s8 q0, q0, d2`.
265 define <8 x i16> @vaddws8(<8 x i16> %A, <8 x i8> %B) {
266 ; CHECK-LABEL: vaddws8:
268 ; CHECK-NEXT: vaddw.s8 q0, q0, d2
270 %tmp3 = sext <8 x i8> %B to <8 x i16>
271 %tmp4 = add <8 x i16> %A, %tmp3
; vaddws16: sign-extends only the narrow <4 x i16> operand %B to <4 x i32> and
; adds it to the already-wide <4 x i32> operand %A.
; The assertion below expects the pattern to select `vaddw.s16 q0, q0, d2`.
275 define <4 x i32> @vaddws16(<4 x i32> %A, <4 x i16> %B) {
276 ; CHECK-LABEL: vaddws16:
278 ; CHECK-NEXT: vaddw.s16 q0, q0, d2
280 %tmp3 = sext <4 x i16> %B to <4 x i32>
281 %tmp4 = add <4 x i32> %A, %tmp3
; vaddws32: sign-extends only the narrow <2 x i32> operand %B to <2 x i64> and
; adds it to the already-wide <2 x i64> operand %A.
; The assertion below expects the pattern to select `vaddw.s32 q0, q0, d2`.
285 define <2 x i64> @vaddws32(<2 x i64> %A, <2 x i32> %B) {
286 ; CHECK-LABEL: vaddws32:
288 ; CHECK-NEXT: vaddw.s32 q0, q0, d2
290 %tmp3 = sext <2 x i32> %B to <2 x i64>
291 %tmp4 = add <2 x i64> %A, %tmp3
; vaddwu8: zero-extends only the narrow <8 x i8> operand %B to <8 x i16> and
; adds it to the already-wide <8 x i16> operand %A.
; The assertion below expects the pattern to select `vaddw.u8 q0, q0, d2`.
295 define <8 x i16> @vaddwu8(<8 x i16> %A, <8 x i8> %B) {
296 ; CHECK-LABEL: vaddwu8:
298 ; CHECK-NEXT: vaddw.u8 q0, q0, d2
300 %tmp3 = zext <8 x i8> %B to <8 x i16>
301 %tmp4 = add <8 x i16> %A, %tmp3
; vaddwu16: zero-extends only the narrow <4 x i16> operand %B to <4 x i32> and
; adds it to the already-wide <4 x i32> operand %A.
; The assertion below expects the pattern to select `vaddw.u16 q0, q0, d2`.
305 define <4 x i32> @vaddwu16(<4 x i32> %A, <4 x i16> %B) {
306 ; CHECK-LABEL: vaddwu16:
308 ; CHECK-NEXT: vaddw.u16 q0, q0, d2
310 %tmp3 = zext <4 x i16> %B to <4 x i32>
311 %tmp4 = add <4 x i32> %A, %tmp3
; vaddwu32: zero-extends only the narrow <2 x i32> operand %B to <2 x i64> and
; adds it to the already-wide <2 x i64> operand %A.
; The assertion below expects the pattern to select `vaddw.u32 q0, q0, d2`.
315 define <2 x i64> @vaddwu32(<2 x i64> %A, <2 x i32> %B) {
316 ; CHECK-LABEL: vaddwu32:
318 ; CHECK-NEXT: vaddw.u32 q0, q0, d2
320 %tmp3 = zext <2 x i32> %B to <2 x i64>
321 %tmp4 = add <2 x i64> %A, %tmp3
; vaddwa8: zero-extends the narrow <8 x i8> operand %B, adds it to the wide
; <8 x i16> operand %A, then masks every lane with 255 (keeps only the low
; 8 bits of each sum).
; The assertions below expect `vaddw.u8 q0, q0, d2` followed by
; `vbic.i16 q0, #0xff00` to apply the mask.
325 define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) {
326 ; CHECK-LABEL: vaddwa8:
328 ; CHECK-NEXT: vaddw.u8 q0, q0, d2
329 ; CHECK-NEXT: vbic.i16 q0, #0xff00
331 %tmp3 = zext <8 x i8> %B to <8 x i16>
332 %tmp4 = add <8 x i16> %A, %tmp3
333 %and = and <8 x i16> %tmp4, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; vaddwa16: zero-extends the narrow <4 x i16> operand %B, adds it to the wide
; <4 x i32> operand %A, then masks every lane with 65535 (keeps only the low
; 16 bits of each sum).
; The assertions below expect the mask to be materialised with
; `vmov.i32 q8, #0xffff`, the add to select `vaddw.u16 q9, q0, d2`, and the
; mask to be applied with `vand q0, q9, q8`.
337 define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) {
338 ; CHECK-LABEL: vaddwa16:
340 ; CHECK-NEXT: vmov.i32 q8, #0xffff
341 ; CHECK-NEXT: vaddw.u16 q9, q0, d2
342 ; CHECK-NEXT: vand q0, q9, q8
344 %tmp3 = zext <4 x i16> %B to <4 x i32>
345 %tmp4 = add <4 x i32> %A, %tmp3
346 %and = and <4 x i32> %tmp4, <i32 65535, i32 65535, i32 65535, i32 65535>
; vaddwa32: zero-extends the narrow <2 x i32> operand %B, adds it to the wide
; <2 x i64> operand %A, then masks every lane with 4294967295 (keeps only the
; low 32 bits of each sum).
; The assertions below expect the mask to be materialised with
; `vmov.i64 q8, #0xffffffff`, the add to select `vaddw.u32 q9, q0, d2`, and the
; mask to be applied with `vand q0, q9, q8`.
350 define <2 x i64> @vaddwa32(<2 x i64> %A, <2 x i32> %B) {
351 ; CHECK-LABEL: vaddwa32:
353 ; CHECK-NEXT: vmov.i64 q8, #0xffffffff
354 ; CHECK-NEXT: vaddw.u32 q9, q0, d2
355 ; CHECK-NEXT: vand q0, q9, q8
357 %tmp3 = zext <2 x i32> %B to <2 x i64>
358 %tmp4 = add <2 x i64> %A, %tmp3
359 %and = and <2 x i64> %tmp4, <i64 4294967295, i64 4294967295>