; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
; Scalar signed saturating add on the low s-lane of two v4i32 inputs.
define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}
; Scalar signed saturating add on the low d-lane of two v2i64 inputs.
define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}
; Scalar unsigned saturating add on the low s-lane of two v4i32 inputs.
define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}
; Scalar unsigned saturating add on the low d-lane of two v2i64 inputs.
define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}
declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone
; Scalar signed saturating subtract on the low s-lane of two v4i32 inputs.
define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}
; Scalar signed saturating subtract on the low d-lane of two v2i64 inputs.
define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}
; Scalar unsigned saturating subtract on the low s-lane of two v4i32 inputs.
define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}
; Scalar unsigned saturating subtract on the low d-lane of two v2i64 inputs.
define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}
declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone
; Scalar signed saturating absolute value on the low s-lane of a v4i32 input.
define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qabss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind
  ret i32 %vqabs.i
}
; Scalar signed saturating absolute value on the low d-lane of a v2i64 input.
define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qabsd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind
  ret i64 %vqabs.i
}
; Scalar signed saturating negate on the low s-lane of a v4i32 input.
define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qnegs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind
  ret i32 %vqneg.i
}
; Scalar signed saturating negate on the low d-lane of a v2i64 input.
define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qnegd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind
  ret i64 %vqneg.i
}
declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone
; Scalar signed saturating extract-unsigned-narrow (i64 -> i32) of the low lane.
define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovund:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtun s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovun.i
}
; Scalar signed saturating extract-narrow (i64 -> i32) of the low lane.
define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}
; Scalar unsigned saturating extract-narrow (i64 -> i32) of the low lane.
define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}
; UQXTN fed by an extract of the high lane: the lane must first be moved to
; lane 0 before the scalar narrow.
define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: uqxtn_ext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[0], v3.d[1]
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %e1 = extractelement <2 x i64> %e, i64 1
  %r = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %e1)
  ret i32 %r
}
; SQXTN whose result is inserted back into a vector lane: narrow in the FPR
; bank, then insert, avoiding a round trip through a GPR.
define <4 x i32> @sqxtn_ins(<4 x i32> noundef %a, i64 %c) {
; CHECK-LABEL: sqxtn_ins:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d1, x0
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}
; SQXTUN of a high-lane extract, inserted back into a vector lane: the whole
; chain stays in the FPR bank.
define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
; CHECK-LABEL: sqxtun_insext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v1.d[1]
; CHECK-NEXT:    sqxtun s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %c = extractelement <2 x i64> %e, i64 1
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}
; SADDLV feeding SQXTN feeding a lane insert: the entire reduction/narrow/insert
; chain stays in the FPR bank.
define <4 x i32> @saddluse(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: saddluse:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddlv d1, v1.4s
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b)
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vaddlvq_s32.i)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 1
  ret <4 x i32> %vecins
}
declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)