1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
6 target triple = "aarch64-unknown-linux-gnu"
; uabd_v16i8_v16i16: unsigned absolute difference of two <16 x i8> vectors.
; The IR spells it as zext to i16 -> sub -> llvm.abs -> trunc to i8, and the
; result is stored back through %a.
; Expected codegen:
;   * the +sve and +sme runs select one predicated UABD on byte lanes;
;   * the run with neither feature spills both inputs to the stack and
;     processes the 16 bytes one at a time (ldrb/ldrb, subs, csetm hi, eor,
;     sub, strb), then reloads the 16 result bytes as a single q register.
8 define void @uabd_v16i8_v16i16(ptr %a, ptr %b) {
9 ; CHECK-LABEL: uabd_v16i8_v16i16:
11 ; CHECK-NEXT: ptrue p0.b, vl16
12 ; CHECK-NEXT: ldr q0, [x0]
13 ; CHECK-NEXT: ldr q1, [x1]
14 ; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b
15 ; CHECK-NEXT: str q0, [x0]
18 ; NONEON-NOSVE-LABEL: uabd_v16i8_v16i16:
19 ; NONEON-NOSVE: // %bb.0:
20 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
21 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
22 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
23 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
24 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
25 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
26 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
27 ; NONEON-NOSVE-NEXT: csetm w9, hi
28 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
29 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
30 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
31 ; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
32 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
33 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
34 ; NONEON-NOSVE-NEXT: csetm w9, hi
35 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
36 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
37 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
38 ; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
39 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
40 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
41 ; NONEON-NOSVE-NEXT: csetm w9, hi
42 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
43 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
44 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
45 ; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
46 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
47 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
48 ; NONEON-NOSVE-NEXT: csetm w9, hi
49 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
50 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
51 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
52 ; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
53 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
54 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
55 ; NONEON-NOSVE-NEXT: csetm w9, hi
56 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
57 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
58 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
59 ; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
60 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
61 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
62 ; NONEON-NOSVE-NEXT: csetm w9, hi
63 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
64 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
65 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
66 ; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
67 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
68 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
69 ; NONEON-NOSVE-NEXT: csetm w9, hi
70 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
71 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
72 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
73 ; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
74 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
75 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
76 ; NONEON-NOSVE-NEXT: csetm w9, hi
77 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
78 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
79 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
80 ; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
81 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
82 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
83 ; NONEON-NOSVE-NEXT: csetm w9, hi
84 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
85 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
86 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
87 ; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
88 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
89 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
90 ; NONEON-NOSVE-NEXT: csetm w9, hi
91 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
92 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
93 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
94 ; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
95 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
96 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
97 ; NONEON-NOSVE-NEXT: csetm w9, hi
98 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
99 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
100 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
101 ; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
102 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
103 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
104 ; NONEON-NOSVE-NEXT: csetm w9, hi
105 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
106 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
107 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
108 ; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
109 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
110 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
111 ; NONEON-NOSVE-NEXT: csetm w9, hi
112 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
113 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
114 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
115 ; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
116 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
117 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
118 ; NONEON-NOSVE-NEXT: csetm w9, hi
119 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
120 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
121 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
122 ; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
123 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
124 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
125 ; NONEON-NOSVE-NEXT: csetm w9, hi
126 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
127 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
128 ; NONEON-NOSVE-NEXT: ldrb w9, [sp]
129 ; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
130 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
131 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
132 ; NONEON-NOSVE-NEXT: csetm w9, hi
133 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
134 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
135 ; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
136 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
137 ; NONEON-NOSVE-NEXT: str q0, [x0]
138 ; NONEON-NOSVE-NEXT: add sp, sp, #48
139 ; NONEON-NOSVE-NEXT: ret
; Test input: both operands are zero-extended to i16 so the subtraction
; cannot wrap in the wider type; the absolute value of the difference then
; fits back into 8 bits for the trunc. llvm.abs is called with its second
; argument set to true.
140 %a.ld = load <16 x i8>, ptr %a
141 %b.ld = load <16 x i8>, ptr %b
142 %a.zext = zext <16 x i8> %a.ld to <16 x i16>
143 %b.zext = zext <16 x i8> %b.ld to <16 x i16>
144 %sub = sub <16 x i16> %a.zext, %b.zext
145 %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
146 %trunc = trunc <16 x i16> %abs to <16 x i8>
147 store <16 x i8> %trunc, ptr %a
; sabd_v16i8_v16i16: signed absolute difference of two <16 x i8> vectors.
; The IR spells it as sext to i16 -> sub -> llvm.abs -> trunc to i8, and the
; result is stored back through %a.
; Expected codegen:
;   * the +sve and +sme runs select one predicated SABD on byte lanes;
;   * the run with neither feature spills both inputs to the stack and
;     processes the 16 bytes one at a time (ldrsb/ldrsb, subs, csetm gt, eor,
;     sub, strb), then reloads the 16 result bytes as a single q register.
151 define void @sabd_v16i8_v16i16(ptr %a, ptr %b) {
152 ; CHECK-LABEL: sabd_v16i8_v16i16:
154 ; CHECK-NEXT: ptrue p0.b, vl16
155 ; CHECK-NEXT: ldr q0, [x0]
156 ; CHECK-NEXT: ldr q1, [x1]
157 ; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b
158 ; CHECK-NEXT: str q0, [x0]
161 ; NONEON-NOSVE-LABEL: sabd_v16i8_v16i16:
162 ; NONEON-NOSVE: // %bb.0:
163 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
164 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
165 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
166 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
167 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
168 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
169 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
170 ; NONEON-NOSVE-NEXT: csetm w9, gt
171 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
172 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
173 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
174 ; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
175 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
176 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
177 ; NONEON-NOSVE-NEXT: csetm w9, gt
178 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
179 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
180 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
181 ; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
182 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
183 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
184 ; NONEON-NOSVE-NEXT: csetm w9, gt
185 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
186 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
187 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12]
188 ; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
189 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
190 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
191 ; NONEON-NOSVE-NEXT: csetm w9, gt
192 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
193 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
194 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
195 ; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
196 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
197 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
198 ; NONEON-NOSVE-NEXT: csetm w9, gt
199 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
200 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
201 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10]
202 ; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
203 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
204 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
205 ; NONEON-NOSVE-NEXT: csetm w9, gt
206 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
207 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
208 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
209 ; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
210 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
211 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
212 ; NONEON-NOSVE-NEXT: csetm w9, gt
213 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
214 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
215 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8]
216 ; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
217 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
218 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
219 ; NONEON-NOSVE-NEXT: csetm w9, gt
220 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
221 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
222 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7]
223 ; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
224 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
225 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
226 ; NONEON-NOSVE-NEXT: csetm w9, gt
227 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
228 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
229 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6]
230 ; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
231 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
232 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
233 ; NONEON-NOSVE-NEXT: csetm w9, gt
234 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
235 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
236 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5]
237 ; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
238 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
239 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
240 ; NONEON-NOSVE-NEXT: csetm w9, gt
241 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
242 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
243 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4]
244 ; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
245 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
246 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
247 ; NONEON-NOSVE-NEXT: csetm w9, gt
248 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
249 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
250 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3]
251 ; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
252 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
253 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
254 ; NONEON-NOSVE-NEXT: csetm w9, gt
255 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
256 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
257 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2]
258 ; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
259 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
260 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
261 ; NONEON-NOSVE-NEXT: csetm w9, gt
262 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
263 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
264 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1]
265 ; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
266 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
267 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
268 ; NONEON-NOSVE-NEXT: csetm w9, gt
269 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
270 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
271 ; NONEON-NOSVE-NEXT: ldrsb w9, [sp]
272 ; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
273 ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
274 ; NONEON-NOSVE-NEXT: subs w8, w9, w8
275 ; NONEON-NOSVE-NEXT: csetm w9, gt
276 ; NONEON-NOSVE-NEXT: eor w8, w8, w9
277 ; NONEON-NOSVE-NEXT: sub w8, w9, w8
278 ; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
279 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
280 ; NONEON-NOSVE-NEXT: str q0, [x0]
281 ; NONEON-NOSVE-NEXT: add sp, sp, #48
282 ; NONEON-NOSVE-NEXT: ret
; Test input: both operands are sign-extended to i16 so the subtraction
; cannot wrap in the wider type; the absolute value of the difference then
; fits back into 8 bits for the trunc. llvm.abs is called with its second
; argument set to true.
283 %a.ld = load <16 x i8>, ptr %a
284 %b.ld = load <16 x i8>, ptr %b
285 %a.sext = sext <16 x i8> %a.ld to <16 x i16>
286 %b.sext = sext <16 x i8> %b.ld to <16 x i16>
287 %sub = sub <16 x i16> %a.sext, %b.sext
288 %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
289 %trunc = trunc <16 x i16> %abs to <16 x i8>
290 store <16 x i8> %trunc, ptr %a