; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

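; Unsigned floor-average pattern (x & y) + ((x ^ y) >> 1) on zero-extended i8
; inputs; expected to fold to a single uhadd at the i8 width.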
define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgflooru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

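; Same floor-average pattern with mismatched source widths (i8 vs i4); the i4
; operand only needs a mask with #15, so the uhadd fold still applies.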
define <16 x i16> @zext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: zext_avgflooru_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #15
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i4> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

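; Unsigned ceiling-average pattern (x | y) - ((x ^ y) >> 1) on zero-extended i8
; inputs; expected to fold to urhadd.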
define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

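; Ceiling-average pattern with an i4 operand; masking with #15 is enough for
; the urhadd fold to apply.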
define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #15
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i4> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

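; Signed floor-average pattern (and/xor/ashr/add) on sign-extended i8 inputs;
; expected to fold to shadd.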
define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgfloors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

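; Signed floor average with an i4 operand: the i4 lanes need a real sign
; extension (shl/sshr by 12), so the average is formed with shadd at i16
; rather than at i8.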
define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: sext_avgfloors_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    sshll v3.8h, v0.8b, #0
; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    shl v1.8h, v1.8h, #12
; CHECK-NEXT:    shl v2.8h, v2.8h, #12
; CHECK-NEXT:    sshr v4.8h, v1.8h, #12
; CHECK-NEXT:    sshr v1.8h, v2.8h, #12
; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    shadd v0.8h, v3.8h, v4.8h
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i4> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

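; Signed ceiling-average pattern (or/xor/ashr/sub) on sign-extended i8 inputs;
; expected to fold to srhadd.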
define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

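; Signed ceiling average with an i4 operand: as above, the i4 lanes are
; sign-extended in-register and the srhadd is formed at i16.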
define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
; CHECK-NEXT:    shl v2.8h, v2.8h, #12
; CHECK-NEXT:    shl v0.8h, v0.8h, #12
; CHECK-NEXT:    sshr v2.8h, v2.8h, #12
; CHECK-NEXT:    sshr v0.8h, v0.8h, #12
; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
; CHECK-NEXT:    ret
  %x0 = sext <16 x i4> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

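; (a + b) >> 1 where the add is nuw cannot lose the carry bit, so it folds to
; uhadd.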
define <8 x i16> @add_avgflooru(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add = add nuw <8 x i16> %a0, %a1
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

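; Without nuw the add may wrap, so no uhadd is formed; expect a plain
; add + ushr.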
define <8 x i16> @add_avgflooru_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add = add <8 x i16> %a0, %a1
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

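; (a + b + 1) >> 1 with nuw adds folds to the rounding halving add urhadd.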
define <8 x i16> @add_avgceilu(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a0, splat(i16 1)
  %add = add nuw <8 x i16> %a1, %add0
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

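; Same as above with the operands and the +1 in the other order; still urhadd.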
define <8 x i16> @add_avgceilu2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

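; The inner a1 + a0 add lacks nuw, so only the final (sum + 1) >> 1 becomes a
; uhadd against a constant-one vector.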
define <8 x i16> @add_avgceilu_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %add0 = add <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

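; Here the +1 add lacks nuw, so no halving add is formed; a1 + a0 + 1 is
; materialised as a subtract of the bitwise-not instead.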
define <8 x i16> @add_avgceilu_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

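; Same non-nuw +1 add as the previous test; again no urhadd is expected.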
define <8 x i16> @add_avgceilu_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

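; Signed variant: (a + b) >> 1 with an nsw add folds to shadd.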
define <8 x i16> @add_avgfloors(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add = add nsw <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

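; Without nsw the fold is unsafe; expect add + sshr.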
define <8 x i16> @add_avgfloors_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add = add <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

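; A shift by 2 is not an average, so no shadd even though the add is nsw.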
define <8 x i16> @add_avgfoor_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfoor_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add = add nsw <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}

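; (a + b + 1) >> 1 with nsw adds folds to the signed rounding halving add
; srhadd.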
define <8 x i16> @add_avgceils(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
  %add = add nsw <8 x i16> %a1, %add0
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

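; Commuted form of the previous test; still srhadd.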
define <8 x i16> @add_avgceils2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add nsw <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

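; The inner add lacks nsw, so only the final (sum + 1) >> 1 becomes a shadd
; against a constant-one vector.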
define <8 x i16> @add_avgceils_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    shadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %add0 = add <8 x i16> %a1, %a0
  %add = add nsw <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

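; The +1 add lacks nsw, so no srhadd is formed.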
define <8 x i16> @add_avgceils_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

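; Same non-nsw +1 add as the previous test; again no srhadd is expected.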
define <8 x i16> @add_avgceils_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

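; Both adds are nsw but the shift amount is 2, so this is not an average and
; no srhadd is formed.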
define <8 x i16> @add_avgceils_mismatch4(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
  %add = add nsw <8 x i16> %a1, %add0
  %avg = ashr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}

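; Unsigned version of the shift-by-2 case: nuw adds but not an average, so no
; urhadd is formed.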
define <8 x i16> @add_avgceilu_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}