1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; Basic tests from input vector to bitmask
6 ; IR generated from clang for:
7 ; __builtin_convertvector + reinterpret_cast<uint16&>
9 ; CHECK-GI: warning: Instruction selection used fallback path for convert_to_bitmask2
10 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
11 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
12 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
13 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat
; Test: compare every lane of a <16 x i8> vector against zero and bitcast the
; resulting <16 x i1> mask to an i16. Expected output is listed separately for
; the SelectionDAG run (SD prefix) and the GlobalISel run (GI prefix).
15 define i16 @convert_to_bitmask16(<16 x i8> %vec) {
17 ; CHECK-SD-LABEL: convert_to_bitmask16:
19 ; CHECK-SD-NEXT: adrp x8, lCPI0_0@PAGE
20 ; CHECK-SD-NEXT: cmeq.16b v0, v0, #0
21 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF]
22 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
23 ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8
24 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1
25 ; CHECK-SD-NEXT: addv.8h h0, v0
26 ; CHECK-SD-NEXT: fmov w0, s0
29 ; CHECK-GI-LABEL: convert_to_bitmask16:
31 ; CHECK-GI-NEXT: sub sp, sp, #16
32 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
33 ; CHECK-GI-NEXT: cmeq.16b v0, v0, #0
34 ; CHECK-GI-NEXT: mvn.16b v0, v0
35 ; CHECK-GI-NEXT: umov.b w8, v0[1]
36 ; CHECK-GI-NEXT: umov.b w9, v0[0]
37 ; CHECK-GI-NEXT: umov.b w10, v0[2]
38 ; CHECK-GI-NEXT: umov.b w11, v0[3]
39 ; CHECK-GI-NEXT: and w8, w8, #0x1
40 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
41 ; CHECK-GI-NEXT: and w8, w10, #0x1
42 ; CHECK-GI-NEXT: umov.b w10, v0[4]
43 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2
44 ; CHECK-GI-NEXT: and w9, w11, #0x1
45 ; CHECK-GI-NEXT: umov.b w11, v0[5]
46 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
47 ; CHECK-GI-NEXT: and w9, w10, #0x1
48 ; CHECK-GI-NEXT: umov.b w10, v0[6]
49 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4
50 ; CHECK-GI-NEXT: and w9, w11, #0x1
51 ; CHECK-GI-NEXT: umov.b w11, v0[7]
52 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5
53 ; CHECK-GI-NEXT: and w9, w10, #0x1
54 ; CHECK-GI-NEXT: umov.b w10, v0[8]
55 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6
56 ; CHECK-GI-NEXT: and w9, w11, #0x1
57 ; CHECK-GI-NEXT: umov.b w11, v0[9]
58 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7
59 ; CHECK-GI-NEXT: and w9, w10, #0x1
60 ; CHECK-GI-NEXT: umov.b w10, v0[10]
61 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #8
62 ; CHECK-GI-NEXT: and w9, w11, #0x1
63 ; CHECK-GI-NEXT: umov.b w11, v0[11]
64 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #9
65 ; CHECK-GI-NEXT: and w9, w10, #0x1
66 ; CHECK-GI-NEXT: umov.b w10, v0[12]
67 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #10
68 ; CHECK-GI-NEXT: and w9, w11, #0x1
69 ; CHECK-GI-NEXT: umov.b w11, v0[13]
70 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #11
71 ; CHECK-GI-NEXT: and w9, w10, #0x1
72 ; CHECK-GI-NEXT: umov.b w10, v0[14]
73 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #12
74 ; CHECK-GI-NEXT: and w9, w11, #0x1
75 ; CHECK-GI-NEXT: umov.b w11, v0[15]
76 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #13
77 ; CHECK-GI-NEXT: and w9, w10, #0x1
78 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #14
79 ; CHECK-GI-NEXT: and w9, w11, #0x1
80 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #15
81 ; CHECK-GI-NEXT: strh w8, [sp, #14]
82 ; CHECK-GI-NEXT: and w0, w8, #0xffff
83 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i8 lane is non-zero.
88 %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
; Reinterpret the 16 mask bits as a single i16 scalar.
89 %bitmask = bitcast <16 x i1> %cmp_result to i16
; Test: compare every lane of a <8 x i16> vector against zero, bitcast the
; <8 x i1> mask to i8, and zero-extend that value to the i16 return type.
93 define i16 @convert_to_bitmask8(<8 x i16> %vec) {
94 ; CHECK-SD-LABEL: convert_to_bitmask8:
96 ; CHECK-SD-NEXT: adrp x8, lCPI1_0@PAGE
97 ; CHECK-SD-NEXT: cmeq.8h v0, v0, #0
98 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF]
99 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
100 ; CHECK-SD-NEXT: addv.8h h0, v0
101 ; CHECK-SD-NEXT: fmov w8, s0
102 ; CHECK-SD-NEXT: and w0, w8, #0xff
105 ; CHECK-GI-LABEL: convert_to_bitmask8:
107 ; CHECK-GI-NEXT: sub sp, sp, #16
108 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
109 ; CHECK-GI-NEXT: cmeq.8h v0, v0, #0
110 ; CHECK-GI-NEXT: mvn.16b v0, v0
111 ; CHECK-GI-NEXT: xtn.8b v0, v0
112 ; CHECK-GI-NEXT: umov.b w8, v0[1]
113 ; CHECK-GI-NEXT: umov.b w9, v0[0]
114 ; CHECK-GI-NEXT: umov.b w10, v0[2]
115 ; CHECK-GI-NEXT: umov.b w11, v0[3]
116 ; CHECK-GI-NEXT: and w8, w8, #0x1
117 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
118 ; CHECK-GI-NEXT: and w8, w10, #0x1
119 ; CHECK-GI-NEXT: umov.b w10, v0[4]
120 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2
121 ; CHECK-GI-NEXT: and w9, w11, #0x1
122 ; CHECK-GI-NEXT: umov.b w11, v0[5]
123 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
124 ; CHECK-GI-NEXT: and w9, w10, #0x1
125 ; CHECK-GI-NEXT: umov.b w10, v0[6]
126 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4
127 ; CHECK-GI-NEXT: and w9, w11, #0x1
128 ; CHECK-GI-NEXT: umov.b w11, v0[7]
129 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5
130 ; CHECK-GI-NEXT: and w9, w10, #0x1
131 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6
132 ; CHECK-GI-NEXT: and w9, w11, #0x1
133 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7
134 ; CHECK-GI-NEXT: strb w8, [sp, #15]
135 ; CHECK-GI-NEXT: and w0, w8, #0xff
136 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i16 lane is non-zero.
140 %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
141 %bitmask = bitcast <8 x i1> %cmp_result to i8
; The mask is only 8 bits wide; widen it to the declared i16 return type.
142 %extended_bitmask = zext i8 %bitmask to i16
143 ret i16 %extended_bitmask
; Test: compare every lane of a <4 x i32> vector against zero and bitcast the
; resulting <4 x i1> mask to an i4.
146 define i4 @convert_to_bitmask4(<4 x i32> %vec) {
147 ; CHECK-SD-LABEL: convert_to_bitmask4:
149 ; CHECK-SD-NEXT: adrp x8, lCPI2_0@PAGE
150 ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0
151 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF]
152 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
153 ; CHECK-SD-NEXT: addv.4s s0, v0
154 ; CHECK-SD-NEXT: fmov w0, s0
157 ; CHECK-GI-LABEL: convert_to_bitmask4:
159 ; CHECK-GI-NEXT: sub sp, sp, #16
160 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
161 ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
162 ; CHECK-GI-NEXT: mvn.16b v0, v0
163 ; CHECK-GI-NEXT: mov.s w8, v0[1]
164 ; CHECK-GI-NEXT: mov.s w9, v0[2]
165 ; CHECK-GI-NEXT: fmov w11, s0
166 ; CHECK-GI-NEXT: mov.s w10, v0[3]
167 ; CHECK-GI-NEXT: and w8, w8, #0x1
168 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
169 ; CHECK-GI-NEXT: and w8, w9, #0x1
170 ; CHECK-GI-NEXT: and w9, w10, #0x1
171 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
172 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
173 ; CHECK-GI-NEXT: strb w8, [sp, #15]
174 ; CHECK-GI-NEXT: and w0, w8, #0xff
175 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i32 lane is non-zero.
179 %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
; Reinterpret the 4 mask bits as a single i4 scalar.
180 %bitmask = bitcast <4 x i1> %cmp_result to i4
; Test: compare both lanes of a <2 x i64> vector against zero, bitcast the
; <2 x i1> mask to i2, and zero-extend it to the i8 return type. Both run
; lines share one set of expectations here; the header of this file expects
; the GlobalISel run to report a fallback for this function.
184 define i8 @convert_to_bitmask2(<2 x i64> %vec) {
185 ; CHECK-LABEL: convert_to_bitmask2:
187 ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
188 ; CHECK-NEXT: cmeq.2d v0, v0, #0
189 ; CHECK-NEXT: ldr q1, [x8, lCPI3_0@PAGEOFF]
190 ; CHECK-NEXT: bic.16b v0, v1, v0
191 ; CHECK-NEXT: addp.2d d0, v0
192 ; CHECK-NEXT: fmov w8, s0
193 ; CHECK-NEXT: and w0, w8, #0x3
; One i1 per input lane: true where the i64 lane is non-zero.
197 %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
198 %bitmask = bitcast <2 x i1> %cmp_result to i2
; The mask is only 2 bits wide; widen it to the declared i8 return type.
199 %extended_bitmask = zext i2 %bitmask to i8
200 ret i8 %extended_bitmask
203 ; Clang's __builtin_convertvector adds an undef vector concat for vectors with <8 elements.
204 define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) {
205 ; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4:
207 ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
208 ; CHECK-NEXT: cmeq.4s v0, v0, #0
209 ; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF]
210 ; CHECK-NEXT: bic.16b v0, v1, v0
211 ; CHECK-NEXT: addv.4s s0, v0
212 ; CHECK-NEXT: fmov w0, s0
; One i1 per input lane: true where the i32 lane is non-zero.
216 %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
; Widen the 4-lane mask to 8 lanes: lanes 0-3 are taken from %cmp_result and
; lanes 4-7 are don't-care. The don't-care mask elements are written as poison
; (matching the poison second operand) instead of the deprecated undef; both
; spellings denote an unspecified lane, so the expected output is unchanged.
217 %vector_pad = shufflevector <4 x i1> %cmp_result, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; Reinterpret the padded 8-lane mask as a single i8 scalar.
218 %bitmask = bitcast <8 x i1> %vector_pad to i8
; Test: the <4 x i1> mask is produced by truncating a bitwise AND of two
; <4 x i32> vectors, with no compare instruction, and is then bitcast to i4.
223 define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
224 ; CHECK-SD-LABEL: convert_to_bitmask_no_compare:
226 ; CHECK-SD-NEXT: and.16b v0, v0, v1
227 ; CHECK-SD-NEXT: adrp x8, lCPI5_0@PAGE
228 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI5_0@PAGEOFF]
229 ; CHECK-SD-NEXT: shl.4s v0, v0, #31
230 ; CHECK-SD-NEXT: cmlt.4s v0, v0, #0
231 ; CHECK-SD-NEXT: and.16b v0, v0, v1
232 ; CHECK-SD-NEXT: addv.4s s0, v0
233 ; CHECK-SD-NEXT: fmov w0, s0
236 ; CHECK-GI-LABEL: convert_to_bitmask_no_compare:
238 ; CHECK-GI-NEXT: sub sp, sp, #16
239 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
240 ; CHECK-GI-NEXT: and.16b v0, v0, v1
241 ; CHECK-GI-NEXT: mov.s w8, v0[1]
242 ; CHECK-GI-NEXT: mov.s w9, v0[2]
243 ; CHECK-GI-NEXT: fmov w11, s0
244 ; CHECK-GI-NEXT: mov.s w10, v0[3]
245 ; CHECK-GI-NEXT: and w8, w8, #0x1
246 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
247 ; CHECK-GI-NEXT: and w8, w9, #0x1
248 ; CHECK-GI-NEXT: and w9, w10, #0x1
249 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
250 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
251 ; CHECK-GI-NEXT: strb w8, [sp, #15]
252 ; CHECK-GI-NEXT: and w0, w8, #0xff
253 ; CHECK-GI-NEXT: add sp, sp, #16
; Despite its name, %cmp is a plain bitwise AND of full i32 lanes, not a compare.
257 %cmp = and <4 x i32> %vec1, %vec2
; Truncating to i1 keeps only the lowest bit of each lane.
258 %trunc = trunc <4 x i32> %cmp to <4 x i1>
259 %bitmask = bitcast <4 x i1> %trunc to i4
; Test: the <4 x i1> mask is the AND of two vector compares (lane != 0 and
; %vec1 lane == %vec2 lane) before being bitcast to i4.
263 define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
264 ; CHECK-SD-LABEL: convert_to_bitmask_with_compare_chain:
266 ; CHECK-SD-NEXT: cmeq.4s v2, v0, #0
267 ; CHECK-SD-NEXT: cmeq.4s v0, v0, v1
268 ; CHECK-SD-NEXT: adrp x8, lCPI6_0@PAGE
269 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI6_0@PAGEOFF]
270 ; CHECK-SD-NEXT: bic.16b v0, v0, v2
271 ; CHECK-SD-NEXT: and.16b v0, v0, v1
272 ; CHECK-SD-NEXT: addv.4s s0, v0
273 ; CHECK-SD-NEXT: fmov w0, s0
276 ; CHECK-GI-LABEL: convert_to_bitmask_with_compare_chain:
278 ; CHECK-GI-NEXT: sub sp, sp, #16
279 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
280 ; CHECK-GI-NEXT: cmeq.4s v2, v0, #0
281 ; CHECK-GI-NEXT: cmeq.4s v0, v0, v1
282 ; CHECK-GI-NEXT: bic.16b v0, v0, v2
283 ; CHECK-GI-NEXT: mov.s w8, v0[1]
284 ; CHECK-GI-NEXT: mov.s w9, v0[2]
285 ; CHECK-GI-NEXT: fmov w11, s0
286 ; CHECK-GI-NEXT: mov.s w10, v0[3]
287 ; CHECK-GI-NEXT: and w8, w8, #0x1
288 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
289 ; CHECK-GI-NEXT: and w8, w9, #0x1
290 ; CHECK-GI-NEXT: and w9, w10, #0x1
291 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
292 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
293 ; CHECK-GI-NEXT: strb w8, [sp, #15]
294 ; CHECK-GI-NEXT: and w0, w8, #0xff
295 ; CHECK-GI-NEXT: add sp, sp, #16
; Two independent per-lane compares on %vec1 ...
299 %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
300 %cmp2 = icmp eq <4 x i32> %vec1, %vec2
; ... combined lane-wise, so a mask bit is set only when both conditions hold.
301 %cmp3 = and <4 x i1> %cmp1, %cmp2
302 %bitmask = bitcast <4 x i1> %cmp3 to i4
; Test: the <4 x i1> mask is the AND of a vector compare on %vec1 and a
; truncation of %vec2 to <4 x i1>, then bitcast to i4.
306 define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
307 ; CHECK-SD-LABEL: convert_to_bitmask_with_trunc_in_chain:
309 ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0
310 ; CHECK-SD-NEXT: adrp x8, lCPI7_0@PAGE
311 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
312 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI7_0@PAGEOFF]
313 ; CHECK-SD-NEXT: shl.4s v0, v0, #31
314 ; CHECK-SD-NEXT: cmlt.4s v0, v0, #0
315 ; CHECK-SD-NEXT: and.16b v0, v0, v1
316 ; CHECK-SD-NEXT: addv.4s s0, v0
317 ; CHECK-SD-NEXT: fmov w0, s0
320 ; CHECK-GI-LABEL: convert_to_bitmask_with_trunc_in_chain:
322 ; CHECK-GI-NEXT: sub sp, sp, #16
323 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
324 ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
325 ; CHECK-GI-NEXT: bic.16b v0, v1, v0
326 ; CHECK-GI-NEXT: mov.s w8, v0[1]
327 ; CHECK-GI-NEXT: mov.s w9, v0[2]
328 ; CHECK-GI-NEXT: fmov w11, s0
329 ; CHECK-GI-NEXT: mov.s w10, v0[3]
330 ; CHECK-GI-NEXT: and w8, w8, #0x1
331 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
332 ; CHECK-GI-NEXT: and w8, w9, #0x1
333 ; CHECK-GI-NEXT: and w9, w10, #0x1
334 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
335 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
336 ; CHECK-GI-NEXT: strb w8, [sp, #15]
337 ; CHECK-GI-NEXT: and w0, w8, #0xff
338 ; CHECK-GI-NEXT: add sp, sp, #16
; First mask operand: per-lane "is non-zero" compare on %vec1.
342 %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
; Second mask operand: only the lowest bit of each %vec2 lane survives the trunc.
343 %trunc_vec = trunc <4 x i32> %vec2 to <4 x i1>
344 %and_res = and <4 x i1> %cmp1, %trunc_vec
345 %bitmask = bitcast <4 x i1> %and_res to i4
; Test: two vector compares feed a long chain of <4 x i1> and/or/xor
; operations with constant operands before the final bitcast to i4.
349 define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
350 ; CHECK-SD-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
352 ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0
353 ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0
354 ; CHECK-SD-NEXT: adrp x8, lCPI8_0@PAGE
355 ; CHECK-SD-NEXT: movi d2, #0x000000ffffffff
356 ; CHECK-SD-NEXT: movi d3, #0x00ffffffffffff
357 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
358 ; CHECK-SD-NEXT: movi d1, #0xffff0000ffff0000
359 ; CHECK-SD-NEXT: xtn.4h v0, v0
360 ; CHECK-SD-NEXT: orr.8b v0, v0, v2
361 ; CHECK-SD-NEXT: movi d2, #0x00ffffffff0000
362 ; CHECK-SD-NEXT: eor.8b v1, v0, v1
363 ; CHECK-SD-NEXT: eor.8b v0, v0, v2
364 ; CHECK-SD-NEXT: mov.h v1[2], wzr
365 ; CHECK-SD-NEXT: orr.8b v0, v0, v3
366 ; CHECK-SD-NEXT: orr.8b v0, v1, v0
367 ; CHECK-SD-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF]
368 ; CHECK-SD-NEXT: shl.4h v0, v0, #15
369 ; CHECK-SD-NEXT: cmlt.4h v0, v0, #0
370 ; CHECK-SD-NEXT: and.8b v0, v0, v1
371 ; CHECK-SD-NEXT: addv.4h h0, v0
372 ; CHECK-SD-NEXT: fmov w0, s0
375 ; CHECK-GI-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
377 ; CHECK-GI-NEXT: sub sp, sp, #16
378 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
379 ; CHECK-GI-NEXT: mov w8, #1 ; =0x1
380 ; CHECK-GI-NEXT: mov w9, #0 ; =0x0
381 ; CHECK-GI-NEXT: cmeq.4s v5, v0, #0
382 ; CHECK-GI-NEXT: fmov s2, w8
383 ; CHECK-GI-NEXT: fmov s4, w9
384 ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0
385 ; CHECK-GI-NEXT: mov.16b v3, v2
386 ; CHECK-GI-NEXT: mov.16b v0, v4
387 ; CHECK-GI-NEXT: mov.h v4[1], w8
388 ; CHECK-GI-NEXT: bic.16b v1, v1, v5
389 ; CHECK-GI-NEXT: mov.16b v5, v2
390 ; CHECK-GI-NEXT: mov.h v2[1], w8
391 ; CHECK-GI-NEXT: mov.h v3[1], w8
392 ; CHECK-GI-NEXT: mov.h v0[1], w8
393 ; CHECK-GI-NEXT: mov.h v5[1], w8
394 ; CHECK-GI-NEXT: mov.h v4[2], w8
395 ; CHECK-GI-NEXT: xtn.4h v1, v1
396 ; CHECK-GI-NEXT: mov.h v2[2], w8
397 ; CHECK-GI-NEXT: mov.h v3[2], w9
398 ; CHECK-GI-NEXT: mov.h v0[2], w9
399 ; CHECK-GI-NEXT: mov.h v5[2], w9
400 ; CHECK-GI-NEXT: mov.h v4[3], w9
401 ; CHECK-GI-NEXT: mov.h v2[3], w9
402 ; CHECK-GI-NEXT: mov.h v3[3], w9
403 ; CHECK-GI-NEXT: mov.h v0[3], w8
404 ; CHECK-GI-NEXT: mov.h v5[3], w8
405 ; CHECK-GI-NEXT: orr.8b v1, v1, v3
406 ; CHECK-GI-NEXT: eor.8b v0, v1, v0
407 ; CHECK-GI-NEXT: eor.8b v1, v4, v1
408 ; CHECK-GI-NEXT: and.8b v0, v0, v5
409 ; CHECK-GI-NEXT: orr.8b v1, v2, v1
410 ; CHECK-GI-NEXT: orr.8b v0, v0, v1
411 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
412 ; CHECK-GI-NEXT: mov.s w8, v0[1]
413 ; CHECK-GI-NEXT: mov.s w9, v0[2]
414 ; CHECK-GI-NEXT: fmov w11, s0
415 ; CHECK-GI-NEXT: mov.s w10, v0[3]
416 ; CHECK-GI-NEXT: and w8, w8, #0x1
417 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
418 ; CHECK-GI-NEXT: and w8, w9, #0x1
419 ; CHECK-GI-NEXT: and w9, w10, #0x1
420 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
421 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
422 ; CHECK-GI-NEXT: strb w8, [sp, #15]
423 ; CHECK-GI-NEXT: and w0, w8, #0xff
424 ; CHECK-GI-NEXT: add sp, sp, #16
; The only two instructions that see the original <4 x i32> element type.
428 %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
429 %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer
431 ; Artificially make this a long chain to hide the original type
432 %chain1 = and <4 x i1> %cmp1, %cmp2;
433 %chain2 = or <4 x i1> %chain1, <i1 1, i1 1, i1 0, i1 0>;
434 %chain3 = xor <4 x i1> %chain2, <i1 0, i1 1, i1 0, i1 1>;
435 %chain4 = and <4 x i1> %chain3, <i1 1, i1 1, i1 0, i1 1>;
436 %chain5 = or <4 x i1> %chain4, <i1 1, i1 1, i1 1, i1 0>;
; %chain6 branches off %chain2 again, so the chain is a DAG rather than a line.
437 %chain6 = xor <4 x i1> <i1 0, i1 1, i1 1, i1 0>, %chain2;
438 %chain7 = or <4 x i1> %chain5, %chain6;
439 %bitmask = bitcast <4 x i1> %chain7 to i4
; Test: the two compares that feed the <4 x i1> mask operate on different
; element widths (<4 x i16> and <4 x i32>); their results are ORed and bitcast
; to i4.
443 define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 x i32> %vec2) {
444 ; CHECK-SD-LABEL: convert_to_bitmask_with_different_types_in_chain:
446 ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0
447 ; CHECK-SD-NEXT: cmeq.4h v0, v0, #0
448 ; CHECK-SD-NEXT: adrp x8, lCPI9_0@PAGE
449 ; CHECK-SD-NEXT: xtn.4h v1, v1
450 ; CHECK-SD-NEXT: orn.8b v0, v1, v0
451 ; CHECK-SD-NEXT: ldr d1, [x8, lCPI9_0@PAGEOFF]
452 ; CHECK-SD-NEXT: and.8b v0, v0, v1
453 ; CHECK-SD-NEXT: addv.4h h0, v0
454 ; CHECK-SD-NEXT: fmov w0, s0
457 ; CHECK-GI-LABEL: convert_to_bitmask_with_different_types_in_chain:
459 ; CHECK-GI-NEXT: sub sp, sp, #16
460 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
461 ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0
462 ; CHECK-GI-NEXT: cmeq.4h v0, v0, #0
463 ; CHECK-GI-NEXT: xtn.4h v1, v1
464 ; CHECK-GI-NEXT: orn.8b v0, v1, v0
465 ; CHECK-GI-NEXT: ushll.4s v0, v0, #0
466 ; CHECK-GI-NEXT: mov.s w8, v0[1]
467 ; CHECK-GI-NEXT: mov.s w9, v0[2]
468 ; CHECK-GI-NEXT: fmov w11, s0
469 ; CHECK-GI-NEXT: mov.s w10, v0[3]
470 ; CHECK-GI-NEXT: and w8, w8, #0x1
471 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
472 ; CHECK-GI-NEXT: and w8, w9, #0x1
473 ; CHECK-GI-NEXT: and w9, w10, #0x1
474 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
475 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
476 ; CHECK-GI-NEXT: strb w8, [sp, #15]
477 ; CHECK-GI-NEXT: and w0, w8, #0xff
478 ; CHECK-GI-NEXT: add sp, sp, #16
; Compare on 16-bit lanes ...
482 %cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer
; ... and on 32-bit lanes; both yield <4 x i1>, so they can be combined directly.
483 %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer
484 %chain1 = or <4 x i1> %cmp1, %cmp2
485 %bitmask = bitcast <4 x i1> %chain1 to i4
; Test: the <16 x i1> mask arrives directly as a function argument, so no
; compare or other producing instruction is visible; it is bitcast straight to
; an i16.
489 define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
490 ; CHECK-SD-LABEL: convert_to_bitmask_without_knowing_type:
492 ; CHECK-SD-NEXT: shl.16b v0, v0, #7
493 ; CHECK-SD-NEXT: adrp x8, lCPI10_0@PAGE
494 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI10_0@PAGEOFF]
495 ; CHECK-SD-NEXT: cmlt.16b v0, v0, #0
496 ; CHECK-SD-NEXT: and.16b v0, v0, v1
497 ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8
498 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1
499 ; CHECK-SD-NEXT: addv.8h h0, v0
500 ; CHECK-SD-NEXT: fmov w0, s0
503 ; CHECK-GI-LABEL: convert_to_bitmask_without_knowing_type:
505 ; CHECK-GI-NEXT: sub sp, sp, #16
506 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
507 ; CHECK-GI-NEXT: umov.b w8, v0[1]
508 ; CHECK-GI-NEXT: umov.b w9, v0[0]
509 ; CHECK-GI-NEXT: umov.b w10, v0[2]
510 ; CHECK-GI-NEXT: umov.b w11, v0[3]
511 ; CHECK-GI-NEXT: and w8, w8, #0x1
512 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
513 ; CHECK-GI-NEXT: and w8, w10, #0x1
514 ; CHECK-GI-NEXT: umov.b w10, v0[4]
515 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2
516 ; CHECK-GI-NEXT: and w9, w11, #0x1
517 ; CHECK-GI-NEXT: umov.b w11, v0[5]
518 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
519 ; CHECK-GI-NEXT: and w9, w10, #0x1
520 ; CHECK-GI-NEXT: umov.b w10, v0[6]
521 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4
522 ; CHECK-GI-NEXT: and w9, w11, #0x1
523 ; CHECK-GI-NEXT: umov.b w11, v0[7]
524 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5
525 ; CHECK-GI-NEXT: and w9, w10, #0x1
526 ; CHECK-GI-NEXT: umov.b w10, v0[8]
527 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6
528 ; CHECK-GI-NEXT: and w9, w11, #0x1
529 ; CHECK-GI-NEXT: umov.b w11, v0[9]
530 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7
531 ; CHECK-GI-NEXT: and w9, w10, #0x1
532 ; CHECK-GI-NEXT: umov.b w10, v0[10]
533 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #8
534 ; CHECK-GI-NEXT: and w9, w11, #0x1
535 ; CHECK-GI-NEXT: umov.b w11, v0[11]
536 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #9
537 ; CHECK-GI-NEXT: and w9, w10, #0x1
538 ; CHECK-GI-NEXT: umov.b w10, v0[12]
539 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #10
540 ; CHECK-GI-NEXT: and w9, w11, #0x1
541 ; CHECK-GI-NEXT: umov.b w11, v0[13]
542 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #11
543 ; CHECK-GI-NEXT: and w9, w10, #0x1
544 ; CHECK-GI-NEXT: umov.b w10, v0[14]
545 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #12
546 ; CHECK-GI-NEXT: and w9, w11, #0x1
547 ; CHECK-GI-NEXT: umov.b w11, v0[15]
548 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #13
549 ; CHECK-GI-NEXT: and w9, w10, #0x1
550 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #14
551 ; CHECK-GI-NEXT: and w9, w11, #0x1
552 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #15
553 ; CHECK-GI-NEXT: strh w8, [sp, #14]
554 ; CHECK-GI-NEXT: and w0, w8, #0xffff
555 ; CHECK-GI-NEXT: add sp, sp, #16
; The argument is already an i1 vector; this bitcast is the whole computation.
558 %bitmask = bitcast <16 x i1> %vec to i16
; Test: compare both lanes of a 64-bit <2 x i32> vector against zero and
; bitcast the <2 x i1> mask to i2. Both run lines share one set of
; expectations; the header of this file expects the GlobalISel run to report a
; fallback for this function.
562 define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
563 ; CHECK-LABEL: convert_to_bitmask_2xi32:
565 ; CHECK-NEXT: adrp x8, lCPI11_0@PAGE
566 ; CHECK-NEXT: cmeq.2s v0, v0, #0
567 ; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF]
568 ; CHECK-NEXT: bic.8b v0, v1, v0
569 ; CHECK-NEXT: addp.2s v0, v0, v0
570 ; CHECK-NEXT: fmov w0, s0
; One i1 per input lane: true where the i32 lane is non-zero.
573 %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
574 %bitmask = bitcast <2 x i1> %cmp_result to i2
; Test: compare every lane of a <4 x i8> vector (only 32 bits of payload)
; against zero and bitcast the <4 x i1> mask to i4.
578 define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) {
579 ; CHECK-SD-LABEL: convert_to_bitmask_4xi8:
581 ; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
582 ; CHECK-SD-NEXT: adrp x8, lCPI12_0@PAGE
583 ; CHECK-SD-NEXT: ldr d1, [x8, lCPI12_0@PAGEOFF]
584 ; CHECK-SD-NEXT: cmeq.4h v0, v0, #0
585 ; CHECK-SD-NEXT: bic.8b v0, v1, v0
586 ; CHECK-SD-NEXT: addv.4h h0, v0
587 ; CHECK-SD-NEXT: fmov w0, s0
590 ; CHECK-GI-LABEL: convert_to_bitmask_4xi8:
592 ; CHECK-GI-NEXT: sub sp, sp, #16
593 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
594 ; CHECK-GI-NEXT: mov w8, #0 ; =0x0
595 ; CHECK-GI-NEXT: uzp1.8b v0, v0, v0
596 ; CHECK-GI-NEXT: fmov s1, w8
597 ; CHECK-GI-NEXT: mov.b v1[1], w8
598 ; CHECK-GI-NEXT: mov.b v1[2], w8
599 ; CHECK-GI-NEXT: mov.b v1[3], w8
600 ; CHECK-GI-NEXT: cmeq.8b v0, v0, v1
601 ; CHECK-GI-NEXT: mvn.8b v0, v0
602 ; CHECK-GI-NEXT: umov.b w8, v0[0]
603 ; CHECK-GI-NEXT: umov.b w9, v0[1]
604 ; CHECK-GI-NEXT: mov.s v1[0], w8
605 ; CHECK-GI-NEXT: umov.b w8, v0[2]
606 ; CHECK-GI-NEXT: mov.s v1[1], w9
607 ; CHECK-GI-NEXT: umov.b w9, v0[3]
608 ; CHECK-GI-NEXT: mov.s v1[2], w8
609 ; CHECK-GI-NEXT: mov.s v1[3], w9
610 ; CHECK-GI-NEXT: mov.s w8, v1[1]
611 ; CHECK-GI-NEXT: mov.s w9, v1[2]
612 ; CHECK-GI-NEXT: fmov w11, s1
613 ; CHECK-GI-NEXT: mov.s w10, v1[3]
614 ; CHECK-GI-NEXT: and w8, w8, #0x1
615 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
616 ; CHECK-GI-NEXT: and w8, w9, #0x1
617 ; CHECK-GI-NEXT: and w9, w10, #0x1
618 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
619 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
620 ; CHECK-GI-NEXT: strb w8, [sp, #15]
621 ; CHECK-GI-NEXT: and w0, w8, #0xff
622 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i8 lane is non-zero.
625 %cmp_result = icmp ne <4 x i8> %vec, zeroinitializer
626 %bitmask = bitcast <4 x i1> %cmp_result to i4
; Test: compare every lane of a <8 x i2> vector (non-byte-sized elements)
; against zero and bitcast the <8 x i1> mask to i8. Both run lines share one
; set of expectations; the header of this file expects the GlobalISel run to
; report a fallback for this function.
630 define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) {
631 ; CHECK-LABEL: convert_to_bitmask_8xi2:
633 ; CHECK-NEXT: movi.8b v1, #3
634 ; CHECK-NEXT: adrp x8, lCPI13_0@PAGE
635 ; CHECK-NEXT: and.8b v0, v0, v1
636 ; CHECK-NEXT: ldr d1, [x8, lCPI13_0@PAGEOFF]
637 ; CHECK-NEXT: cmeq.8b v0, v0, #0
638 ; CHECK-NEXT: bic.8b v0, v1, v0
639 ; CHECK-NEXT: addv.8b b0, v0
640 ; CHECK-NEXT: fmov w0, s0
; One i1 per input lane: true where the i2 lane is non-zero.
643 %cmp_result = icmp ne <8 x i2> %vec, zeroinitializer
644 %bitmask = bitcast <8 x i1> %cmp_result to i8
; Test: the <4 x i1> mask comes from a floating-point compare (fcmp one
; against zero) on <4 x float> rather than an integer compare, and is bitcast
; to i4.
648 define i4 @convert_to_bitmask_float(<4 x float> %vec) {
649 ; CHECK-SD-LABEL: convert_to_bitmask_float:
651 ; CHECK-SD-NEXT: fcmgt.4s v1, v0, #0.0
652 ; CHECK-SD-NEXT: fcmlt.4s v0, v0, #0.0
653 ; CHECK-SD-NEXT: adrp x8, lCPI14_0@PAGE
654 ; CHECK-SD-NEXT: orr.16b v0, v0, v1
655 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI14_0@PAGEOFF]
656 ; CHECK-SD-NEXT: and.16b v0, v0, v1
657 ; CHECK-SD-NEXT: addv.4s s0, v0
658 ; CHECK-SD-NEXT: fmov w0, s0
661 ; CHECK-GI-LABEL: convert_to_bitmask_float:
663 ; CHECK-GI-NEXT: sub sp, sp, #16
664 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
665 ; CHECK-GI-NEXT: fcmgt.4s v1, v0, #0.0
666 ; CHECK-GI-NEXT: fcmlt.4s v0, v0, #0.0
667 ; CHECK-GI-NEXT: orr.16b v0, v0, v1
668 ; CHECK-GI-NEXT: mov.s w8, v0[1]
669 ; CHECK-GI-NEXT: mov.s w9, v0[2]
670 ; CHECK-GI-NEXT: fmov w11, s0
671 ; CHECK-GI-NEXT: mov.s w10, v0[3]
672 ; CHECK-GI-NEXT: and w8, w8, #0x1
673 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
674 ; CHECK-GI-NEXT: and w8, w9, #0x1
675 ; CHECK-GI-NEXT: and w9, w10, #0x1
676 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
677 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
678 ; CHECK-GI-NEXT: strb w8, [sp, #15]
679 ; CHECK-GI-NEXT: and w0, w8, #0xff
680 ; CHECK-GI-NEXT: add sp, sp, #16
; "one" = ordered and not equal; the expected output above realizes it as
; (lane > 0.0) OR (lane < 0.0).
684 %cmp_result = fcmp one <4 x float> %vec, zeroinitializer
685 %bitmask = bitcast <4 x i1> %cmp_result to i4
689 ; Larger vector types don't map directly, but they can be split/truncated and then converted.
690 ; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
; Test: compare every lane of a 256-bit <8 x i32> vector against zero and
; bitcast the <8 x i1> mask to i8. In the expected output the input occupies
; two 128-bit registers (v0 and v1).
691 define i8 @convert_large_vector(<8 x i32> %vec) {
692 ; CHECK-SD-LABEL: convert_large_vector:
694 ; CHECK-SD-NEXT: sub sp, sp, #16
695 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
696 ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0
697 ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0
698 ; CHECK-SD-NEXT: adrp x8, lCPI15_0@PAGE
699 ; CHECK-SD-NEXT: uzp1.8h v0, v0, v1
700 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI15_0@PAGEOFF]
701 ; CHECK-SD-NEXT: bic.16b v0, v1, v0
702 ; CHECK-SD-NEXT: addv.8h h0, v0
703 ; CHECK-SD-NEXT: fmov w8, s0
704 ; CHECK-SD-NEXT: and w0, w8, #0xff
705 ; CHECK-SD-NEXT: add sp, sp, #16
708 ; CHECK-GI-LABEL: convert_large_vector:
710 ; CHECK-GI-NEXT: sub sp, sp, #16
711 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
712 ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
713 ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0
714 ; CHECK-GI-NEXT: mvn.16b v0, v0
715 ; CHECK-GI-NEXT: mvn.16b v1, v1
716 ; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
717 ; CHECK-GI-NEXT: xtn.8b v0, v0
718 ; CHECK-GI-NEXT: umov.b w8, v0[1]
719 ; CHECK-GI-NEXT: umov.b w9, v0[0]
720 ; CHECK-GI-NEXT: umov.b w10, v0[2]
721 ; CHECK-GI-NEXT: umov.b w11, v0[3]
722 ; CHECK-GI-NEXT: and w8, w8, #0x1
723 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
724 ; CHECK-GI-NEXT: and w8, w10, #0x1
725 ; CHECK-GI-NEXT: umov.b w10, v0[4]
726 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2
727 ; CHECK-GI-NEXT: and w9, w11, #0x1
728 ; CHECK-GI-NEXT: umov.b w11, v0[5]
729 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
730 ; CHECK-GI-NEXT: and w9, w10, #0x1
731 ; CHECK-GI-NEXT: umov.b w10, v0[6]
732 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4
733 ; CHECK-GI-NEXT: and w9, w11, #0x1
734 ; CHECK-GI-NEXT: umov.b w11, v0[7]
735 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5
736 ; CHECK-GI-NEXT: and w9, w10, #0x1
737 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6
738 ; CHECK-GI-NEXT: and w9, w11, #0x1
739 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7
740 ; CHECK-GI-NEXT: strb w8, [sp, #15]
741 ; CHECK-GI-NEXT: and w0, w8, #0xff
742 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i32 lane is non-zero.
746 %cmp_result = icmp ne <8 x i32> %vec, zeroinitializer
747 %bitmask = bitcast <8 x i1> %cmp_result to i8
; Test: compare every lane of a <4 x i22> vector (a non-power-of-two element
; width) against zero and bitcast the <4 x i1> mask to i4. The expected output
; first masks each 32-bit lane with a movi-generated constant.
751 define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
752 ; CHECK-SD-LABEL: convert_legalized_illegal_element_size:
754 ; CHECK-SD-NEXT: movi.4s v1, #63, msl #16
755 ; CHECK-SD-NEXT: adrp x8, lCPI16_0@PAGE
756 ; CHECK-SD-NEXT: cmtst.4s v0, v0, v1
757 ; CHECK-SD-NEXT: ldr d1, [x8, lCPI16_0@PAGEOFF]
758 ; CHECK-SD-NEXT: xtn.4h v0, v0
759 ; CHECK-SD-NEXT: and.8b v0, v0, v1
760 ; CHECK-SD-NEXT: addv.4h h0, v0
761 ; CHECK-SD-NEXT: fmov w0, s0
764 ; CHECK-GI-LABEL: convert_legalized_illegal_element_size:
766 ; CHECK-GI-NEXT: sub sp, sp, #16
767 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
768 ; CHECK-GI-NEXT: movi.4s v1, #63, msl #16
769 ; CHECK-GI-NEXT: and.16b v0, v0, v1
770 ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
771 ; CHECK-GI-NEXT: mvn.16b v0, v0
772 ; CHECK-GI-NEXT: mov.s w8, v0[1]
773 ; CHECK-GI-NEXT: mov.s w9, v0[2]
774 ; CHECK-GI-NEXT: fmov w11, s0
775 ; CHECK-GI-NEXT: mov.s w10, v0[3]
776 ; CHECK-GI-NEXT: and w8, w8, #0x1
777 ; CHECK-GI-NEXT: bfi w11, w8, #1, #31
778 ; CHECK-GI-NEXT: and w8, w9, #0x1
779 ; CHECK-GI-NEXT: and w9, w10, #0x1
780 ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2
781 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
782 ; CHECK-GI-NEXT: strb w8, [sp, #15]
783 ; CHECK-GI-NEXT: and w0, w8, #0xff
784 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i22 lane is non-zero.
787 %cmp_result = icmp ne <4 x i22> %vec, zeroinitializer
788 %bitmask = bitcast <4 x i1> %cmp_result to i4
792 ; This may still be converted as a v8i8 after the vector concat (but not as v4iX).
793 define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
794 ; CHECK-LABEL: no_direct_convert_for_bad_concat:
796 ; CHECK-NEXT: cmtst.4s v0, v0, v0
797 ; CHECK-NEXT: adrp x8, lCPI17_0@PAGE
798 ; CHECK-NEXT: xtn.4h v0, v0
799 ; CHECK-NEXT: umov.h w9, v0[0]
800 ; CHECK-NEXT: mov.b v1[4], w9
801 ; CHECK-NEXT: umov.h w9, v0[1]
802 ; CHECK-NEXT: mov.b v1[5], w9
803 ; CHECK-NEXT: umov.h w9, v0[2]
804 ; CHECK-NEXT: mov.b v1[6], w9
805 ; CHECK-NEXT: umov.h w9, v0[3]
806 ; CHECK-NEXT: mov.b v1[7], w9
807 ; CHECK-NEXT: shl.8b v0, v1, #7
808 ; CHECK-NEXT: ldr d1, [x8, lCPI17_0@PAGEOFF]
809 ; CHECK-NEXT: cmlt.8b v0, v0, #0
810 ; CHECK-NEXT: and.8b v0, v0, v1
811 ; CHECK-NEXT: addv.8b b0, v0
812 ; CHECK-NEXT: fmov w0, s0
; One i1 per input lane: true where the i32 lane is non-zero.
815 %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
; "Bad" concat: the compare result is the SECOND shuffle operand and lands in
; result lanes 4-7 (mask indices 4-7); lanes 0-3 are don't-care. The don't-care
; mask elements are written as poison (matching the poison first operand)
; instead of the deprecated undef; both spellings denote an unspecified lane,
; so the expected output is unchanged.
816 %vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 6, i32 7>
; Reinterpret the padded 8-lane mask as a single i8 scalar.
817 %bitmask = bitcast <8 x i1> %vector_pad to i8
; Negative test: the <8 x i1> compare result is returned as a vector and never
; bitcast to a scalar, so the expected output contains only the compare and a
; narrowing step, with no bitmask reduction.
821 define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
822 ; CHECK-SD-LABEL: no_convert_without_direct_bitcast:
824 ; CHECK-SD-NEXT: cmtst.8h v0, v0, v0
825 ; CHECK-SD-NEXT: xtn.8b v0, v0
828 ; CHECK-GI-LABEL: no_convert_without_direct_bitcast:
830 ; CHECK-GI-NEXT: cmeq.8h v0, v0, #0
831 ; CHECK-GI-NEXT: mvn.16b v0, v0
832 ; CHECK-GI-NEXT: xtn.8b v0, v0
; One i1 per input lane: true where the i16 lane is non-zero.
835 %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
836 ret <8 x i1> %cmp_result
; Negative test: a <6 x i32> vector has a non-power-of-two lane count. In the
; expected output the lanes arrive in w0-w5 and the i6 result is assembled bit
; by bit with scalar bfi/orr instructions.
839 define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
840 ; CHECK-SD-LABEL: no_combine_illegal_num_elements:
842 ; CHECK-SD-NEXT: sub sp, sp, #16
843 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
844 ; CHECK-SD-NEXT: fmov s0, w0
845 ; CHECK-SD-NEXT: fmov s1, w4
846 ; CHECK-SD-NEXT: mov.s v0[1], w1
847 ; CHECK-SD-NEXT: mov.s v1[1], w5
848 ; CHECK-SD-NEXT: mov.s v0[2], w2
849 ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0
850 ; CHECK-SD-NEXT: mov.s v0[3], w3
851 ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0
852 ; CHECK-SD-NEXT: uzp1.8h v0, v0, v1
853 ; CHECK-SD-NEXT: mvn.16b v0, v0
854 ; CHECK-SD-NEXT: xtn.8b v0, v0
855 ; CHECK-SD-NEXT: umov.b w8, v0[0]
856 ; CHECK-SD-NEXT: umov.b w9, v0[1]
857 ; CHECK-SD-NEXT: umov.b w10, v0[2]
858 ; CHECK-SD-NEXT: and w8, w8, #0x1
859 ; CHECK-SD-NEXT: bfi w8, w9, #1, #1
860 ; CHECK-SD-NEXT: umov.b w9, v0[3]
861 ; CHECK-SD-NEXT: bfi w8, w10, #2, #1
862 ; CHECK-SD-NEXT: umov.b w10, v0[4]
863 ; CHECK-SD-NEXT: bfi w8, w9, #3, #1
864 ; CHECK-SD-NEXT: umov.b w9, v0[5]
865 ; CHECK-SD-NEXT: bfi w8, w10, #4, #1
866 ; CHECK-SD-NEXT: orr w8, w8, w9, lsl #5
867 ; CHECK-SD-NEXT: and w0, w8, #0x3f
868 ; CHECK-SD-NEXT: add sp, sp, #16
871 ; CHECK-GI-LABEL: no_combine_illegal_num_elements:
873 ; CHECK-GI-NEXT: sub sp, sp, #16
874 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
875 ; CHECK-GI-NEXT: mov.s v0[0], w0
876 ; CHECK-GI-NEXT: mov.s v1[0], w4
877 ; CHECK-GI-NEXT: mov.s v2[0], wzr
878 ; CHECK-GI-NEXT: mov.s v0[1], w1
879 ; CHECK-GI-NEXT: mov.s v1[1], w5
880 ; CHECK-GI-NEXT: mov.s v2[1], wzr
881 ; CHECK-GI-NEXT: mov.s v0[2], w2
882 ; CHECK-GI-NEXT: cmeq.4s v1, v1, v2
883 ; CHECK-GI-NEXT: mvn.16b v1, v1
884 ; CHECK-GI-NEXT: mov.s v0[3], w3
885 ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
886 ; CHECK-GI-NEXT: mvn.16b v0, v0
887 ; CHECK-GI-NEXT: mov.s w8, v0[1]
888 ; CHECK-GI-NEXT: mov.s w9, v0[2]
889 ; CHECK-GI-NEXT: mov.s w10, v0[3]
890 ; CHECK-GI-NEXT: mov.h v0[1], w8
891 ; CHECK-GI-NEXT: mov.s w8, v1[1]
892 ; CHECK-GI-NEXT: mov.h v0[2], w9
893 ; CHECK-GI-NEXT: mov.h v0[3], w10
894 ; CHECK-GI-NEXT: mov.h v0[4], v1[0]
895 ; CHECK-GI-NEXT: mov.h v0[5], w8
896 ; CHECK-GI-NEXT: umov.h w8, v0[1]
897 ; CHECK-GI-NEXT: umov.h w9, v0[0]
898 ; CHECK-GI-NEXT: umov.h w10, v0[2]
899 ; CHECK-GI-NEXT: umov.h w11, v0[3]
900 ; CHECK-GI-NEXT: and w8, w8, #0x1
901 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
902 ; CHECK-GI-NEXT: and w8, w10, #0x1
903 ; CHECK-GI-NEXT: umov.h w10, v0[4]
904 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2
905 ; CHECK-GI-NEXT: and w9, w11, #0x1
906 ; CHECK-GI-NEXT: umov.h w11, v0[5]
907 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3
908 ; CHECK-GI-NEXT: and w9, w10, #0x1
909 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4
910 ; CHECK-GI-NEXT: and w9, w11, #0x1
911 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5
912 ; CHECK-GI-NEXT: and w8, w8, #0x3f
913 ; CHECK-GI-NEXT: strb w8, [sp, #15]
914 ; CHECK-GI-NEXT: and w0, w8, #0xff
915 ; CHECK-GI-NEXT: add sp, sp, #16
; One i1 per input lane: true where the i32 lane is non-zero.
918 %cmp_result = icmp ne <6 x i32> %vec, zeroinitializer
; Reinterpret the 6 mask bits as a single i6 scalar.
919 %bitmask = bitcast <6 x i1> %cmp_result to i6
923 ; Only apply the combine when casting a vector to a scalar.
924 define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
925 ; CHECK-SD-LABEL: vector_to_vector_cast:
927 ; CHECK-SD-NEXT: sub sp, sp, #16
928 ; CHECK-SD-NEXT: shl.16b v0, v0, #7
929 ; CHECK-SD-NEXT: adrp x8, lCPI20_0@PAGE
930 ; CHECK-SD-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF]
931 ; CHECK-SD-NEXT: add x8, sp, #14
932 ; CHECK-SD-NEXT: cmlt.16b v0, v0, #0
933 ; CHECK-SD-NEXT: and.16b v0, v0, v1
934 ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8
935 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1
936 ; CHECK-SD-NEXT: addv.8h h0, v0
937 ; CHECK-SD-NEXT: str h0, [sp, #14]
938 ; CHECK-SD-NEXT: ld1.b { v0 }[0], [x8]
939 ; CHECK-SD-NEXT: orr x8, x8, #0x1
940 ; CHECK-SD-NEXT: ld1.b { v0 }[4], [x8]
941 ; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0
942 ; CHECK-SD-NEXT: add sp, sp, #16
945 ; CHECK-GI-LABEL: vector_to_vector_cast:
947 ; CHECK-GI-NEXT: sub sp, sp, #16
948 ; CHECK-GI-NEXT: umov.b w8, v0[1]
949 ; CHECK-GI-NEXT: mov d1, v0[1]
950 ; CHECK-GI-NEXT: umov.b w10, v0[1]
951 ; CHECK-GI-NEXT: umov.b w9, v0[0]
952 ; CHECK-GI-NEXT: umov.b w13, v0[0]
953 ; CHECK-GI-NEXT: umov.b w14, v0[2]
954 ; CHECK-GI-NEXT: umov.b w15, v0[3]
955 ; CHECK-GI-NEXT: umov.b w11, v0[2]
956 ; CHECK-GI-NEXT: umov.b w16, v0[4]
957 ; CHECK-GI-NEXT: umov.b w17, v0[5]
958 ; CHECK-GI-NEXT: umov.b w12, v0[3]
959 ; CHECK-GI-NEXT: and w8, w8, #0x1
960 ; CHECK-GI-NEXT: and w10, w10, #0x1
961 ; CHECK-GI-NEXT: umov.b w0, v1[1]
962 ; CHECK-GI-NEXT: bfi w9, w8, #1, #31
963 ; CHECK-GI-NEXT: bfi w13, w10, #1, #31
964 ; CHECK-GI-NEXT: and w14, w14, #0x1
965 ; CHECK-GI-NEXT: umov.b w8, v1[0]
966 ; CHECK-GI-NEXT: umov.b w10, v1[2]
967 ; CHECK-GI-NEXT: and w15, w15, #0x1
968 ; CHECK-GI-NEXT: orr w13, w13, w14, lsl #2
969 ; CHECK-GI-NEXT: umov.b w14, v1[3]
970 ; CHECK-GI-NEXT: and w11, w11, #0x1
971 ; CHECK-GI-NEXT: and w0, w0, #0x1
972 ; CHECK-GI-NEXT: and w16, w16, #0x1
973 ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #2
974 ; CHECK-GI-NEXT: orr w13, w13, w15, lsl #3
975 ; CHECK-GI-NEXT: umov.b w15, v1[4]
976 ; CHECK-GI-NEXT: umov.b w11, v0[6]
977 ; CHECK-GI-NEXT: bfi w8, w0, #1, #31
978 ; CHECK-GI-NEXT: and w10, w10, #0x1
979 ; CHECK-GI-NEXT: and w17, w17, #0x1
980 ; CHECK-GI-NEXT: orr w13, w13, w16, lsl #4
981 ; CHECK-GI-NEXT: and w14, w14, #0x1
982 ; CHECK-GI-NEXT: umov.b w0, v0[7]
983 ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #2
984 ; CHECK-GI-NEXT: umov.b w10, v1[5]
985 ; CHECK-GI-NEXT: umov.b w16, v1[6]
986 ; CHECK-GI-NEXT: orr w13, w13, w17, lsl #5
987 ; CHECK-GI-NEXT: umov.b w17, v0[4]
988 ; CHECK-GI-NEXT: and w15, w15, #0x1
989 ; CHECK-GI-NEXT: orr w8, w8, w14, lsl #3
990 ; CHECK-GI-NEXT: and w12, w12, #0x1
991 ; CHECK-GI-NEXT: and w11, w11, #0x1
992 ; CHECK-GI-NEXT: umov.b w14, v1[7]
993 ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #3
994 ; CHECK-GI-NEXT: orr w11, w13, w11, lsl #6
995 ; CHECK-GI-NEXT: orr w8, w8, w15, lsl #4
996 ; CHECK-GI-NEXT: umov.b w15, v0[5]
997 ; CHECK-GI-NEXT: and w10, w10, #0x1
998 ; CHECK-GI-NEXT: and w0, w0, #0x1
999 ; CHECK-GI-NEXT: and w12, w17, #0x1
1000 ; CHECK-GI-NEXT: umov.b w13, v0[1]
1001 ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #5
1002 ; CHECK-GI-NEXT: and w16, w16, #0x1
1003 ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #4
1004 ; CHECK-GI-NEXT: umov.b w10, v0[0]
1005 ; CHECK-GI-NEXT: orr w11, w11, w0, lsl #7
1006 ; CHECK-GI-NEXT: and w14, w14, #0x1
1007 ; CHECK-GI-NEXT: and w12, w15, #0x1
1008 ; CHECK-GI-NEXT: umov.b w15, v0[2]
1009 ; CHECK-GI-NEXT: orr w8, w8, w16, lsl #6
1010 ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5
1011 ; CHECK-GI-NEXT: umov.b w12, v0[6]
1012 ; CHECK-GI-NEXT: strb w11, [sp, #8]
1013 ; CHECK-GI-NEXT: and w11, w13, #0x1
1014 ; CHECK-GI-NEXT: umov.b w13, v0[3]
1015 ; CHECK-GI-NEXT: orr w8, w8, w14, lsl #7
1016 ; CHECK-GI-NEXT: umov.b w14, v0[7]
1017 ; CHECK-GI-NEXT: ldr b0, [sp, #8]
1018 ; CHECK-GI-NEXT: bfi w10, w11, #1, #31
1019 ; CHECK-GI-NEXT: and w11, w15, #0x1
1020 ; CHECK-GI-NEXT: strb w8, [sp, #9]
1021 ; CHECK-GI-NEXT: umov.b w15, v0[4]
1022 ; CHECK-GI-NEXT: and w8, w12, #0x1
1023 ; CHECK-GI-NEXT: orr w10, w10, w11, lsl #2
1024 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #6
1025 ; CHECK-GI-NEXT: and w9, w13, #0x1
1026 ; CHECK-GI-NEXT: umov.b w11, v0[1]
1027 ; CHECK-GI-NEXT: orr w9, w10, w9, lsl #3
1028 ; CHECK-GI-NEXT: umov.b w10, v0[5]
1029 ; CHECK-GI-NEXT: umov.b w12, v0[0]
1030 ; CHECK-GI-NEXT: and w13, w14, #0x1
1031 ; CHECK-GI-NEXT: umov.b w16, v0[2]
1032 ; CHECK-GI-NEXT: umov.b w17, v0[3]
1033 ; CHECK-GI-NEXT: and w14, w15, #0x1
1034 ; CHECK-GI-NEXT: umov.b w15, v0[2]
1035 ; CHECK-GI-NEXT: orr w8, w8, w13, lsl #7
1036 ; CHECK-GI-NEXT: orr w9, w9, w14, lsl #4
1037 ; CHECK-GI-NEXT: umov.b w13, v0[6]
1038 ; CHECK-GI-NEXT: and w11, w11, #0x1
1039 ; CHECK-GI-NEXT: umov.b w14, v0[3]
1040 ; CHECK-GI-NEXT: strb w8, [sp, #10]
1041 ; CHECK-GI-NEXT: and w8, w10, #0x1
1042 ; CHECK-GI-NEXT: bfi w12, w11, #1, #31
1043 ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #5
1044 ; CHECK-GI-NEXT: umov.b w10, v0[4]
1045 ; CHECK-GI-NEXT: and w9, w15, #0x1
1046 ; CHECK-GI-NEXT: umov.b w11, v0[7]
1047 ; CHECK-GI-NEXT: umov.b w15, v0[1]
1048 ; CHECK-GI-NEXT: orr w9, w12, w9, lsl #2
1049 ; CHECK-GI-NEXT: umov.b w12, v0[5]
1050 ; CHECK-GI-NEXT: and w13, w13, #0x1
1051 ; CHECK-GI-NEXT: and w14, w14, #0x1
1052 ; CHECK-GI-NEXT: orr w8, w8, w13, lsl #6
1053 ; CHECK-GI-NEXT: umov.b w13, v0[0]
1054 ; CHECK-GI-NEXT: orr w9, w9, w14, lsl #3
1055 ; CHECK-GI-NEXT: and w10, w10, #0x1
1056 ; CHECK-GI-NEXT: umov.b w14, v0[6]
1057 ; CHECK-GI-NEXT: and w11, w11, #0x1
1058 ; CHECK-GI-NEXT: and w15, w15, #0x1
1059 ; CHECK-GI-NEXT: umov.b w0, v0[3]
1060 ; CHECK-GI-NEXT: orr w9, w9, w10, lsl #4
1061 ; CHECK-GI-NEXT: and w10, w12, #0x1
1062 ; CHECK-GI-NEXT: umov.b w12, v0[7]
1063 ; CHECK-GI-NEXT: orr w8, w8, w11, lsl #7
1064 ; CHECK-GI-NEXT: bfi w13, w15, #1, #31
1065 ; CHECK-GI-NEXT: and w11, w16, #0x1
1066 ; CHECK-GI-NEXT: orr w9, w9, w10, lsl #5
1067 ; CHECK-GI-NEXT: and w10, w14, #0x1
1068 ; CHECK-GI-NEXT: umov.b w14, v0[4]
1069 ; CHECK-GI-NEXT: strb w8, [sp, #11]
1070 ; CHECK-GI-NEXT: umov.b w15, v0[1]
1071 ; CHECK-GI-NEXT: umov.b w16, v0[3]
1072 ; CHECK-GI-NEXT: orr w8, w9, w10, lsl #6
1073 ; CHECK-GI-NEXT: orr w9, w13, w11, lsl #2
1074 ; CHECK-GI-NEXT: and w10, w12, #0x1
1075 ; CHECK-GI-NEXT: and w11, w17, #0x1
1076 ; CHECK-GI-NEXT: umov.b w12, v0[5]
1077 ; CHECK-GI-NEXT: umov.b w17, v0[0]
1078 ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #7
1079 ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #3
1080 ; CHECK-GI-NEXT: umov.b w10, v0[1]
1081 ; CHECK-GI-NEXT: and w11, w14, #0x1
1082 ; CHECK-GI-NEXT: umov.b w14, v0[0]
1083 ; CHECK-GI-NEXT: and w15, w15, #0x1
1084 ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #4
1085 ; CHECK-GI-NEXT: umov.b w11, v0[2]
1086 ; CHECK-GI-NEXT: umov.b w13, v0[6]
1087 ; CHECK-GI-NEXT: and w12, w12, #0x1
1088 ; CHECK-GI-NEXT: bfi w17, w15, #1, #31
1089 ; CHECK-GI-NEXT: umov.b w15, v0[5]
1090 ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5
1091 ; CHECK-GI-NEXT: and w10, w10, #0x1
1092 ; CHECK-GI-NEXT: umov.b w12, v0[2]
1093 ; CHECK-GI-NEXT: bfi w14, w10, #1, #31
1094 ; CHECK-GI-NEXT: umov.b w10, v0[4]
1095 ; CHECK-GI-NEXT: ldr b1, [sp, #9]
1096 ; CHECK-GI-NEXT: and w11, w11, #0x1
1097 ; CHECK-GI-NEXT: and w13, w13, #0x1
1098 ; CHECK-GI-NEXT: strb w8, [sp, #12]
1099 ; CHECK-GI-NEXT: orr w11, w14, w11, lsl #2
1100 ; CHECK-GI-NEXT: and w14, w16, #0x1
1101 ; CHECK-GI-NEXT: umov.b w16, v0[4]
1102 ; CHECK-GI-NEXT: and w12, w12, #0x1
1103 ; CHECK-GI-NEXT: and w15, w15, #0x1
1104 ; CHECK-GI-NEXT: orr w9, w9, w13, lsl #6
1105 ; CHECK-GI-NEXT: orr w11, w11, w14, lsl #3
1106 ; CHECK-GI-NEXT: orr w12, w17, w12, lsl #2
1107 ; CHECK-GI-NEXT: and w10, w10, #0x1
1108 ; CHECK-GI-NEXT: and w17, w0, #0x1
1109 ; CHECK-GI-NEXT: umov.b w0, v0[5]
1110 ; CHECK-GI-NEXT: umov.b w14, v0[6]
1111 ; CHECK-GI-NEXT: orr w10, w11, w10, lsl #4
1112 ; CHECK-GI-NEXT: orr w12, w12, w17, lsl #3
1113 ; CHECK-GI-NEXT: umov.b w11, v0[7]
1114 ; CHECK-GI-NEXT: and w16, w16, #0x1
1115 ; CHECK-GI-NEXT: umov.b w17, v0[6]
1116 ; CHECK-GI-NEXT: orr w10, w10, w15, lsl #5
1117 ; CHECK-GI-NEXT: umov.b w15, v0[7]
1118 ; CHECK-GI-NEXT: orr w12, w12, w16, lsl #4
1119 ; CHECK-GI-NEXT: and w16, w0, #0x1
1120 ; CHECK-GI-NEXT: umov.b w0, v0[7]
1121 ; CHECK-GI-NEXT: and w14, w14, #0x1
1122 ; CHECK-GI-NEXT: orr w12, w12, w16, lsl #5
1123 ; CHECK-GI-NEXT: orr w10, w10, w14, lsl #6
1124 ; CHECK-GI-NEXT: and w11, w11, #0x1
1125 ; CHECK-GI-NEXT: and w13, w17, #0x1
1126 ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #7
1127 ; CHECK-GI-NEXT: mov.s v0[1], v1[0]
1128 ; CHECK-GI-NEXT: orr w11, w12, w13, lsl #6
1129 ; CHECK-GI-NEXT: and w12, w15, #0x1
1130 ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
1131 ; CHECK-GI-NEXT: orr w8, w10, w12, lsl #7
1132 ; CHECK-GI-NEXT: and w10, w0, #0x1
1133 ; CHECK-GI-NEXT: strb w9, [sp, #13]
1134 ; CHECK-GI-NEXT: orr w9, w11, w10, lsl #7
1135 ; CHECK-GI-NEXT: strb w8, [sp, #14]
1136 ; CHECK-GI-NEXT: strb w9, [sp, #15]
1137 ; CHECK-GI-NEXT: add sp, sp, #16
1138 ; CHECK-GI-NEXT: ret
1139 %bc = bitcast <16 x i1> %arg to <2 x i8>