1 ; RUN: opt -codegenprepare -mtriple=arm64-apple=ios -S -o - %s | FileCheck --check-prefix=OPT %s
2 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
; Aggregate types used as pointer operands by the tests below.
; X: two i8 fields followed by a 2-byte array (4 bytes of fields in total).
3 %struct.X = type { i8, i8, [2 x i8] }
; Y: an i32 followed by an i8.
4 %struct.Y = type { i32, i8 }
; Z: same leading fields as X plus a trailing i16.
5 %struct.Z = type { i8, i8, [2 x i8], i16 }
; A: an i64 followed by an i8.
6 %struct.A = type { i64, i8 }
; foo: loads the first 32 bits of *%x, isolates bit 3 (lshr 3, then and 1),
; and stores that single bit as an i8 into field 1 of *%y.
8 define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp {
14 %tmp = bitcast %struct.X* %x to i32*
15 %tmp1 = load i32, i32* %tmp, align 4
16 %b = getelementptr inbounds %struct.Y, %struct.Y* %y, i64 0, i32 1
; Shift-then-mask: a one-bit unsigned field at bit position 3.
17 %bf.clear = lshr i32 %tmp1, 3
18 %bf.clear.lobit = and i32 %bf.clear, 1
19 %frombool = trunc i32 %bf.clear.lobit to i8
20 store i8 %frombool, i8* %b, align 1
; baz: sign-extends the low 4 bits of the argument (truncated to i32).
; The shl 28 / ashr 28 pair is expected to become one sbfx with lsb 0, width 4.
24 define i32 @baz(i64 %cav1.coerce) nounwind {
26 ; CHECK: sbfx w0, w0, #0, #4
27 %tmp = trunc i64 %cav1.coerce to i32
28 %tmp1 = shl i32 %tmp, 28
29 %bf.val.sext = ashr exact i32 %tmp1, 28
; bar: sign-extends a 6-bit field starting at bit 4 of the truncated argument.
; shl 22 keeps the low 10 bits; ashr 26 then drops the low 4 of those,
; leaving width 32 - 26 = 6 at lsb 26 - 22 = 4.
33 define i32 @bar(i64 %cav1.coerce) nounwind {
35 ; CHECK: sbfx w0, w0, #4, #6
36 %tmp = trunc i64 %cav1.coerce to i32
37 %cav1.sroa.0.1.insert = shl i32 %tmp, 22
38 %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
; fct1: 64-bit counterpart of foo. Loads 64 bits from *%x, isolates bit 3,
; and stores the result as an i64 into field 0 of *%y.
; The test expects an unsigned bitfield extract on x registers.
42 define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
44 ; CHECK: ubfx x{{[0-9]+}}, x{{[0-9]+}}
48 %tmp = bitcast %struct.Z* %x to i64*
49 %tmp1 = load i64, i64* %tmp, align 4
50 %b = getelementptr inbounds %struct.A, %struct.A* %y, i64 0, i32 0
; Shift-then-mask: a one-bit unsigned field at bit position 3.
51 %bf.clear = lshr i64 %tmp1, 3
52 %bf.clear.lobit = and i64 %bf.clear, 1
53 store i64 %bf.clear.lobit, i64* %b, align 8
; fct2: 64-bit counterpart of baz. shl 28 / ashr 28 sign-extends the low
; 64 - 28 = 36 bits, expected as sbfx with lsb 0, width 36.
57 define i64 @fct2(i64 %cav1.coerce) nounwind {
59 ; CHECK: sbfx x0, x0, #0, #36
60 %tmp = shl i64 %cav1.coerce, 28
61 %bf.val.sext = ashr exact i64 %tmp, 28
; fct3: 64-bit counterpart of bar. shl 22 followed by ashr 26 yields a signed
; field of width 64 - 26 = 38 starting at bit 26 - 22 = 4.
65 define i64 @fct3(i64 %cav1.coerce) nounwind {
67 ; CHECK: sbfx x0, x0, #4, #38
68 %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
69 %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
; fct4: replaces the low 24 bits of *%y with bits [39:16] of %x and stores the
; result back. Expected as load, one bfxil (lsb 16, width 24), store.
73 define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
76 ; CHECK: ldr [[REG1:x[0-9]+]],
77 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
78 ; CHECK-NEXT: str [[REG1]],
80 %0 = load i64, i64* %y, align 8
; -16777216 is ~0xFFFFFF: keep everything except the low 24 bits.
81 %and = and i64 %0, -16777216
82 %shr = lshr i64 %x, 16
; 16777215 is 0xFFFFFF: the 24-bit field taken from %x.
83 %and1 = and i64 %shr, 16777215
84 %or = or i64 %and, %and1
85 store i64 %or, i64* %y, align 8
; fct5: 32-bit insertion of bits [18:16] of %x into the low 3 bits of *%y.
; Expected as load, one bfxil (lsb 16, width 3), store.
89 define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
92 ; CHECK: ldr [[REG1:w[0-9]+]],
93 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
94 ; CHECK-NEXT: str [[REG1]],
96 %0 = load i32, i32* %y, align 8
; NOTE(review): %and is used below but its definition is not visible here;
; presumably `and i32 %0, -8` as in fct6 -- confirm against the full file.
98 %shr = lshr i32 %x, 16
99 %and1 = and i32 %shr, 7
100 %or = or i32 %and, %and1
101 store i32 %or, i32* %y, align 8
105 ; Check if we can still catch bfm instruction when we drop some low bits
; fct6: inserts bits [18:16] of %x into the low 3 bits of *%y, then shifts the
; merged value right by 2 before storing it. The insertion is still expected
; to be selected as bfxil even though the final shift discards low bits.
106 define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
109 ; CHECK: ldr [[REG1:w[0-9]+]],
110 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
111 ; lsr is an alias of ubfm
112 ; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
113 ; CHECK-NEXT: str [[REG2]],
115 %0 = load i32, i32* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
116 %and = and i32 %0, -8
117 %shr = lshr i32 %x, 16
118 %and1 = and i32 %shr, 7
119 %or = or i32 %and, %and1
120 %shr1 = lshr i32 %or, 2
121 store i32 %shr1, i32* %y, align 8
126 ; Check if we can still catch bfm instruction when we drop some high bits
; fct7: same insertion as fct6, but the merged value is shifted left by 2
; (discarding high bits) before being stored.
127 define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
130 ; CHECK: ldr [[REG1:w[0-9]+]],
131 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
132 ; lsl is an alias of ubfm
133 ; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
134 ; CHECK-NEXT: str [[REG2]],
136 %0 = load i32, i32* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
137 %and = and i32 %0, -8
138 %shr = lshr i32 %x, 16
139 %and1 = and i32 %shr, 7
140 %or = or i32 %and, %and1
141 %shl = shl i32 %or, 2
142 store i32 %shl, i32* %y, align 8
147 ; Check if we can still catch bfm instruction when we drop some low bits
; fct8: 64-bit counterpart of fct6 (insert bits [18:16] of %x into the low
; 3 bits of *%y, then shift right by 2 and store).
149 define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
152 ; CHECK: ldr [[REG1:x[0-9]+]],
153 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
154 ; lsr is an alias of ubfm
155 ; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
156 ; CHECK-NEXT: str [[REG2]],
158 %0 = load i64, i64* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
159 %and = and i64 %0, -8
160 %shr = lshr i64 %x, 16
161 %and1 = and i64 %shr, 7
162 %or = or i64 %and, %and1
163 %shr1 = lshr i64 %or, 2
164 store i64 %shr1, i64* %y, align 8
169 ; Check if we can still catch bfm instruction when we drop some high bits
; fct9: 64-bit counterpart of fct7 (insert bits [18:16] of %x into the low
; 3 bits of *%y, then shift left by 2 and store).
171 define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
174 ; CHECK: ldr [[REG1:x[0-9]+]],
175 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
176 ; lsl is an alias of ubfm
177 ; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
178 ; CHECK-NEXT: str [[REG2]],
180 %0 = load i64, i64* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
181 %and = and i64 %0, -8
182 %shr = lshr i64 %x, 16
183 %and1 = and i64 %shr, 7
184 %or = or i64 %and, %and1
185 %shl = shl i64 %or, 2
186 store i64 %shl, i64* %y, align 8
190 ; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
; fct10: inserts the low 3 bits of %x (no preceding shift, so lsb is 0) into
; the low 3 bits of *%y, shifts the merged value left by 2, and stores it.
192 define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
194 ; CHECK-LABEL: fct10:
195 ; CHECK: ldr [[REG1:w[0-9]+]],
196 ; CHECK-NEXT: bfxil [[REG1]], w1, #0, #3
197 ; lsl is an alias of ubfm
198 ; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
199 ; CHECK-NEXT: str [[REG2]],
201 %0 = load i32, i32* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
202 %and = and i32 %0, -8
203 %and1 = and i32 %x, 7
204 %or = or i32 %and, %and1
205 %shl = shl i32 %or, 2
206 store i32 %shl, i32* %y, align 8
210 ; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
; fct11: 64-bit counterpart of fct10 (field taken from bit 0 of %x).
212 define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
214 ; CHECK-LABEL: fct11:
215 ; CHECK: ldr [[REG1:x[0-9]+]],
216 ; CHECK-NEXT: bfxil [[REG1]], x1, #0, #3
217 ; lsl is an alias of ubfm
218 ; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
219 ; CHECK-NEXT: str [[REG2]],
221 %0 = load i64, i64* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
222 %and = and i64 %0, -8
223 %and1 = and i64 %x, 7
224 %or = or i64 %and, %and1
225 %shl = shl i64 %or, 2
226 store i64 %shl, i64* %y, align 8
; fct12bis: tests whether bit 11 of %tmp2 is set (mask 2048 == 1 << 11).
; The mask-and-compare is expected to become a 1-bit ubfx at lsb 11.
230 define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 {
231 ; CHECK-LABEL: fct12bis:
233 ; CHECK: ubfx w0, w0, #11, #1
234 %and.i.i = and i32 %tmp2, 2048
235 %tobool.i.i = icmp ne i32 %and.i.i, 0
239 ; Check if we can still catch bfm instruction when we drop some high bits
; fct12: inserts bits [18:16] of %x into the low 3 bits of *%y, then applies
; shl 2 followed by lshr 4. The shift pair is expected to fold into a single
; ubfx (lsb 2, width 28) on the merged value.
241 define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
243 ; CHECK-LABEL: fct12:
244 ; CHECK: ldr [[REG1:w[0-9]+]],
245 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
246 ; ubfx is an alias of ubfm
247 ; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
248 ; CHECK-NEXT: str [[REG2]],
250 %0 = load i32, i32* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
251 %and = and i32 %0, -8
252 %shr = lshr i32 %x, 16
253 %and1 = and i32 %shr, 7
254 %or = or i32 %and, %and1
; shl 2 drops the top 2 bits; lshr 4 then drops the low 2 bits of %or.
255 %shl = shl i32 %or, 2
256 %shr2 = lshr i32 %shl, 4
257 store i32 %shr2, i32* %y, align 8
261 ; Check if we can still catch bfm instruction when we drop some high bits
; fct13: 64-bit counterpart of fct12. The shl 2 / lshr 4 pair is expected to
; fold into a single ubfx (lsb 2, width 60) on the merged value.
264 define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
266 ; CHECK-LABEL: fct13:
267 ; CHECK: ldr [[REG1:x[0-9]+]],
268 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
269 ; ubfx is an alias of ubfm
270 ; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
271 ; CHECK-NEXT: str [[REG2]],
273 %0 = load i64, i64* %y, align 8
; -8 clears the low 3 bits, making room for the inserted field.
274 %and = and i64 %0, -8
275 %shr = lshr i64 %x, 16
276 %and1 = and i64 %shr, 7
277 %or = or i64 %and, %and1
; shl 2 drops the top 2 bits; lshr 4 then drops the low 2 bits of %or.
278 %shl = shl i64 %or, 2
279 %shr2 = lshr i64 %shl, 4
280 store i64 %shr2, i64* %y, align 8
285 ; Check if we can still catch bfm instruction when we drop some high bits
; fct14: two chained insertions with shifts in between.
;   1. bits [23:16] of %x replace the low 8 bits of *%y;
;   2. the result is shifted right by 4;
;   3. bits [7:5] of %x1 replace the low 3 bits of that;
;   4. the result is shifted left by 2 and stored back to *%y.
; Both insertions are expected to be selected as bfxil.
287 define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp {
289 ; CHECK-LABEL: fct14:
290 ; CHECK: ldr [[REG1:w[0-9]+]],
291 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #8
292 ; lsr is an alias of ubfm
293 ; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
294 ; CHECK-NEXT: bfxil [[REG2]], w2, #5, #3
295 ; lsl is an alias of ubfm
296 ; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
297 ; CHECK-NEXT: str [[REG3]],
299 %0 = load i32, i32* %y, align 8
; -256 clears the low 8 bits for the first inserted field.
300 %and = and i32 %0, -256
301 %shr = lshr i32 %x, 16
302 %and1 = and i32 %shr, 255
303 %or = or i32 %and, %and1
; Despite its name, %shl is a logical right shift.
304 %shl = lshr i32 %or, 4
; -8 clears the low 3 bits for the second inserted field.
305 %and2 = and i32 %shl, -8
306 %shr1 = lshr i32 %x1, 5
307 %and3 = and i32 %shr1, 7
308 %or1 = or i32 %and2, %and3
309 %shl1 = shl i32 %or1, 2
310 store i32 %shl1, i32* %y, align 8
314 ; Check if we can still catch bfm instruction when we drop some high bits
; fct15: 64-bit counterpart of fct14 (two chained bfxil insertions separated
; by a right shift by 4, followed by a left shift by 2 and a store).
317 define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp {
319 ; CHECK-LABEL: fct15:
320 ; CHECK: ldr [[REG1:x[0-9]+]],
321 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #8
322 ; lsr is an alias of ubfm
323 ; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
324 ; CHECK-NEXT: bfxil [[REG2]], x2, #5, #3
325 ; lsl is an alias of ubfm
326 ; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
327 ; CHECK-NEXT: str [[REG3]],
329 %0 = load i64, i64* %y, align 8
; -256 clears the low 8 bits for the first inserted field.
330 %and = and i64 %0, -256
331 %shr = lshr i64 %x, 16
332 %and1 = and i64 %shr, 255
333 %or = or i64 %and, %and1
; Despite its name, %shl is a logical right shift.
334 %shl = lshr i64 %or, 4
; -8 clears the low 3 bits for the second inserted field.
335 %and2 = and i64 %shl, -8
336 %shr1 = lshr i64 %x1, 5
337 %and3 = and i64 %shr1, 7
338 %or1 = or i64 %and2, %and3
339 %shl1 = shl i64 %or1, 2
340 store i64 %shl1, i64* %y, align 8
344 ; Check if we can still catch bfm instruction when we drop some high bits
345 ; and some low bits and a masking operation has to be kept
; fct16: like fct12, but the mask applied to the loaded value is the
; arbitrary constant 1737056, so an explicit `and` with a materialized
; constant is expected to remain ahead of the bfxil / ubfx pair.
346 define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
348 ; CHECK-LABEL: fct16:
349 ; CHECK: ldr [[REG1:w[0-9]+]],
350 ; Create the constant
; 33120 + (26 << 16) == 1737056, the mask used by %and below.
351 ; CHECK: mov [[REGCST:w[0-9]+]], #33120
352 ; CHECK: movk [[REGCST]], #26, lsl #16
354 ; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
355 ; CHECK-NEXT: bfxil [[REG2]], w1, #16, #3
356 ; ubfx is an alias of ubfm
357 ; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
358 ; CHECK-NEXT: str [[REG3]],
360 %0 = load i32, i32* %y, align 8
361 %and = and i32 %0, 1737056
362 %shr = lshr i32 %x, 16
363 %and1 = and i32 %shr, 7
364 %or = or i32 %and, %and1
; shl 2 drops the top 2 bits; lshr 4 then drops the low 2 bits of %or.
365 %shl = shl i32 %or, 2
366 %shr2 = lshr i32 %shl, 4
367 store i32 %shr2, i32* %y, align 8
372 ; Check if we can still catch bfm instruction when we drop some high bits
373 ; and some low bits and a masking operation has to be kept
; fct17: 64-bit counterpart of fct16. The mask constant is expected to be
; built in a w register and then consumed through the matching x register.
375 define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
377 ; CHECK-LABEL: fct17:
378 ; CHECK: ldr [[REG1:x[0-9]+]],
379 ; Create the constant
; 33120 + (26 << 16) == 1737056, the mask used by %and below.
380 ; CHECK: mov w[[REGCST:[0-9]+]], #33120
381 ; CHECK: movk w[[REGCST]], #26, lsl #16
383 ; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]]
384 ; CHECK-NEXT: bfxil [[REG2]], x1, #16, #3
385 ; ubfx is an alias of ubfm
386 ; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
387 ; CHECK-NEXT: str [[REG3]],
389 %0 = load i64, i64* %y, align 8
390 %and = and i64 %0, 1737056
391 %shr = lshr i64 %x, 16
392 %and1 = and i64 %shr, 7
393 %or = or i64 %and, %and1
; shl 2 drops the top 2 bits; lshr 4 then drops the low 2 bits of %or.
394 %shl = shl i64 %or, 2
395 %shr2 = lshr i64 %shl, 4
396 store i64 %shr2, i64* %y, align 8
; fct18: extracts bits [16:9] of the i32 argument as a zero-extended i64.
; The shift, zext and mask are expected to fold into one 64-bit ubfx
; (lsb 9, width 8).
400 define i64 @fct18(i32 %xor72) nounwind ssp {
401 ; CHECK-LABEL: fct18:
402 ; CHECK: ubfx x0, x0, #9, #8
403 %shr81 = lshr i32 %xor72, 9
404 %conv82 = zext i32 %shr81 to i64
405 %result = and i64 %conv82, 255
409 ; Using the access to the global array to keep the instruction and control flow.
410 @first_ones = external global [65536 x i8]
412 ; Function Attrs: nounwind readonly ssp
; fct19: examines %arg1 in 16-bit pieces from the top down and uses the first
; non-zero piece as an index into @first_ones.
;   - bits [63:48] non-zero: result is the table entry;
;   - else bits [47:32] non-zero: result is the table entry + 16;
;   - else bits [31:16] non-zero: result is the table entry + 32;
;   - else: result is 64.
; The shifts are computed once up front and reused across blocks.
413 define i32 @fct19(i64 %arg1) nounwind readonly ssp {
414 ; CHECK-LABEL: fct19:
416 %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16
417 %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16
418 %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32
419 %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48
420 %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0
421 br i1 %tobool, label %if.end, label %if.then
423 if.then: ; preds = %entry
; Top 16 bits are non-zero: index the table with them directly.
424 %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
425 %0 = load i8, i8* %arrayidx3, align 1
426 %conv = zext i8 %0 to i32
430 if.end: ; preds = %entry
432 ; CHECK: ubfx [[REG1:x[0-9]+]], [[REG2:x[0-9]+]], #32, #16
; Test bits [47:32] via a 16-bit truncation of the >>32 value.
433 %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16
434 %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0
436 br i1 %tobool6, label %if.end13, label %if.then7
438 ; OPT-LABEL: if.then7
439 if.then7: ; preds = %if.end
441 ; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
442 ; So neither of them should be in the assembly code.
445 %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
446 %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
447 %1 = load i8, i8* %arrayidx11, align 1
448 %conv12 = zext i8 %1 to i32
449 %add = add nsw i32 %conv12, 16
452 ; OPT-LABEL: if.end13
453 if.end13: ; preds = %if.end
456 ; CHECK: ubfx [[REG3:x[0-9]+]], [[REG4:x[0-9]+]], #16, #16
; Test bits [31:16] using the truncation computed at the top of the function.
457 %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0
459 br i1 %tobool16, label %return, label %if.then17
461 ; OPT-LABEL: if.then17
462 if.then17: ; preds = %if.end13
464 ; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
465 ; So neither of them should be in the assembly code.
468 %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
469 %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
470 %2 = load i8, i8* %arrayidx21, align 1
471 %conv22 = zext i8 %2 to i32
472 %add23 = add nsw i32 %conv22, 32
475 return: ; preds = %if.end13, %if.then17, %if.then7, %if.then
; Merge the per-path results; 64 is the value when all tested pieces are zero.
477 %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ]
481 ; Make sure we do not assert if the immediate in and is bigger than i64.
487 ; CHECK-LABEL: fct20:
; fct20: shifts an i128 right by 18 and yields either the plain truncation to
; i80 or a truncation of the value masked with a constant that does not fit
; in 64 bits, selected by whether %b is zero.
; NOTE(review): the block labels referenced by the branch and phi (%entry,
; %then, %end) are not visible in this view.
489 define i80 @fct20(i128 %a, i128 %b) {
491 %shr = lshr i128 %a, 18
492 %conv = trunc i128 %shr to i80
493 %tobool = icmp eq i128 %b, 0
494 br i1 %tobool, label %then, label %end
; The mask below exceeds 2^64, which is the case this test guards against.
496 %and = and i128 %shr, 483673642326615442599424
497 %conv2 = trunc i128 %and to i80
500 %conv3 = phi i80 [%conv, %entry], [%conv2, %then]
504 ; Check if we can still catch UBFX when "AND" is used by SHL.
505 ; CHECK-LABEL: fct21:
507 @arr = external global [8 x [64 x i64]]
; fct21: extracts bits [7:4] of %x and uses that 4-bit value as the innermost
; index into @arr (row 0), loading the selected i64 element.
508 define i64 @fct21(i64 %x) {
510 %shr = lshr i64 %x, 4
511 %and = and i64 %shr, 15
; The masked value feeds address computation for 8-byte elements.
512 %arrayidx = getelementptr inbounds [8 x [64 x i64]], [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
513 %0 = load i64, i64* %arrayidx, align 8
; test_ignored_rightbits: builds a 7-bit value from the low 3 bits of %dst
; and a 4-bit field of %in placed at bits [6:3], then duplicates that value
; 8 bits higher and truncates the combination to i16.
517 define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
518 ; CHECK-LABEL: test_ignored_rightbits:
520 %positioned_field = shl i32 %in, 3
; 120 == 0b1111000: keep only bits [6:3] of the shifted input.
521 %positioned_masked_field = and i32 %positioned_field, 120
522 %masked_dst = and i32 %dst, 7
523 %insertion = or i32 %masked_dst, %positioned_masked_field
524 ; CHECK: {{bfm|bfi|bfxil}}
; Second insertion: the same 7-bit value is OR-ed in again at bit 8.
526 %shl16 = shl i32 %insertion, 8
527 %or18 = or i32 %shl16, %insertion
528 %conv19 = trunc i32 %or18 to i16
529 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #7
534 ; The following test exercises the case where we have a BFI
535 ; instruction with the same input in both operands. We need to
536 ; track the useful bits through both operands.
537 ; CHECK-LABEL: sameOperandBFI
; sameOperandBFI: forms a 4-bit value from the low 2 bits of %src2 and two
; bits derived from %src >> 47, then ORs it with a copy of itself shifted
; left by 4 and stores the low 16 bits to *%ptr.
542 define void @sameOperandBFI(i64 %src, i64 %src2, i16 *%ptr) {
544 %shr47 = lshr i64 %src, 47
545 %src2.trunc = trunc i64 %src2 to i32
; The branch condition is undef; the test does not depend on which way it goes.
546 br i1 undef, label %end, label %if.else
549 %and3 = and i32 %src2.trunc, 3
550 %shl2 = shl nuw nsw i64 %shr47, 2
551 %shl2.trunc = trunc i64 %shl2 to i32
; 12 == 0b1100: the two bits from %src land at positions [3:2].
552 %and12 = and i32 %shl2.trunc, 12
553 %BFISource = or i32 %and3, %and12 ; ...00000ABCD
554 %BFIRHS = shl nuw nsw i32 %BFISource, 4 ; ...0ABCD0000
555 %BFI = or i32 %BFIRHS, %BFISource ; ...0ABCDABCD
556 %BFItrunc = trunc i32 %BFI to i16
557 store i16 %BFItrunc, i16* %ptr, align 4