1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv7s-none-eabi %s -o - | FileCheck %s
; NEON table-lookup intrinsic used by every test below: it takes three
; <8 x i8> operands and returns an <8 x i8>. In the expected assembly of these
; tests the call is selected as `vtbl.8 d16, {d16, d17}, d18`, i.e. the first
; two operands form the two-register list and the third is the last operand.
4 declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i)
6 ; Check that the transformation is applied to the motivating example:
7 ; The bitcasts force the values to go through the GPRs, whereas
8 ; they are defined on VPRs and used on VPRs.
;
; Both i64 lanes of the <2 x i64> loaded from %addr are extracted with constant
; indices (0 and 1), bitcast to <8 x i8>, and passed as the first two operands
; of vtbl2; the third operand is loaded from %addr2 and the result is stored
; back to %addr2.
; In the expected assembly the d16/d17 pair produced by vld1.64 feeds vtbl.8
; directly: there is no vmov to or from core registers between load and use.
10 define void @motivatingExample(ptr %addr, ptr %addr2) {
11 ; CHECK-LABEL: motivatingExample:
13 ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
14 ; CHECK-NEXT: vldr d18, [r1]
15 ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18
16 ; CHECK-NEXT: vstr d16, [r1]
; Load the 128-bit source vector and the 64-bit third operand.
18 %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
19 %vtbl2.i25.i = load <8 x i8>, ptr %addr2
; Split the vector into its two i64 lanes using constant lane indices.
20 %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
21 %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
; Reinterpret each scalar i64 lane as an <8 x i8> vector (the bitcasts the
; comment above refers to).
22 %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
23 %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
24 %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
25 store <8 x i8> %vtbl2.i25.i313, ptr %addr2
29 ; Check that we do not perform the transformation for dynamic index.
;
; Same IR shape as @motivatingExample, except that the first extractelement
; uses the runtime value %index instead of a constant lane number.
; The expected assembly shows the extracted lane travelling through memory and
; core registers: the vector is stored to a 16-byte-aligned stack slot
; (`bfc r4, #0, #4` clears the low four bits of the new sp value), the selected
; 64-bit lane is reloaded as two 32-bit words with `ldr`, and d16 is rebuilt
; from them with `vmov d16, r2, r0` before vtbl.8.
30 define void @dynamicIndex(ptr %addr, ptr %addr2, i32 %index) {
31 ; CHECK-LABEL: dynamicIndex:
33 ; CHECK-NEXT: .save {r4, r6, r7, lr}
34 ; CHECK-NEXT: push {r4, r6, r7, lr}
35 ; CHECK-NEXT: .setfp r7, sp, #8
36 ; CHECK-NEXT: add r7, sp, #8
37 ; CHECK-NEXT: .pad #16
38 ; CHECK-NEXT: sub sp, #16
39 ; CHECK-NEXT: mov r4, sp
40 ; CHECK-NEXT: bfc r4, #0, #4
41 ; CHECK-NEXT: mov sp, r4
42 ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
43 ; CHECK-NEXT: adds r0, r2, r2
44 ; CHECK-NEXT: and r2, r0, #3
45 ; CHECK-NEXT: adds r0, #1
46 ; CHECK-NEXT: mov r12, sp
47 ; CHECK-NEXT: and r0, r0, #3
48 ; CHECK-NEXT: lsls r2, r2, #2
49 ; CHECK-NEXT: mov r3, r12
50 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r2
51 ; CHECK-NEXT: orr.w r0, r12, r0, lsl #2
52 ; CHECK-NEXT: sub.w r4, r7, #8
53 ; CHECK-NEXT: ldr r2, [r3]
54 ; CHECK-NEXT: ldr r0, [r0]
55 ; CHECK-NEXT: vldr d18, [r1]
56 ; CHECK-NEXT: vmov d16, r2, r0
57 ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18
58 ; CHECK-NEXT: vstr d16, [r1]
59 ; CHECK-NEXT: mov sp, r4
60 ; CHECK-NEXT: pop {r4, r6, r7, pc}
; Load the 128-bit source vector and the 64-bit third operand.
61 %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
62 %vtbl2.i25.i = load <8 x i8>, ptr %addr2
; The lane number of the first extract is the function argument %index, so it
; is not known at compile time; the second extract still uses constant lane 1.
63 %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index
64 %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
65 %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
66 %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
67 %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
68 store <8 x i8> %vtbl2.i25.i313, ptr %addr2
72 ; Check that we do not perform the transformation when there are several uses
73 ; of the result of the bitcast.
;
; NOTE(review): in the IR below the value that has two users is the extracted
; i64 %shuffle.i.extract.i310 (it feeds both the bitcast and the `ret`); each
; bitcast result itself is used only by the vtbl2 call. The wording above
; presumably describes the node seen by the backend -- confirm.
; In the expected assembly, `vmov r0, r2, d16` copies the first lane into core
; registers for the i64 return value (the high word is then placed with
; `mov r1, r2`), while vtbl.8 still reads {d16, d17} directly.
74 define i64 @severalUses(ptr %addr, ptr %addr2) {
75 ; CHECK-LABEL: severalUses:
77 ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
78 ; CHECK-NEXT: vmov r0, r2, d16
79 ; CHECK-NEXT: vldr d18, [r1]
80 ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18
81 ; CHECK-NEXT: vstr d16, [r1]
82 ; CHECK-NEXT: mov r1, r2
; Load the 128-bit source vector and the 64-bit third operand.
84 %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
85 %vtbl2.i25.i = load <8 x i8>, ptr %addr2
86 %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
87 %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
88 %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
89 %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
90 %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
91 store <8 x i8> %vtbl2.i25.i313, ptr %addr2
; Second use of the first extracted lane: it is also the function's result.
92 ret i64 %shuffle.i.extract.i310