; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-formprep-chain-commoning \
; RUN:   -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 < %s | FileCheck %s
; Test that on 32-bit AIX, chain commoning still works without crashing.
;
; The four loads below are commoned into two chains:
; 1: base: base1 + offset, offsets: (0, offset)
; 2: base: base1 + 3*offset, offsets: (0, offset)
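;
; As a rough sketch of what that means for the source below (b1/b2 are
; hypothetical names, not taken from the pass output): instead of computing
; four independent addresses, the loads are rewritten against two shared
; bases, each reused with the offsets (0, offset):
;
;   char *b1 = p + base1 + offset;                          // chain 1 base
;   char *b2 = p + base1 + 3 * offset;                      // chain 2 base
;   unsigned long x1 = *(unsigned long *)(b1 + i);          // b1 + 0
;   unsigned long x2 = *(unsigned long *)(b1 + offset + i); // b1 + offset
;   unsigned long x3 = *(unsigned long *)(b2 + i);          // b2 + 0
;   unsigned long x4 = *(unsigned long *)(b2 + offset + i); // b2 + offset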
; long long two_chain_same_offset_succ_i32(char *p, int offset, int base1, long long n) {
;   int o1 = base1 + offset;
;   int o2 = base1 + 2 * offset;
;   int o3 = base1 + 3 * offset;
;   int o4 = base1 + 4 * offset;
;   char *p1 = p + o1;
;   char *p2 = p + o2;
;   char *p3 = p + o3;
;   char *p4 = p + o4;
;   long long sum = 0;
;   for (long long i = 0; i < n; ++i) {
;     unsigned long x1 = *(unsigned long *)(p1 + i);
;     unsigned long x2 = *(unsigned long *)(p2 + i);
;     unsigned long x3 = *(unsigned long *)(p3 + i);
;     unsigned long x4 = *(unsigned long *)(p4 + i);
;     sum += x1 * x2 * x3 * x4;
;   }
;   return sum;
; }

define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 %n) {
; CHECK-LABEL: two_chain_same_offset_succ_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r6, 0
; CHECK-NEXT: cmpwi cr1, r6, 0
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmpwi cr1, r7, 0
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.2: # %for.body.preheader
; CHECK-NEXT: slwi r8, r4, 1
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: li r11, 0
; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r8, r4, r8
; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r8, r3, r5
; CHECK-NEXT: add r9, r3, r9
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: .align 4
; CHECK-NEXT: L..BB0_3: # %for.body
; CHECK-NEXT: lwz r12, 0(r8)
; CHECK-NEXT: lwzx r0, r8, r4
; CHECK-NEXT: lwz r31, 0(r9)
; CHECK-NEXT: lwzx r30, r9, r4
; CHECK-NEXT: addi r8, r8, 1
; CHECK-NEXT: addi r9, r9, 1
; CHECK-NEXT: mullw r12, r0, r12
; CHECK-NEXT: mullw r12, r12, r31
; CHECK-NEXT: mullw r12, r12, r30
; CHECK-NEXT: addc r5, r5, r12
; CHECK-NEXT: addze r3, r3
; CHECK-NEXT: addic r11, r11, 1
; CHECK-NEXT: addze r10, r10
; CHECK-NEXT: cmplw r10, r6
; CHECK-NEXT: cmpw cr1, r10, r6
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmplw cr1, r11, r7
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
; CHECK-NEXT: # %bb.4: # %for.body
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload
; CHECK-NEXT: mr r4, r5
; CHECK-NEXT: L..BB0_6:
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 0
entry:
  %add = add nsw i32 %base1, %offset
  %mul = shl nsw i32 %offset, 1
  %add1 = add nsw i32 %mul, %base1
  %mul2 = mul nsw i32 %offset, 3
  %add3 = add nsw i32 %mul2, %base1
  %mul4 = shl nsw i32 %offset, 2
  %add5 = add nsw i32 %mul4, %base1
  %add.ptr = getelementptr inbounds i8, ptr %p, i32 %add
  %add.ptr6 = getelementptr inbounds i8, ptr %p, i32 %add1
  %add.ptr7 = getelementptr inbounds i8, ptr %p, i32 %add3
  %add.ptr8 = getelementptr inbounds i8, ptr %p, i32 %add5
  %cmp49 = icmp sgt i64 %n, 0
  br i1 %cmp49, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %sum.0.lcssa = phi i64 [ 0, %entry ], [ %add19, %for.body ]
  ret i64 %sum.0.lcssa

for.body:                                         ; preds = %entry, %for.body
  %sum.051 = phi i64 [ %add19, %for.body ], [ 0, %entry ]
  %i.050 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %idx.ext = trunc i64 %i.050 to i32
  %add.ptr9 = getelementptr inbounds i8, ptr %add.ptr, i32 %idx.ext
  %0 = load i32, ptr %add.ptr9, align 4
  %add.ptr11 = getelementptr inbounds i8, ptr %add.ptr6, i32 %idx.ext
  %1 = load i32, ptr %add.ptr11, align 4
  %add.ptr13 = getelementptr inbounds i8, ptr %add.ptr7, i32 %idx.ext
  %2 = load i32, ptr %add.ptr13, align 4
  %add.ptr15 = getelementptr inbounds i8, ptr %add.ptr8, i32 %idx.ext
  %3 = load i32, ptr %add.ptr15, align 4
  %mul16 = mul i32 %1, %0
  %mul17 = mul i32 %mul16, %2
  %mul18 = mul i32 %mul17, %3
  %conv = zext i32 %mul18 to i64
  %add19 = add nuw nsw i64 %sum.051, %conv
  %inc = add nuw nsw i64 %i.050, 1
  %cmp = icmp slt i64 %inc, %n
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}