1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
5 ; FIXME: PPCLoopInstrFormPrep should be able to use a common base for "(unsigned long long *)(p + j + 5)"
6 ; and "(unsigned long long *)(p + j + 9)", so that only two DS-form loads remain inside the loop.
8 ; long long foo(char *p, int n, int count) {
11 ; for (int i = 0; i < n; i++) {
12 ; sum += *(unsigned long long *)(p + j + 5);
13 ; sum += *(unsigned long long *)(p + j + 9);
; @foo: counted loop that adds two i64 loads per iteration into a running sum.
;   %p     - base byte pointer
;   %n     - trip count; the loop body runs only when %n > 0
;   %count - signed per-iteration stride (in bytes) applied to the offset into %p
; Each iteration loads an i64 from %p + offset + 5 and another from
; %p + offset + 9, where offset starts at 0 and grows by sext(%count).
; The autogenerated assertions below pin the current codegen: the constants
; 5 and 9 are materialized with `li` in the preheader and the loop uses two
; indexed loads (`ldx`) off a shared `add r9, r3, r6` base.
; NOTE(review): both loads are annotated `align 8` even though their addresses
; differ by 4 bytes - presumably inherited from the pointer casts in the C
; source; confirm before relying on that alignment.
19 define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) {
21 ; CHECK: # %bb.0: # %entry
22 ; CHECK-NEXT: cmpwi r4, 1
23 ; CHECK-NEXT: blt cr0, .LBB0_4
24 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
25 ; CHECK-NEXT: clrldi r4, r4, 32
26 ; CHECK-NEXT: extsw r5, r5
27 ; CHECK-NEXT: li r6, 0
28 ; CHECK-NEXT: li r7, 5
29 ; CHECK-NEXT: mtctr r4
30 ; CHECK-NEXT: li r8, 9
31 ; CHECK-NEXT: li r4, 0
32 ; CHECK-NEXT: .p2align 5
33 ; CHECK-NEXT: .LBB0_2: # %for.body
35 ; CHECK-NEXT: add r9, r3, r6
36 ; CHECK-NEXT: add r6, r6, r5
37 ; CHECK-NEXT: ldx r10, r9, r7
38 ; CHECK-NEXT: ldx r9, r9, r8
39 ; CHECK-NEXT: add r4, r10, r4
40 ; CHECK-NEXT: add r4, r4, r9
41 ; CHECK-NEXT: bdnz .LBB0_2
42 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
43 ; CHECK-NEXT: mr r3, r4
45 ; CHECK-NEXT: .LBB0_4:
46 ; CHECK-NEXT: li r4, 0
47 ; CHECK-NEXT: mr r3, r4
; Skip the loop entirely unless %n > 0.
50 %cmp16 = icmp sgt i32 %n, 0
51 br i1 %cmp16, label %for.body.preheader, label %for.cond.cleanup
53 for.body.preheader: ; preds = %entry
; Widen %count once; it is the per-iteration step added to %indvars.iv.
54 %0 = sext i32 %count to i64
57 for.cond.cleanup: ; preds = %for.body, %entry
; 0 when the loop was skipped, otherwise the final running sum.
58 %sum.0.lcssa = phi i64 [ 0, %entry ], [ %add5, %for.body ]
61 for.body: ; preds = %for.body.preheader, %for.body
; %indvars.iv is the byte offset into %p, %i.019 the trip counter,
; %sum.018 the running sum.
62 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
63 %i.019 = phi i32 [ 0, %for.body.preheader ], [ %inc, %for.body ]
64 %sum.018 = phi i64 [ 0, %for.body.preheader ], [ %add5, %for.body ]
65 %add.ptr = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
; First load: i64 at %p + %indvars.iv + 5.
66 %add.ptr1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
67 %1 = bitcast i8* %add.ptr1 to i64*
68 %2 = load i64, i64* %1, align 8
69 %add = add i64 %2, %sum.018
; Second load: i64 at %p + %indvars.iv + 9 (same %add.ptr base, 4 bytes past the first).
70 %add.ptr4 = getelementptr inbounds i8, i8* %add.ptr, i64 9
71 %3 = bitcast i8* %add.ptr4 to i64*
72 %4 = load i64, i64* %3, align 8
73 %add5 = add i64 %add, %4
; Advance the byte offset by the widened %count and the trip counter by 1;
; leave the loop once %inc equals %n.
74 %indvars.iv.next = add nsw i64 %indvars.iv, %0
75 %inc = add nuw nsw i32 %i.019, 1
76 %exitcond.not = icmp eq i32 %inc, %n
77 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
80 ; char foo1(char *p, int n, int count) {
83 ; for (int i = 0; i < n; i++) {
84 ; sum += *(p + j + 1000);
; @foo1: counted loop that accumulates one i8 load per iteration.
;   %p     - base byte pointer
;   %n     - trip count; the loop body runs only when %n > 0
;   %count - signed per-iteration stride (in bytes) applied to the offset
; The constant offset 1000 is applied to %p once in the preheader; each
; iteration then loads the byte at (%p + 1000) + offset, where offset starts
; at 0 and grows by sext(%count). The sum is kept as i8, so it wraps modulo 256.
; The autogenerated assertions below pin the current codegen: `addi r6, r3, 1000`
; in the preheader, a single indexed byte load (`lbzx`) in the loop, and a
; `clrldi r3, r3, 56` on the loop-exit path.
90 define zeroext i8 @foo1(i8* %p, i32 signext %n, i32 signext %count) {
92 ; CHECK: # %bb.0: # %entry
93 ; CHECK-NEXT: cmpwi r4, 1
94 ; CHECK-NEXT: blt cr0, .LBB1_4
95 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
96 ; CHECK-NEXT: addi r6, r3, 1000
97 ; CHECK-NEXT: clrldi r3, r4, 32
98 ; CHECK-NEXT: extsw r5, r5
99 ; CHECK-NEXT: li r4, 0
100 ; CHECK-NEXT: mtctr r3
101 ; CHECK-NEXT: li r3, 0
102 ; CHECK-NEXT: .p2align 4
103 ; CHECK-NEXT: .LBB1_2: # %for.body
105 ; CHECK-NEXT: lbzx r7, r6, r4
106 ; CHECK-NEXT: add r4, r4, r5
107 ; CHECK-NEXT: add r3, r7, r3
108 ; CHECK-NEXT: bdnz .LBB1_2
109 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
110 ; CHECK-NEXT: clrldi r3, r3, 56
112 ; CHECK-NEXT: .LBB1_4:
113 ; CHECK-NEXT: li r3, 0
; Skip the loop entirely unless %n > 0.
116 %cmp10 = icmp sgt i32 %n, 0
117 br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
119 for.body.preheader: ; preds = %entry
; Loop-invariant setup: widen the stride and form the %p + 1000 base once.
120 %0 = sext i32 %count to i64
121 %add.ptr = getelementptr inbounds i8, i8* %p, i64 1000
124 for.cond.cleanup: ; preds = %for.body, %entry
; 0 when the loop was skipped, otherwise the final running sum.
125 %sum.0.lcssa = phi i8 [ 0, %entry ], [ %add, %for.body ]
128 for.body: ; preds = %for.body.preheader, %for.body
; %indvars.iv is the byte offset from %add.ptr, %i.013 the trip counter,
; %sum.012 the running i8 sum.
129 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
130 %i.013 = phi i32 [ 0, %for.body.preheader ], [ %inc, %for.body ]
131 %sum.012 = phi i8 [ 0, %for.body.preheader ], [ %add, %for.body ]
; Load the byte at (%p + 1000) + %indvars.iv and fold it into the sum.
132 %add.ptr1 = getelementptr inbounds i8, i8* %add.ptr, i64 %indvars.iv
133 %1 = load i8, i8* %add.ptr1, align 1
134 %add = add i8 %1, %sum.012
; Advance the byte offset by the widened %count and the trip counter by 1;
; leave the loop once %inc equals %n.
135 %indvars.iv.next = add nsw i64 %indvars.iv, %0
136 %inc = add nuw nsw i32 %i.013, 1
137 %exitcond.not = icmp eq i32 %inc, %n
138 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body