1 ; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs < %s \
2 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=INST
3 ; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-lsr-no-insns-cost=true \
4 ; RUN: < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=REG
6 ; void test(unsigned *a, unsigned *b, unsigned *c)
7 ; {
8 ;   for (unsigned long i = 0; i < 1024; i++)
9 ;     c[i] = a[i] + b[i];
10 ; }
12 ; compile with -fno-unroll-loops
14 define void @lsr-insts-cost(ptr %0, ptr %1, ptr %2) {
15 ; INST-LABEL: lsr-insts-cost
16 ; INST: .LBB0_4: # =>This Inner Loop Header: Depth=1
17 ; INST-NEXT: lxvd2x vs34, r3, r6
18 ; INST-NEXT: lxvd2x vs35, r4, r6
19 ; INST-NEXT: vadduwm v2, v3, v2
20 ; INST-NEXT: stxvd2x vs34, r5, r6
21 ; INST-NEXT: addi r6, r6, 16
22 ; INST-NEXT: bdnz .LBB0_4
24 ; REG-LABEL: lsr-insts-cost
25 ; REG: .LBB0_4: # =>This Inner Loop Header: Depth=1
26 ; REG-NEXT: lxvd2x vs34, 0, r3
27 ; REG-NEXT: lxvd2x vs35, 0, r4
28 ; REG-NEXT: addi r4, r4, 16
29 ; REG-NEXT: addi r3, r3, 16
30 ; REG-NEXT: vadduwm v2, v3, v2
31 ; REG-NEXT: stxvd2x vs34, 0, r5
32 ; REG-NEXT: addi r5, r5, 16
33 ; REG-NEXT: bdnz .LBB0_4
34 %4 = getelementptr i32, ptr %2, i64 1024
35 %5 = getelementptr i32, ptr %0, i64 1024
36 %6 = getelementptr i32, ptr %1, i64 1024
37 %7 = icmp ugt ptr %5, %2
38 %8 = icmp ugt ptr %4, %0
40 %10 = icmp ugt ptr %6, %2
41 %11 = icmp ugt ptr %4, %1
44 br i1 %13, label %28, label %14
47 %15 = phi i64 [ %25, %14 ], [ 0, %3 ]
48 %16 = getelementptr inbounds i32, ptr %0, i64 %15
49 %17 = bitcast ptr %16 to ptr
50 %18 = load <4 x i32>, ptr %17, align 4
51 %19 = getelementptr inbounds i32, ptr %1, i64 %15
52 %20 = bitcast ptr %19 to ptr
53 %21 = load <4 x i32>, ptr %20, align 4
54 %22 = add <4 x i32> %21, %18
55 %23 = getelementptr inbounds i32, ptr %2, i64 %15
56 %24 = bitcast ptr %23 to ptr
57 store <4 x i32> %22, ptr %24, align 4
59 %26 = icmp eq i64 %25, 1024
60 br i1 %26, label %27, label %14
62 27: ; preds = %14, %28
66 %29 = phi i64 [ %36, %28 ], [ 0, %3 ]
67 %30 = getelementptr inbounds i32, ptr %0, i64 %29
68 %31 = load i32, ptr %30, align 4
69 %32 = getelementptr inbounds i32, ptr %1, i64 %29
70 %33 = load i32, ptr %32, align 4
71 %34 = add i32 %33, %31
72 %35 = getelementptr inbounds i32, ptr %2, i64 %29
73 store i32 %34, ptr %35, align 4
74 %36 = add nuw nsw i64 %29, 1
75 %37 = icmp eq i64 %36, 1024
76 br i1 %37, label %27, label %28