1 ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \
2 ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s
4 ; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 \
5 ; RUN: -verify-machineinstrs -vec-extabi | \
6 ; RUN: FileCheck %s --check-prefixes=AIX,AIX64
7 ; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 \
8 ; RUN: -verify-machineinstrs -vec-extabi | \
9 ; RUN: FileCheck %s --check-prefixes=AIX,AIX32
; Byte-swaps every i32 element of %Arr in place. Arrays of at least four
; elements go through a 4-wide vector loop built on llvm.bswap.v4i32; whatever
; is left over (or the whole array when %Len is below 4) goes through a scalar
; loop built on llvm.bswap.i32. The directives expect the vector path to load
; with lxv and then byte-swap the loaded register with xxbrw.
11 define dso_local void @test(ptr %Arr, i32 signext %Len) {
; Anchor the little-endian patterns to this function's output, matching the
; label directives that the other functions in this file carry.
; CHECK-LABEL: test:
; Capture the register written by the vector load (full register names, so it
; is spelled vs<N>) ...
13 ; CHECK: lxv [[REG:vs[0-9]+]], 0(r{{[0-9]+}})
; ... and require that same register as the source operand of the swap.
15 ; CHECK: xxbrw vs{{[0-9]+}}, [[REG]]
; Same anchoring for both AIX runs, which share the common AIX prefix.
; AIX-LABEL: test:
; The AIX patterns match registers as bare numbers. Each run captures its own
; load destination under a separate variable name.
18 ; AIX64: lxv [[REG64:[0-9]+]], {{[0-9]+}}({{[0-9]+}})
19 ; AIX32: lxv [[REG32:[0-9]+]], {{[0-9]+}}({{[0-9]+}})
; 64-bit run only: the captured text must not appear between load and swap.
20 ; AIX64-NOT: [[REG64]]
21 ; AIX64: xxbrw {{[0-9]+}}, [[REG64]]
22 ; AIX32: xxbrw {{[0-9]+}}, [[REG32]]
; Nothing to do unless %Len is positive.
24 %cmp1 = icmp slt i32 0, %Len
25 br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup
27 for.body.lr.ph: ; preds = %entry
; Fewer than four elements: bypass the vector loop entirely.
28 %min.iters.check = icmp ult i32 %Len, 4
29 br i1 %min.iters.check, label %scalar.ph, label %vector.ph
31 vector.ph: ; preds = %for.body.lr.ph
; %n.vec = %Len rounded down to a multiple of 4 (the vector trip bound).
32 %n.mod.vf = urem i32 %Len, 4
33 %n.vec = sub i32 %Len, %n.mod.vf
36 vector.body: ; preds = %vector.body, %vector.ph
37 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Splat of %index plus <0,1,2,3>; %induction is not used by any instruction
; shown in this function.
38 %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
39 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
40 %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
; Load four i32 values at Arr[index], byte-swap each lane, and store the result
; back to the same address (%2 and %5 are computed from the same index).
41 %0 = add i32 %index, 0
42 %1 = sext i32 %0 to i64
43 %2 = getelementptr inbounds i32, ptr %Arr, i64 %1
44 %wide.load = load <4 x i32>, ptr %2, align 4
45 %3 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %wide.load)
46 %4 = sext i32 %0 to i64
47 %5 = getelementptr inbounds i32, ptr %Arr, i64 %4
48 store <4 x i32> %3, ptr %5, align 4
; Advance by one vector and leave the loop once %n.vec elements are done.
49 %index.next = add i32 %index, 4
50 %6 = icmp eq i32 %index.next, %n.vec
51 br i1 %6, label %middle.block, label %vector.body
53 middle.block: ; preds = %vector.body
; If the vector loop covered all %Len elements there is no scalar remainder.
54 %cmp.n = icmp eq i32 %Len, %n.vec
55 br i1 %cmp.n, label %for.cond.for.cond.cleanup_crit_edge, label %scalar.ph
57 scalar.ph: ; preds = %middle.block, %for.body.lr.ph
; The scalar loop resumes at %n.vec after the vector loop, or starts at 0 when
; the vector loop was bypassed.
58 %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %for.body.lr.ph ]
61 for.cond.for.cond.cleanup_crit_edge: ; preds = %middle.block, %for.inc
62 br label %for.cond.cleanup
64 for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
67 for.body: ; preds = %for.inc, %scalar.ph
68 %i.02 = phi i32 [ %bc.resume.val, %scalar.ph ], [ %inc, %for.inc ]
; Scalar remainder: load Arr[i], byte-swap it, and store it back to Arr[i].
69 %idxprom = sext i32 %i.02 to i64
70 %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %idxprom
71 %7 = load i32, ptr %arrayidx, align 4
72 %8 = call i32 @llvm.bswap.i32(i32 %7)
73 %idxprom1 = sext i32 %i.02 to i64
74 %arrayidx2 = getelementptr inbounds i32, ptr %Arr, i64 %idxprom1
75 store i32 %8, ptr %arrayidx2, align 4
78 for.inc: ; preds = %for.body
; Step to the next element and keep looping while it is still below %Len.
79 %inc = add nsw i32 %i.02, 1
80 %cmp = icmp slt i32 %inc, %Len
81 br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
83 for.end: ; preds = %for.cond.cleanup
; Calls llvm.bswap.v8i16 on the <8 x i16> argument %a. The little-endian run
; expects an xxbrh whose source and destination are both vs34.
87 define dso_local <8 x i16> @test_halfword(<8 x i16> %a) local_unnamed_addr {
88 ; CHECK-LABEL: test_halfword:
89 ; CHECK: xxbrh vs34, vs34
; NOTE(review): the AIX runs are only given a label directive for this
; function; no instruction pattern follows it here -- confirm that is intended.
92 ; AIX-LABEL: test_halfword:
96 %0 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
; Calls llvm.bswap.v2i64 on the <2 x i64> argument %a. The little-endian run
; expects an xxbrd whose source and destination are both vs34.
100 define dso_local <2 x i64> @test_doubleword(<2 x i64> %a) local_unnamed_addr {
101 ; CHECK-LABEL: test_doubleword:
102 ; CHECK: xxbrd vs34, vs34
; NOTE(review): the AIX runs are only given a label directive for this
; function; no instruction pattern follows it here -- confirm that is intended.
105 ; AIX-LABEL: test_doubleword:
109 %0 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
; Calls llvm.bswap.v1i128 on the <1 x i128> argument %a. The little-endian run
; expects an xxbrq whose source and destination are both vs34.
113 define dso_local <1 x i128> @test_quadword(<1 x i128> %a) local_unnamed_addr {
114 ; CHECK-LABEL: test_quadword:
115 ; CHECK: xxbrq vs34, vs34
; NOTE(review): the AIX runs are only given a label directive for this
; function; no instruction pattern follows it here -- confirm that is intended.
118 ; AIX-LABEL: test_quadword:
122 %0 = call <1 x i128> @llvm.bswap.v1i128(<1 x i128> %a)
126 ; Function Attrs: nounwind readnone speculatable willreturn
127 declare <1 x i128> @llvm.bswap.v1i128(<1 x i128>)
129 ; Function Attrs: nounwind readnone speculatable willreturn
130 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
132 ; Function Attrs: nounwind readnone speculatable willreturn
133 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
135 ; Function Attrs: nounwind readnone speculatable willreturn
136 declare i32 @llvm.bswap.i32(i32)
138 ; Function Attrs: nounwind readnone speculatable willreturn
139 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)