1 ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; Cost of llvm.bswap on 64-bit elements whose operands are not loaded or stored.
; Expected costs below: 1 for scalar i64 and for <2 x i64>, 2 for <4 x i64>.
; NOTE(review): the doubled cost for <4 x i64> presumably reflects the type
; needing two 128-bit vector registers on z13 -- confirm against the cost model.
3 define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
4 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64':
5 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64
6 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64>
7 ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp4 = tail call <4 x i64>
8 %swp1 = tail call i64 @llvm.bswap.i64(i64 %arg)
9 %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg2)
; The widest case is fed undef instead of a function argument.
10 %swp4 = tail call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
; Cost of llvm.bswap on 32-bit elements whose operands are not loaded or stored.
; Expected costs below: 1 for scalar i32, <2 x i32> and <4 x i32>; 2 for <8 x i32>.
; NOTE(review): the doubled cost for <8 x i32> presumably reflects the type
; needing two 128-bit vector registers on z13 -- confirm against the cost model.
14 define void @bswap_i32(i32 %arg, <2 x i32> %arg2, <4 x i32> %arg4) {
15 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32':
16 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32
17 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i32>
18 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp4 = tail call <4 x i32>
19 ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp8 = tail call <8 x i32>
20 %swp1 = tail call i32 @llvm.bswap.i32(i32 %arg)
21 %swp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %arg2)
22 %swp4 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg4)
; The widest case is fed undef instead of a function argument.
23 %swp8 = tail call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
; Cost of llvm.bswap on 16-bit elements whose operands are not loaded or stored.
; Expected costs below: 1 for scalar i16 and for the 2-, 4- and 8-element
; vectors; 2 for <16 x i16>.
; NOTE(review): the parameter list continues past this line; %arg8 (used below
; as <8 x i16>) is presumably declared on the continuation line -- confirm.
27 define void @bswap_i16(i16 %arg, <2 x i16> %arg2, <4 x i16> %arg4,
29 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16':
30 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %arg)
31 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %arg2)
32 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp4 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %arg4)
33 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp8 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg8)
34 ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp16 = tail call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
35 %swp1 = tail call i16 @llvm.bswap.i16(i16 %arg)
36 %swp2 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %arg2)
37 %swp4 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %arg4)
38 %swp8 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg8)
; The widest case is fed undef instead of a function argument.
39 %swp16 = tail call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
43 ; Test that folding a bswap into a load-reversed or store-reversed instruction is reflected in the costs.
; Cost of an i64 llvm.bswap whose operand is loaded and/or whose result is stored.
; Three patterns are covered by the expectations below:
;   1. load -> bswap:          load 0, bswap 1
;   2. bswap -> store:         bswap 1, store 0
;   3. load -> bswap -> store: load 1, bswap 1, store 0
44 define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) {
45 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64_mem':
46 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, i64* %src
47 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
48 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
49 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, i64* %dst
50 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, i64* %src
51 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
52 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, i64* %dst
; Pattern 1: a loaded value is byte-swapped; the result is not stored.
53 %Ld1 = load i64, i64* %src
54 %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
; Pattern 2: a byte-swapped function argument is stored.
56 %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
57 store i64 %swp2, i64* %dst
; Pattern 3: a loaded value is byte-swapped and then stored.
59 %Ld2 = load i64, i64* %src
60 %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
61 store i64 %swp3, i64* %dst
; Cost of an i32 llvm.bswap whose operand is loaded and/or whose result is stored.
; Three patterns are covered by the expectations below:
;   1. load -> bswap:          load 0, bswap 1
;   2. bswap -> store:         bswap 1, store 0
;   3. load -> bswap -> store: load 1, bswap 1, store 0
66 define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
67 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem':
68 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
69 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
70 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
71 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, i32* %dst
72 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, i32* %src
73 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
74 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, i32* %dst
; Pattern 1: a loaded value is byte-swapped; the result is not stored.
75 %Ld1 = load i32, i32* %src
76 %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
; Pattern 2: a byte-swapped function argument is stored.
78 %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
79 store i32 %swp2, i32* %dst
; Pattern 3: a loaded value is byte-swapped and then stored.
81 %Ld2 = load i32, i32* %src
82 %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
83 store i32 %swp3, i32* %dst
; Cost of an i16 llvm.bswap whose operand is loaded and/or whose result is stored.
; Three patterns are covered by the expectations below:
;   1. load -> bswap:          load 0, bswap 1
;   2. bswap -> store:         bswap 1, store 0
;   3. load -> bswap -> store: load 1, bswap 1, store 0
88 define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
89 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem':
90 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
91 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
92 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
93 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, i16* %dst
94 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, i16* %src
95 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
96 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, i16* %dst
; Pattern 1: a loaded value is byte-swapped; the result is not stored.
97 %Ld1 = load i16, i16* %src
98 %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
; Pattern 2: a byte-swapped function argument is stored.
100 %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
101 store i16 %swp2, i16* %dst
; Pattern 3: a loaded value is byte-swapped and then stored.
103 %Ld2 = load i16, i16* %src
104 %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
105 store i16 %swp3, i16* %dst
; Declarations of the llvm.bswap intrinsic overloads called by the functions in
; this file, grouped by element width.

; 64-bit elements: scalar, 2- and 4-element vectors.
111 declare i64 @llvm.bswap.i64(i64)
112 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
113 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
; 32-bit elements: scalar, 2-, 4- and 8-element vectors.
115 declare i32 @llvm.bswap.i32(i32)
116 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
117 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
118 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
; 16-bit elements: scalar, 2-, 4-, 8- and 16-element vectors.
120 declare i16 @llvm.bswap.i16(i16)
121 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
122 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
123 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
124 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)