1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
3 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
4 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
5 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
7 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
9 define i32 @reduce_i64(i32 %arg) {
10 ; V8M-RECIP-LABEL: 'reduce_i64'
11 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
12 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
13 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
14 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
15 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
16 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
18 ; NEON-RECIP-LABEL: 'reduce_i64'
19 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
20 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
21 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
22 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
23 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
24 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
26 ; V8M-SIZE-LABEL: 'reduce_i64'
27 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
28 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
29 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
30 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
31 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
32 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
34 ; NEON-SIZE-LABEL: 'reduce_i64'
35 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
36 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
37 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
38 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
39 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
40 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
42 %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
43 %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
44 %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
45 %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
46 %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
50 define i32 @reduce_i32(i32 %arg) {
51 ; V8M-RECIP-LABEL: 'reduce_i32'
52 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
53 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
54 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
55 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
56 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
57 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
58 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
59 ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
61 ; NEON-RECIP-LABEL: 'reduce_i32'
62 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
63 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
64 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
65 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
66 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
67 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
68 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
69 ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
71 ; V8M-SIZE-LABEL: 'reduce_i32'
72 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
73 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
74 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
75 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
76 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
77 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
78 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
79 ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
81 ; NEON-SIZE-LABEL: 'reduce_i32'
82 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
83 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
84 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
85 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
86 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
87 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
88 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
89 ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
91 %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
92 %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
93 %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
94 %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
95 %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
96 %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
97 %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
101 declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
102 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
103 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
104 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
105 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
107 declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
108 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
109 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
110 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
111 declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
113 declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
114 declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
115 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
116 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
117 declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
118 declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
120 declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
121 declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
122 declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
123 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
124 declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
125 declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
126 declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)