1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
3 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE
4 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE
5 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
6 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
7 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
8 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
9 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
10 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512
; Cost-model coverage for llvm.vector.reduce.fmax over double vectors of
; 1, 2, 4, 8 and 16 lanes, called without fast-math flags on undef operands.
; The autogenerated checks below record the expected cost for each check
; prefix named on the RUN lines; here the +avx and +avx2 runs are checked
; through their separate AVX1 and AVX2 prefixes.
12 define i32 @reduce_f64(i32 %arg) {
13 ; SSE-LABEL: 'reduce_f64'
14 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
15 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
16 ; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
17 ; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
18 ; SSE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
19 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
21 ; AVX1-LABEL: 'reduce_f64'
22 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
23 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
24 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
25 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
26 ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
27 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
29 ; AVX2-LABEL: 'reduce_f64'
30 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
31 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
33 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
34 ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
35 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
37 ; AVX512-LABEL: 'reduce_f64'
38 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
39 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
40 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
41 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
42 ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
43 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
45 %V1 = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
46 %V2 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
47 %V4 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
48 %V8 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
49 %V16 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
; Cost-model coverage for llvm.vector.reduce.fmax over float vectors of
; 1, 2, 4, 8, 16 and 32 lanes, called without fast-math flags on undef
; operands. The autogenerated checks below record the expected cost for each
; check prefix named on the RUN lines; here the +avx and +avx2 runs are
; checked through their separate AVX1 and AVX2 prefixes.
53 define i32 @reduce_f32(i32 %arg) {
54 ; SSE-LABEL: 'reduce_f32'
55 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
56 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
57 ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
58 ; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
59 ; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
60 ; SSE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
61 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
63 ; AVX1-LABEL: 'reduce_f32'
64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
65 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
66 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
67 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
68 ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
69 ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
70 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
72 ; AVX2-LABEL: 'reduce_f32'
73 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
74 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
75 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
76 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
77 ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
78 ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
79 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
81 ; AVX512-LABEL: 'reduce_f32'
82 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
83 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
84 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
85 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
86 ; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
87 ; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
88 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
90 %V1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
91 %V2 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
92 %V4 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
93 %V8 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
94 %V16 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
95 %V32 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
; Same double-vector reductions as @reduce_f64 (1, 2, 4, 8 and 16 lanes), but
; every call carries the `fast` fast-math flag. The +avx and +avx2 runs are
; both checked through the shared AVX prefix in this function, so the expected
; costs listed under that prefix apply to both configurations.
101 define i32 @reduce_f64_fast(i32 %arg) {
102 ; SSE-LABEL: 'reduce_f64_fast'
103 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
104 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
105 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
106 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
107 ; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
108 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
110 ; AVX-LABEL: 'reduce_f64_fast'
111 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
112 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
113 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
114 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
115 ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
116 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
118 ; AVX512-LABEL: 'reduce_f64_fast'
119 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
120 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
121 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
122 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
123 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
124 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
126 %V1 = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> undef)
127 %V2 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
128 %V4 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
129 %V8 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> undef)
130 %V16 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> undef)
; Same float-vector reductions as @reduce_f32 (1, 2, 4, 8, 16 and 32 lanes),
; but every call carries the `fast` fast-math flag. The +avx and +avx2 runs
; are both checked through the shared AVX prefix in this function, so the
; expected costs listed under that prefix apply to both configurations.
134 define i32 @reduce_f32_fast(i32 %arg) {
135 ; SSE-LABEL: 'reduce_f32_fast'
136 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
137 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
138 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
139 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
140 ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
141 ; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
142 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
144 ; AVX-LABEL: 'reduce_f32_fast'
145 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
146 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
147 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
148 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
149 ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
150 ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
151 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
153 ; AVX512-LABEL: 'reduce_f32_fast'
154 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call fast float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
155 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
156 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
157 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
158 ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
159 ; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
160 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
162 %V1 = call fast float @llvm.vector.reduce.fmax.v1f32(<1 x float> undef)
163 %V2 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
164 %V4 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
165 %V8 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
166 %V16 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> undef)
167 %V32 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> undef)
; Declarations of the llvm.vector.reduce.fmax overloads called by the test
; functions in this file: double vectors of 1 to 16 lanes, then float vectors
; of 1 to 32 lanes.
171 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
172 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
173 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
174 declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
175 declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
177 declare float @llvm.vector.reduce.fmax.v1f32(<1 x float>)
178 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
179 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
180 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
181 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
182 declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>)