1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FAST16 %s
3 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
4 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,FAST16-SIZE %s
5 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW16-SIZE %s
7 ; Cost of scalar and 2- to 9-element vector i32 adds, checked under the default cost kind and under -cost-kind=code-size.
8 define amdgpu_kernel void @add_i32() #0 {
9 ; ALL-LABEL: 'add_i32'
10 ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
11 ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
12 ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
13 ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
14 ; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
15 ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
16 ; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
17 ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
18 ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
19 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
21 ; ALL-SIZE-LABEL: 'add_i32'
22 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
23 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
24 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
25 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
26 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
27 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
28 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
29 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
30 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
31 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
33 %i32 = add i32 undef, undef
34 %v2i32 = add <2 x i32> undef, undef
35 %v3i32 = add <3 x i32> undef, undef
36 %v4i32 = add <4 x i32> undef, undef
37 %v5i32 = add <5 x i32> undef, undef
38 %v6i32 = add <6 x i32> undef, undef
39 %v7i32 = add <7 x i32> undef, undef
40 %v8i32 = add <8 x i32> undef, undef
41 %v9i32 = add <9 x i32> undef, undef
42 ret void
43 }
44 ; Cost of scalar and 2- to 5-element vector i64 adds, checked under the default cost kind and under -cost-kind=code-size.
45 define amdgpu_kernel void @add_i64() #0 {
46 ; ALL-LABEL: 'add_i64'
47 ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
48 ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
49 ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
50 ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
51 ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
52 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
54 ; ALL-SIZE-LABEL: 'add_i64'
55 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
56 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
57 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
58 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
59 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
60 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
62 %i64 = add i64 undef, undef
63 %v2i64 = add <2 x i64> undef, undef
64 %v3i64 = add <3 x i64> undef, undef
65 %v4i64 = add <4 x i64> undef, undef
66 %v5i64 = add <5 x i64> undef, undef
67 ret void
68 }
69 ; Cost of scalar and vector i16 adds; expectations are kept separately for the -mcpu=gfx900 runs and the runs without -mcpu, for both cost kinds.
70 define amdgpu_kernel void @add_i16() #0 {
71 ; FAST16-LABEL: 'add_i16'
72 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
73 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
74 ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
75 ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
76 ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
77 ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
78 ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
79 ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
80 ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
82 ; SLOW16-LABEL: 'add_i16'
83 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
84 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
85 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
86 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
87 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
88 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
89 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
90 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
91 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
93 ; FAST16-SIZE-LABEL: 'add_i16'
94 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
95 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
96 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
97 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
98 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
99 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
100 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
101 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
102 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
104 ; SLOW16-SIZE-LABEL: 'add_i16'
105 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
106 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
107 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
108 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
109 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
110 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
111 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
112 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
113 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
115 %i16 = add i16 undef, undef
116 %v2i16 = add <2 x i16> undef, undef
117 %v3i16 = add <3 x i16> undef, undef
118 %v4i16 = add <4 x i16> undef, undef
119 %v5i16 = add <5 x i16> undef, undef
120 %v6i16 = add <6 x i16> undef, undef
121 %v16i16 = add <16 x i16> undef, undef
122 %v17i16 = add <17 x i16> undef, undef
123 ret void
124 }
125 ; Cost of scalar and vector i8 adds (up to <33 x i8>), checked under the default cost kind and under -cost-kind=code-size.
126 define amdgpu_kernel void @add_i8() #0 {
127 ; ALL-LABEL: 'add_i8'
128 ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
129 ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
130 ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
131 ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
132 ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
133 ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
134 ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
135 ; ALL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
136 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
138 ; ALL-SIZE-LABEL: 'add_i8'
139 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
140 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
141 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
142 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
143 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
144 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
145 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
146 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
147 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
149 %i8 = add i8 undef, undef
150 %v2i8 = add <2 x i8> undef, undef
151 %v3i8 = add <3 x i8> undef, undef
152 %v4i8 = add <4 x i8> undef, undef
153 %v5i8 = add <5 x i8> undef, undef
154 %v6i8 = add <6 x i8> undef, undef
155 %v32i8 = add <32 x i8> undef, undef
156 %v33i8 = add <33 x i8> undef, undef
157 ret void
158 }
159 ; Cost of sub on i8/i16/i32/i64 scalars and small i16 vectors; expectations are kept separately for the -mcpu=gfx900 runs and the runs without -mcpu, for both cost kinds.
160 define amdgpu_kernel void @sub() #0 {
161 ; FAST16-LABEL: 'sub'
162 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
163 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
164 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
165 ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
166 ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
167 ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
168 ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
169 ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
171 ; SLOW16-LABEL: 'sub'
172 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
173 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
174 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
175 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
176 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
177 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
178 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
179 ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
181 ; FAST16-SIZE-LABEL: 'sub'
182 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
183 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
184 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
185 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
186 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
187 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
188 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
189 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
191 ; SLOW16-SIZE-LABEL: 'sub'
192 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
193 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
194 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
195 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
196 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
197 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
198 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
199 ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
201 %i8 = sub i8 undef, undef
202 %i16 = sub i16 undef, undef
203 %i32 = sub i32 undef, undef
204 %i64 = sub i64 undef, undef
205 %v2i16 = sub <2 x i16> undef, undef
206 %v3i16 = sub <3 x i16> undef, undef
207 %v4i16 = sub <4 x i16> undef, undef
208 ret void
209 }
211 attributes #0 = { nounwind }