1 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SDAG %s
2 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-GISEL %s
4 ; --------------------------------------------------------------------------------
; llvm.minimum.f32 tests
6 ; --------------------------------------------------------------------------------
; fneg(minimum(a, b)) with both operands loaded from memory.
; The IR loads %a and %b at the index given by the workitem id, takes
; llvm.minimum.f32 of them, negates the result and stores it to %out.gep.
; The check lines expect a single v_maximum_f32 whose two sources carry the
; negate modifier (-A, -B) and whose result is the value that gets stored,
; i.e. the negation is absorbed by turning the minimum into a maximum.
8 ; GCN-LABEL: {{^}}v_fneg_minimum_f32:
9 ; GCN: global_load_b32 [[A:v[0-9]+]]
10 ; GCN: global_load_b32 [[B:v[0-9]+]]
11 ; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
12 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
13 define void @v_fneg_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
14 %tid = call i32 @llvm.amdgcn.workitem.id.x()
15 %tid.ext = sext i32 %tid to i64
16 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
17 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
18 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
19 %a = load volatile float, ptr addrspace(1) %a.gep
20 %b = load volatile float, ptr addrspace(1) %b.gep
21 %min = call float @llvm.minimum.f32(float %a, float %b)
22 %fneg = fneg float %min
23 store float %fneg, ptr addrspace(1) %out.gep
; fneg(minimum(a, a)): the same loaded value is passed as both operands.
; Only one load is expected, and the check lines expect a v_maximum_f32 that
; uses the negated loaded value for both sources (-A, -A); its result is the
; value stored to %out.gep.
27 ; GCN-LABEL: {{^}}v_fneg_self_minimum_f32:
28 ; GCN: global_load_b32 [[A:v[0-9]+]]
29 ; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
30 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
31 define void @v_fneg_self_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
32 %tid = call i32 @llvm.amdgcn.workitem.id.x()
33 %tid.ext = sext i32 %tid to i64
34 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
35 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
36 %a = load volatile float, ptr addrspace(1) %a.gep
37 %min = call float @llvm.minimum.f32(float %a, float %a)
38 %min.fneg = fneg float %min
39 store float %min.fneg, ptr addrspace(1) %out.gep
; fneg(minimum(a, 4.0)): one loaded operand and a positive constant.
; The check lines expect a v_maximum_f32 of the negated load and the negated
; constant (-A, -4.0), with that result stored to %out.gep.
43 ; GCN-LABEL: {{^}}v_fneg_posk_minimum_f32:
44 ; GCN: global_load_b32 [[A:v[0-9]+]]
45 ; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
46 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
47 define void @v_fneg_posk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
48 %tid = call i32 @llvm.amdgcn.workitem.id.x()
49 %tid.ext = sext i32 %tid to i64
50 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
51 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
52 %a = load volatile float, ptr addrspace(1) %a.gep
53 %min = call float @llvm.minimum.f32(float %a, float 4.0)
54 %fneg = fneg float %min
55 store float %fneg, ptr addrspace(1) %out.gep
; fneg(minimum(a, -4.0)): one loaded operand and a negative constant.
; The check lines expect a v_maximum_f32 of the negated load and the constant
; with its sign flipped (-A, 4.0), with that result stored to %out.gep.
59 ; GCN-LABEL: {{^}}v_fneg_negk_minimum_f32:
60 ; GCN: global_load_b32 [[A:v[0-9]+]]
61 ; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
62 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
63 define void @v_fneg_negk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
64 %tid = call i32 @llvm.amdgcn.workitem.id.x()
65 %tid.ext = sext i32 %tid to i64
66 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
67 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
68 %a = load volatile float, ptr addrspace(1) %a.gep
69 %min = call float @llvm.minimum.f32(float %a, float -4.0)
70 %fneg = fneg float %min
71 store float %fneg, ptr addrspace(1) %out.gep
; fneg(minimum(a, 0.0)): one loaded operand and a zero constant.
; Unlike the other constant cases, the check lines expect the operation to
; stay a v_minimum_f32 on the un-negated load and 0 rather than becoming a
; v_maximum_f32 with negated sources.
; NOTE(review): none of the check lines matches an instruction that performs
; the negation itself; only the minimum and a store of its result register
; are matched. Confirm that this looseness is intentional.
75 ; GCN-LABEL: {{^}}v_fneg_0_minimum_f32:
76 ; GCN: global_load_b32 [[A:v[0-9]+]]
77 ; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], [[A]], 0
78 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
79 define void @v_fneg_0_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
80 %tid = call i32 @llvm.amdgcn.workitem.id.x()
81 %tid.ext = sext i32 %tid to i64
82 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
83 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
84 %a = load volatile float, ptr addrspace(1) %a.gep
85 %min = call float @llvm.minimum.f32(float %a, float 0.0)
86 %fneg = fneg float %min
87 store float %fneg, ptr addrspace(1) %out.gep
; fneg(minimum(a, 0.0)) whose only user is an fmul by a second loaded value.
; The check lines expect the minimum to stay a v_minimum_f32 on the un-negated
; load and 0, and the negation to appear as a negate modifier on the first
; source of the following v_mul_f32_e64 (-MIN * B). The product is stored to
; %out.gep.
91 ; GCN-LABEL: {{^}}v_fneg_0_minimum_foldable_use_f32:
92 ; GCN: global_load_b32 [[A:v[0-9]+]]
93 ; GCN: global_load_b32 [[B:v[0-9]+]]
94 ; GCN: v_minimum_f32 [[MIN:v[0-9]+]], [[A]], 0
95 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
96 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
97 define void @v_fneg_0_minimum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
98 %tid = call i32 @llvm.amdgcn.workitem.id.x()
99 %tid.ext = sext i32 %tid to i64
100 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
101 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
102 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
103 %a = load volatile float, ptr addrspace(1) %a.gep
104 %b = load volatile float, ptr addrspace(1) %b.gep
105 %min = call float @llvm.minimum.f32(float %a, float 0.0)
106 %fneg = fneg float %min
107 %mul = fmul float %fneg, %b
108 store float %mul, ptr addrspace(1) %out.gep
; minimum(a, b) with two users: an fneg and an fmul by 4.0.
; The check lines expect a single v_maximum_f32 of the negated loads (-A, -B)
; to provide the negated value, and the multiply to be expressed in terms of
; that maximum: as (-4.0 * MAX0) in the SelectionDAG run and as (-MAX0 * 4.0)
; in the GlobalISel run. Both values are then stored, maximum first.
; Both stores are volatile and write through %out directly; %out.gep is
; computed but not used by either store.
112 ; GCN-LABEL: {{^}}v_fneg_minimum_multi_use_minimum_f32:
113 ; GCN: global_load_b32 [[A:v[0-9]+]]
114 ; GCN: global_load_b32 [[B:v[0-9]+]]
115 ; GCN: v_maximum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
116 ; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
117 ; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
118 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]]
119 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
120 define void @v_fneg_minimum_multi_use_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
121 %tid = call i32 @llvm.amdgcn.workitem.id.x()
122 %tid.ext = sext i32 %tid to i64
123 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
124 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
125 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
126 %a = load volatile float, ptr addrspace(1) %a.gep
127 %b = load volatile float, ptr addrspace(1) %b.gep
128 %min = call float @llvm.minimum.f32(float %a, float %b)
129 %fneg = fneg float %min
130 %use1 = fmul float %min, 4.0
131 store volatile float %fneg, ptr addrspace(1) %out
132 store volatile float %use1, ptr addrspace(1) %out
136 ; --------------------------------------------------------------------------------
; llvm.maximum.f32 tests
138 ; --------------------------------------------------------------------------------
; fneg(maximum(a, b)) with both operands loaded from memory.
; The IR loads %a and %b at the index given by the workitem id, takes
; llvm.maximum.f32 of them, negates the result and stores it to %out.gep.
; The check lines expect a single v_minimum_f32 whose two sources carry the
; negate modifier (-A, -B) and whose result is the value that gets stored,
; i.e. the negation is absorbed by turning the maximum into a minimum.
140 ; GCN-LABEL: {{^}}v_fneg_maximum_f32:
141 ; GCN: global_load_b32 [[A:v[0-9]+]]
142 ; GCN: global_load_b32 [[B:v[0-9]+]]
143 ; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
144 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
145 define void @v_fneg_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
146 %tid = call i32 @llvm.amdgcn.workitem.id.x()
147 %tid.ext = sext i32 %tid to i64
148 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
149 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
150 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
151 %a = load volatile float, ptr addrspace(1) %a.gep
152 %b = load volatile float, ptr addrspace(1) %b.gep
153 %max = call float @llvm.maximum.f32(float %a, float %b)
154 %fneg = fneg float %max
155 store float %fneg, ptr addrspace(1) %out.gep
; fneg(maximum(a, a)): the same loaded value is passed as both operands.
; Only one load is expected, and the check lines expect a v_minimum_f32 that
; uses the negated loaded value for both sources (-A, -A); its result is the
; value stored to %out.gep.
159 ; GCN-LABEL: {{^}}v_fneg_self_maximum_f32:
160 ; GCN: global_load_b32 [[A:v[0-9]+]]
161 ; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
162 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
163 define void @v_fneg_self_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
164 %tid = call i32 @llvm.amdgcn.workitem.id.x()
165 %tid.ext = sext i32 %tid to i64
166 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
167 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
168 %a = load volatile float, ptr addrspace(1) %a.gep
169 %max = call float @llvm.maximum.f32(float %a, float %a)
170 %max.fneg = fneg float %max
171 store float %max.fneg, ptr addrspace(1) %out.gep
; fneg(maximum(a, 4.0)): one loaded operand and a positive constant.
; The check lines expect a v_minimum_f32 of the negated load and the negated
; constant (-A, -4.0), with that result stored to %out.gep.
175 ; GCN-LABEL: {{^}}v_fneg_posk_maximum_f32:
176 ; GCN: global_load_b32 [[A:v[0-9]+]]
177 ; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
178 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
179 define void @v_fneg_posk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
180 %tid = call i32 @llvm.amdgcn.workitem.id.x()
181 %tid.ext = sext i32 %tid to i64
182 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
183 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
184 %a = load volatile float, ptr addrspace(1) %a.gep
185 %max = call float @llvm.maximum.f32(float %a, float 4.0)
186 %fneg = fneg float %max
187 store float %fneg, ptr addrspace(1) %out.gep
; fneg(maximum(a, -4.0)): one loaded operand and a negative constant.
; The check lines expect a v_minimum_f32 of the negated load and the constant
; with its sign flipped (-A, 4.0), with that result stored to %out.gep.
191 ; GCN-LABEL: {{^}}v_fneg_negk_maximum_f32:
192 ; GCN: global_load_b32 [[A:v[0-9]+]]
193 ; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
194 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
195 define void @v_fneg_negk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
196 %tid = call i32 @llvm.amdgcn.workitem.id.x()
197 %tid.ext = sext i32 %tid to i64
198 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
199 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
200 %a = load volatile float, ptr addrspace(1) %a.gep
201 %max = call float @llvm.maximum.f32(float %a, float -4.0)
202 %fneg = fneg float %max
203 store float %fneg, ptr addrspace(1) %out.gep
; fneg(maximum(a, 0.0)): one loaded operand and a zero constant.
; Unlike the other constant cases, the check lines expect the operation to
; stay a v_maximum_f32 on the un-negated load and 0 rather than becoming a
; v_minimum_f32 with negated sources.
; NOTE(review): none of the check lines matches an instruction that performs
; the negation itself; only the maximum and a store of its result register
; are matched. Confirm that this looseness is intentional.
207 ; GCN-LABEL: {{^}}v_fneg_0_maximum_f32:
208 ; GCN: global_load_b32 [[A:v[0-9]+]]
209 ; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], [[A]], 0
210 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
211 define void @v_fneg_0_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
212 %tid = call i32 @llvm.amdgcn.workitem.id.x()
213 %tid.ext = sext i32 %tid to i64
214 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
215 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
216 %a = load volatile float, ptr addrspace(1) %a.gep
217 %max = call float @llvm.maximum.f32(float %a, float 0.0)
218 %fneg = fneg float %max
219 store float %fneg, ptr addrspace(1) %out.gep
; fneg(maximum(a, 0.0)) whose only user is an fmul by a second loaded value.
; The check lines expect the maximum to stay a v_maximum_f32 on the un-negated
; load and 0, and the negation to appear as a negate modifier on the first
; source of the following v_mul_f32_e64 (-MAX * B). The product is stored to
; %out.gep.
223 ; GCN-LABEL: {{^}}v_fneg_0_maximum_foldable_use_f32:
224 ; GCN: global_load_b32 [[A:v[0-9]+]]
225 ; GCN: global_load_b32 [[B:v[0-9]+]]
226 ; GCN: v_maximum_f32 [[MAX:v[0-9]+]], [[A]], 0
227 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
228 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
229 define void @v_fneg_0_maximum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
230 %tid = call i32 @llvm.amdgcn.workitem.id.x()
231 %tid.ext = sext i32 %tid to i64
232 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
233 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
234 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
235 %a = load volatile float, ptr addrspace(1) %a.gep
236 %b = load volatile float, ptr addrspace(1) %b.gep
237 %max = call float @llvm.maximum.f32(float %a, float 0.0)
238 %fneg = fneg float %max
239 %mul = fmul float %fneg, %b
240 store float %mul, ptr addrspace(1) %out.gep
; maximum(a, b) with two users: an fneg and an fmul by 4.0.
; The check lines expect a single v_minimum_f32 of the negated loads (-A, -B)
; to provide the negated value, and the multiply to be expressed in terms of
; that minimum: as (-4.0 * MIN0) in the SelectionDAG run and as (-MIN0 * 4.0)
; in the GlobalISel run. Both values are then stored, minimum first.
; Both stores are volatile and write through %out directly; %out.gep is
; computed but not used by either store.
244 ; GCN-LABEL: {{^}}v_fneg_maximum_multi_use_maximum_f32:
245 ; GCN: global_load_b32 [[A:v[0-9]+]]
246 ; GCN: global_load_b32 [[B:v[0-9]+]]
247 ; GCN: v_minimum_f32 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
248 ; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MIN0]]
249 ; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MIN0]], 4.0
250 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[MIN0]]
251 ; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
252 define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
253 %tid = call i32 @llvm.amdgcn.workitem.id.x()
254 %tid.ext = sext i32 %tid to i64
255 %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
256 %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
257 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
258 %a = load volatile float, ptr addrspace(1) %a.gep
259 %b = load volatile float, ptr addrspace(1) %b.gep
260 %max = call float @llvm.maximum.f32(float %a, float %b)
261 %fneg = fneg float %max
262 %use1 = fmul float %max, 4.0
263 store volatile float %fneg, ptr addrspace(1) %out
264 store volatile float %use1, ptr addrspace(1) %out
; Declarations of the intrinsics called by the test functions in this file:
; the workitem id query used to index the per-lane loads and stores, and the
; f32 minimum / maximum operations whose negation is being tested.
268 declare i32 @llvm.amdgcn.workitem.id.x()
269 declare float @llvm.minimum.f32(float, float)
270 declare float @llvm.maximum.f32(float, float)