1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-SDAG %s
3 ; RUN: llc -march=amdgcn -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
4 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
5 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG %s
9 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL %s
; Clamp of a value produced by `fadd nnan`: the kernel loads one float per
; workitem from %aptr, adds 1.0, applies maxnum(x, 2.0) followed by
; minnum(.., 4.0), and stores the result to %out at the same index.
; The checks for every RUN configuration expect the max/min pair to be emitted
; as a single v_med3_f32 with the constants 2.0 and 4.0.
11 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
12 ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
14 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
15 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
16 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
17 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
18 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
19 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
20 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
21 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
22 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
23 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
24 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
25 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
26 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
27 ; SI-SDAG-NEXT: s_endpgm
29 ; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
31 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
32 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
33 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
34 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
35 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
36 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
37 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
38 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
39 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
40 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
41 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
42 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
43 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
44 ; SI-GISEL-NEXT: s_endpgm
46 ; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
48 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
49 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
50 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
51 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
52 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
53 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
54 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
55 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
56 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
57 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
58 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
59 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
60 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
61 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
62 ; VI-SDAG-NEXT: s_endpgm
64 ; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
66 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
67 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
68 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
69 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
70 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
71 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
72 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
73 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
74 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
75 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
76 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
77 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
78 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
79 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
80 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
81 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
82 ; VI-GISEL-NEXT: s_endpgm
84 ; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
86 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
87 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
88 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
89 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
90 ; GFX9-NEXT: s_waitcnt vmcnt(0)
91 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
92 ; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
93 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
96 ; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
98 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
99 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
100 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
101 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
102 ; GFX11-NEXT: s_waitcnt vmcnt(0)
103 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
104 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
105 ; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
106 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
107 ; GFX11-NEXT: s_nop 0
108 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
109 ; GFX11-NEXT: s_endpgm
110 %tid = call i32 @llvm.amdgcn.workitem.id.x()
111 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
112 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
113 %a = load float, ptr addrspace(1) %gep0
; The nnan flag is on the fadd only; the maxnum/minnum calls below carry no
; fast-math flags of their own.
114 %a.add = fadd nnan float %a, 1.0
115 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
116 %med = call float @llvm.minnum.f32(float %max, float 4.0)
118 store float %med, ptr addrspace(1) %outgep
; Register/immediate/immediate clamp: one float per workitem is loaded from
; %aptr, incremented with `fadd nnan`, then passed through maxnum(x, 2.0) and
; minnum(.., 4.0) with the variable as the first operand of both calls.
; The checks for every RUN configuration expect a single v_med3_f32 with the
; constants 2.0 and 4.0 in place of separate max and min instructions.
122 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
123 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
125 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
126 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
127 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
128 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
129 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
130 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
131 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
132 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
133 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
134 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
135 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
136 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
137 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
138 ; SI-SDAG-NEXT: s_endpgm
140 ; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
142 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
143 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
144 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
145 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
146 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
147 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
148 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
149 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
150 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
151 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
152 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
153 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
154 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
155 ; SI-GISEL-NEXT: s_endpgm
157 ; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
159 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
160 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
161 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
162 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
163 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
164 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
165 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
166 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
167 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
168 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
169 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
170 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
171 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
172 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
173 ; VI-SDAG-NEXT: s_endpgm
175 ; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
177 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
178 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
179 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
180 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
181 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
182 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
183 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
184 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
185 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
186 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
187 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
188 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
189 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
190 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
191 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
192 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
193 ; VI-GISEL-NEXT: s_endpgm
195 ; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_f32:
197 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
198 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
199 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
200 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
201 ; GFX9-NEXT: s_waitcnt vmcnt(0)
202 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
203 ; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
204 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
205 ; GFX9-NEXT: s_endpgm
207 ; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_f32:
209 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
210 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
211 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
212 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
213 ; GFX11-NEXT: s_waitcnt vmcnt(0)
214 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
215 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
216 ; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
217 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
218 ; GFX11-NEXT: s_nop 0
219 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
220 ; GFX11-NEXT: s_endpgm
221 %tid = call i32 @llvm.amdgcn.workitem.id.x()
222 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
223 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
224 %a = load float, ptr addrspace(1) %gep0
225 %a.add = fadd nnan float %a, 1.0
; Lower bound first (max with 2.0), then upper bound (min with 4.0).
227 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
228 %med = call float @llvm.minnum.f32(float %max, float 4.0)
230 store float %med, ptr addrspace(1) %outgep
; Commuted-operand variant of the 2.0/4.0 clamp: the constant is the FIRST
; operand of both the maxnum and the minnum call, and the variable (the result
; of `fadd nnan`) is the second.
; The checks for every RUN configuration still expect a single
; v_med3_f32 <dst>, <src>, 2.0, 4.0.
234 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
235 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
237 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
238 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
239 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
240 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
241 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
242 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
243 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
244 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
245 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
246 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
247 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
248 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
249 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
250 ; SI-SDAG-NEXT: s_endpgm
252 ; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
254 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
255 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
256 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
257 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
258 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
259 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
260 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
261 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
262 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
263 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
264 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
265 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
266 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
267 ; SI-GISEL-NEXT: s_endpgm
269 ; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
271 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
272 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
273 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
274 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
275 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
276 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
277 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
278 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
279 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
280 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
281 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
282 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
283 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
284 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
285 ; VI-SDAG-NEXT: s_endpgm
287 ; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
289 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
290 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
291 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
292 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
293 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
294 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
295 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
296 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
297 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
298 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
299 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
300 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
301 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
302 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
303 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
304 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
305 ; VI-GISEL-NEXT: s_endpgm
307 ; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
309 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
310 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
311 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
312 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
313 ; GFX9-NEXT: s_waitcnt vmcnt(0)
314 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
315 ; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
316 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
317 ; GFX9-NEXT: s_endpgm
319 ; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
321 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
322 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
323 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
324 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
325 ; GFX11-NEXT: s_waitcnt vmcnt(0)
326 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
327 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
328 ; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
329 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
330 ; GFX11-NEXT: s_nop 0
331 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
332 ; GFX11-NEXT: s_endpgm
333 %tid = call i32 @llvm.amdgcn.workitem.id.x()
334 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
335 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
336 %a = load float, ptr addrspace(1) %gep0
337 %a.add = fadd nnan float %a, 1.0
; Both calls take the constant as operand 0 and the variable as operand 1.
339 %max = call float @llvm.maxnum.f32(float 2.0, float %a.add)
340 %med = call float @llvm.minnum.f32(float 4.0, float %max)
342 store float %med, ptr addrspace(1) %outgep
; Partially commuted variant of the 2.0/4.0 clamp: maxnum takes the variable
; first and the constant second, while minnum takes the constant first and the
; maxnum result second. The clamped value comes from `fadd nnan`.
; The checks for every RUN configuration still expect a single
; v_med3_f32 <dst>, <src>, 2.0, 4.0.
346 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
347 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
349 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
350 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
351 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
352 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
353 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
354 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
355 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
356 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
357 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
358 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
359 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
360 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
361 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
362 ; SI-SDAG-NEXT: s_endpgm
364 ; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
366 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
367 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
368 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
369 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
370 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
371 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
372 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
373 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
374 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
375 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
376 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
377 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
378 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
379 ; SI-GISEL-NEXT: s_endpgm
381 ; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
383 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
384 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
385 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
386 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
387 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
388 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
389 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
390 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
391 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
392 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
393 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
394 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
395 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
396 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
397 ; VI-SDAG-NEXT: s_endpgm
399 ; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
401 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
402 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
403 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
404 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
405 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
406 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
407 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
408 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
409 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
410 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
411 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
412 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
413 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
414 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
415 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
416 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
417 ; VI-GISEL-NEXT: s_endpgm
419 ; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
421 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
422 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
423 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
424 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
425 ; GFX9-NEXT: s_waitcnt vmcnt(0)
426 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
427 ; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
428 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
429 ; GFX9-NEXT: s_endpgm
431 ; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
433 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
434 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
435 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
436 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
437 ; GFX11-NEXT: s_waitcnt vmcnt(0)
438 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
439 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
440 ; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
441 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
442 ; GFX11-NEXT: s_nop 0
443 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
444 ; GFX11-NEXT: s_endpgm
445 %tid = call i32 @llvm.amdgcn.workitem.id.x()
446 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
447 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
448 %a = load float, ptr addrspace(1) %gep0
449 %a.add = fadd nnan float %a, 1.0
; Only the minnum call is commuted (constant in operand 0).
451 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
452 %med = call float @llvm.minnum.f32(float 4.0, float %max)
454 store float %med, ptr addrspace(1) %outgep
; Negative test for med3 formation: the constants are in the "wrong" order,
; i.e. the maxnum bound (4.0) is larger than the minnum bound (2.0).
; No configuration's checks contain v_med3_f32 here. The gfx1100 checks expect
; one v_maxmin_f32 with 4.0, 2.0; all other configurations expect a separate
; v_max_f32 with 4.0 followed by a v_min_f32 with 2.0.
458 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
459 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
461 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
462 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
463 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
464 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
465 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
466 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
467 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
468 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
469 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
470 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
471 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
472 ; SI-SDAG-NEXT: v_max_f32_e32 v2, 4.0, v2
473 ; SI-SDAG-NEXT: v_min_f32_e32 v2, 2.0, v2
474 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
475 ; SI-SDAG-NEXT: s_endpgm
477 ; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
479 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
480 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
481 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
482 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
483 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
484 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
485 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
486 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
487 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
488 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
489 ; SI-GISEL-NEXT: v_max_f32_e32 v2, 4.0, v2
490 ; SI-GISEL-NEXT: v_min_f32_e32 v2, 2.0, v2
491 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
492 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
493 ; SI-GISEL-NEXT: s_endpgm
495 ; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
497 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
498 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
499 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
500 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
501 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
502 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
503 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
504 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
505 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
506 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
507 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
508 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
509 ; VI-SDAG-NEXT: v_max_f32_e32 v2, 4.0, v2
510 ; VI-SDAG-NEXT: v_min_f32_e32 v2, 2.0, v2
511 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
512 ; VI-SDAG-NEXT: s_endpgm
514 ; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
516 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
517 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
518 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
519 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
520 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
521 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
522 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
523 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
524 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
525 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
526 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
527 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
528 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
529 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
530 ; VI-GISEL-NEXT: v_max_f32_e32 v2, 4.0, v2
531 ; VI-GISEL-NEXT: v_min_f32_e32 v2, 2.0, v2
532 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
533 ; VI-GISEL-NEXT: s_endpgm
535 ; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
537 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
538 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
539 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
540 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
541 ; GFX9-NEXT: s_waitcnt vmcnt(0)
542 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
543 ; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v1
544 ; GFX9-NEXT: v_min_f32_e32 v1, 2.0, v1
545 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
546 ; GFX9-NEXT: s_endpgm
548 ; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
550 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
551 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
552 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
553 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
554 ; GFX11-NEXT: s_waitcnt vmcnt(0)
555 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
556 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
557 ; GFX11-NEXT: v_maxmin_f32 v1, v1, 4.0, 2.0
558 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
559 ; GFX11-NEXT: s_nop 0
560 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
561 ; GFX11-NEXT: s_endpgm
562 %tid = call i32 @llvm.amdgcn.workitem.id.x()
563 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
564 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
565 %a = load float, ptr addrspace(1) %gep0
566 %a.add = fadd nnan float %a, 1.0
; Bounds deliberately reversed: max against 4.0 first, then min against 2.0.
568 %max = call float @llvm.maxnum.f32(float %a.add, float 4.0)
569 %med = call float @llvm.minnum.f32(float %max, float 2.0)
571 store float %med, ptr addrspace(1) %outgep
; Multi-use variant of the 2.0/4.0 clamp: the intermediate maxnum result is
; consumed both by the minnum call and by a second store, so it has to stay
; live. Both results are written to the same address with volatile stores.
; Expected output differs by instruction selector: the -global-isel=0 checks
; keep a separate v_max_f32 and v_min_f32 (no v_med3_f32), while the
; -global-isel=1 checks expect v_med3_f32 for the clamp plus a v_max_f32 for
; the extra use.
575 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
576 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
578 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
579 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
580 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
581 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
582 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
583 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
584 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
585 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
586 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
587 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
588 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
589 ; SI-SDAG-NEXT: v_max_f32_e32 v2, 2.0, v2
590 ; SI-SDAG-NEXT: v_min_f32_e32 v3, 4.0, v2
591 ; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
592 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
593 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
594 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
595 ; SI-SDAG-NEXT: s_endpgm
597 ; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
599 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
600 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
601 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
602 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
603 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
604 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
605 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
606 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
607 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
608 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
609 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
610 ; SI-GISEL-NEXT: v_max_f32_e32 v3, 2.0, v2
611 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
612 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
613 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
614 ; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
615 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
616 ; SI-GISEL-NEXT: s_endpgm
618 ; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
620 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
621 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
622 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
623 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
624 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
625 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
626 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
627 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
628 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
629 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
630 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
631 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
632 ; VI-SDAG-NEXT: v_max_f32_e32 v2, 2.0, v2
633 ; VI-SDAG-NEXT: v_min_f32_e32 v3, 4.0, v2
634 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v3
635 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
636 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
637 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
638 ; VI-SDAG-NEXT: s_endpgm
640 ; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
642 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
643 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
644 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
645 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
646 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
647 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
648 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
649 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
650 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
651 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
652 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
653 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
654 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
655 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
656 ; VI-GISEL-NEXT: v_max_f32_e32 v3, 2.0, v2
657 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
658 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
659 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
660 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v3
661 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
662 ; VI-GISEL-NEXT: s_endpgm
664 ; GFX9-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
665 ; GFX9-SDAG: ; %bb.0:
666 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
667 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
668 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
669 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
670 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
671 ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
672 ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, 2.0, v1
673 ; GFX9-SDAG-NEXT: v_min_f32_e32 v2, 4.0, v1
674 ; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[0:1]
675 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
676 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
677 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
678 ; GFX9-SDAG-NEXT: s_endpgm
680 ; GFX9-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
681 ; GFX9-GISEL: ; %bb.0:
682 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
683 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
684 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
685 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
686 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
687 ; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1
688 ; GFX9-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v1
689 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
690 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
691 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
692 ; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[0:1]
693 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
694 ; GFX9-GISEL-NEXT: s_endpgm
696 ; GFX11-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
697 ; GFX11-SDAG: ; %bb.0:
698 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
699 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
700 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
701 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
702 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
703 ; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
704 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
705 ; GFX11-SDAG-NEXT: v_max_f32_e32 v1, 2.0, v1
706 ; GFX11-SDAG-NEXT: v_min_f32_e32 v2, 4.0, v1
707 ; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] dlc
708 ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
709 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] dlc
710 ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
711 ; GFX11-SDAG-NEXT: s_nop 0
712 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
713 ; GFX11-SDAG-NEXT: s_endpgm
715 ; GFX11-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
716 ; GFX11-GISEL: ; %bb.0:
717 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
718 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
719 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
720 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
721 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
722 ; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1
723 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
724 ; GFX11-GISEL-NEXT: v_med3_f32 v2, v1, 2.0, 4.0
725 ; GFX11-GISEL-NEXT: v_max_f32_e32 v1, 2.0, v1
726 ; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1] dlc
727 ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
728 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] dlc
729 ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
730 ; GFX11-GISEL-NEXT: s_nop 0
731 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
732 ; GFX11-GISEL-NEXT: s_endpgm
733 %tid = call i32 @llvm.amdgcn.workitem.id.x()
734 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
735 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
736 %a = load float, ptr addrspace(1) %gep0
737 %a.add = fadd nnan float %a, 1.0
739 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
740 %med = call float @llvm.minnum.f32(float %max, float 4.0)
; Volatile stores keep both %med and the extra use of %max observable.
742 store volatile float %med, ptr addrspace(1) %outgep
743 store volatile float %max, ptr addrspace(1) %outgep
; Double-precision variant of the max-then-min pattern with constant bounds:
; loads a double from %aptr[tid], adds 1.0 (the fadd carries the nnan flag),
; applies maxnum(x, 2.0) followed by minnum(x, 4.0), and stores the result to
; %out[tid].
; Every run configuration below expects a separate v_max_f64 followed by a
; v_min_f64; none of the expected sequences contains a med3 instruction.
; NOTE(review): presumably because these targets have no f64 med3
; instruction - confirm against the ISA documentation.
747 define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
748 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
750 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
751 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
752 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
753 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
754 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
755 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
756 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
757 ; SI-SDAG-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
758 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
759 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
760 ; SI-SDAG-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
761 ; SI-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], 2.0
762 ; SI-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], 4.0
763 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
764 ; SI-SDAG-NEXT: s_endpgm
766 ; SI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
768 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
769 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
770 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
771 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
772 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
773 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
774 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
775 ; SI-GISEL-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
776 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
777 ; SI-GISEL-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
778 ; SI-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], 2.0
779 ; SI-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], 4.0
780 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
781 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
782 ; SI-GISEL-NEXT: s_endpgm
784 ; VI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
786 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
787 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
788 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
789 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
790 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
791 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
792 ; VI-SDAG-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
793 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
794 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s0, v2
795 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
796 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
797 ; VI-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
798 ; VI-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
799 ; VI-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
800 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
801 ; VI-SDAG-NEXT: s_endpgm
803 ; VI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
805 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
806 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0
807 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
808 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
809 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
810 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v4
811 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
812 ; VI-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
813 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
814 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
815 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v4
816 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
817 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
818 ; VI-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
819 ; VI-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
820 ; VI-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
821 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
822 ; VI-GISEL-NEXT: s_endpgm
824 ; GFX9-LABEL: v_test_fmed3_r_i_i_f64:
826 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
827 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
828 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
829 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
830 ; GFX9-NEXT: s_waitcnt vmcnt(0)
831 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
832 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
833 ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
834 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
835 ; GFX9-NEXT: s_endpgm
837 ; GFX11-LABEL: v_test_fmed3_r_i_i_f64:
839 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
840 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
841 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
842 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[2:3]
843 ; GFX11-NEXT: s_waitcnt vmcnt(0)
844 ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
845 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
846 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
847 ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
848 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
849 ; GFX11-NEXT: s_nop 0
850 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
851 ; GFX11-NEXT: s_endpgm
; Per-workitem addressing: both the input and the output element are indexed
; by the workitem id in x.
852 %tid = call i32 @llvm.amdgcn.workitem.id.x()
853 %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
854 %outgep = getelementptr double, ptr addrspace(1) %out, i32 %tid
855 %a = load double, ptr addrspace(1) %gep0
; The nnan flag on this fadd marks the value fed into maxnum as never NaN.
856 %a.add = fadd nnan double %a, 1.0
; Lower bound first (maxnum with 2.0), then upper bound (minnum with 4.0).
858 %max = call double @llvm.maxnum.f64(double %a.add, double 2.0)
859 %med = call double @llvm.minnum.f64(double %max, double 4.0)
861 store double %med, ptr addrspace(1) %outgep
; Single-precision maxnum(x, 2.0) / minnum(x, 4.0) pattern applied directly to
; the loaded value: this body has no fadd and no per-instruction nnan flag.
; The function uses attribute group #2, which is defined outside this chunk.
; NOTE(review): judging by the test name, #2 presumably enables no-NaNs FP
; math for the whole function - confirm against the attribute definitions.
; Every run configuration below expects the max/min pair to be emitted as a
; single v_med3_f32 with the inline constants 2.0 and 4.0.
865 define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
866 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
868 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
869 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
870 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
871 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
872 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
873 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
874 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
875 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
876 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
877 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
878 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
879 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
880 ; SI-SDAG-NEXT: s_endpgm
882 ; SI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
884 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
885 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
886 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
887 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
888 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
889 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
890 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
891 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
892 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
893 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
894 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
895 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
896 ; SI-GISEL-NEXT: s_endpgm
898 ; VI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
900 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
901 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
902 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
903 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
904 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
905 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
906 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
907 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
908 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
909 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
910 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
911 ; VI-SDAG-NEXT: v_med3_f32 v2, v3, 2.0, 4.0
912 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
913 ; VI-SDAG-NEXT: s_endpgm
915 ; VI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
917 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
918 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
919 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
920 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
921 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
922 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
923 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
924 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
925 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
926 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
927 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
928 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
929 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
930 ; VI-GISEL-NEXT: v_med3_f32 v2, v3, 2.0, 4.0
931 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
932 ; VI-GISEL-NEXT: s_endpgm
934 ; GFX9-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
936 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
937 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
938 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
939 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
940 ; GFX9-NEXT: s_waitcnt vmcnt(0)
941 ; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
942 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
943 ; GFX9-NEXT: s_endpgm
945 ; GFX11-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
947 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
948 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
949 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
950 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
951 ; GFX11-NEXT: s_waitcnt vmcnt(0)
952 ; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
953 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
954 ; GFX11-NEXT: s_nop 0
955 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
956 ; GFX11-NEXT: s_endpgm
; Per-workitem addressing: both the input and the output element are indexed
; by the workitem id in x.
957 %tid = call i32 @llvm.amdgcn.workitem.id.x()
958 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
959 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
960 %a = load float, ptr addrspace(1) %gep0
; The loaded value goes straight into maxnum; nothing in this body marks it
; as NaN-free on a per-instruction basis.
962 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
963 %med = call float @llvm.minnum.f32(float %max, float 4.0)
965 store float %med, ptr addrspace(1) %outgep
; Max-then-min with constant bounds 2.0 and 4.0, written as fcmp + select
; pairs instead of the llvm.maxnum / llvm.minnum intrinsics. The input is
; a[tid] + 1.0 where the fadd carries the nnan flag; the comparisons use the
; unordered predicates ule and uge.
; Expected output differs per selector:
;  - all four SelectionDAG runs expect a single v_med3_f32 with 2.0 and 4.0;
;  - the GlobalISel run without -mcpu expects v_max_legacy_f32 followed by
;    v_min_legacy_f32;
;  - the tonga, gfx900 and gfx1100 GlobalISel runs expect two
;    v_cmp / v_cndmask pairs and no med3.
969 define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
970 ; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
972 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
973 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
974 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
975 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
976 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
977 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
978 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
979 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
980 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
981 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
982 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
983 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
984 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
985 ; SI-SDAG-NEXT: s_endpgm
987 ; SI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
989 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
990 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
991 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
992 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
993 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
994 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
995 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
996 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
997 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
998 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
999 ; SI-GISEL-NEXT: v_max_legacy_f32_e64 v2, v2, 2.0
1000 ; SI-GISEL-NEXT: v_min_legacy_f32_e64 v2, v2, 4.0
1001 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
1002 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1003 ; SI-GISEL-NEXT: s_endpgm
1005 ; VI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1007 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1008 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1009 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1010 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1011 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
1012 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1013 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
1014 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1015 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
1016 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1017 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1018 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
1019 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
1020 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1021 ; VI-SDAG-NEXT: s_endpgm
1023 ; VI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1024 ; VI-GISEL: ; %bb.0:
1025 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1026 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1027 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1028 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1029 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1030 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1031 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1032 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
1033 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1034 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1035 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1036 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1037 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1038 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3
1039 ; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2
1040 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 2.0, vcc
1041 ; VI-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc, 4.0, v2
1042 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 4.0, vcc
1043 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
1044 ; VI-GISEL-NEXT: s_endpgm
1046 ; GFX9-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1047 ; GFX9-SDAG: ; %bb.0:
1048 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1049 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1050 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1051 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
1052 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1053 ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
1054 ; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
1055 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
1056 ; GFX9-SDAG-NEXT: s_endpgm
1058 ; GFX9-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1059 ; GFX9-GISEL: ; %bb.0:
1060 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1061 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1062 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1063 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
1064 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1
1066 ; GFX9-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v1
1067 ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc
1068 ; GFX9-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc, 4.0, v1
1069 ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
1070 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
1071 ; GFX9-GISEL-NEXT: s_endpgm
1073 ; GFX11-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1074 ; GFX11-SDAG: ; %bb.0:
1075 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1076 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1077 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1078 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
1079 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1080 ; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
1081 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1082 ; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
1083 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1084 ; GFX11-SDAG-NEXT: s_nop 0
1085 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1086 ; GFX11-SDAG-NEXT: s_endpgm
1088 ; GFX11-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1089 ; GFX11-GISEL: ; %bb.0:
1090 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1091 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1092 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1093 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
1094 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1095 ; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1
1096 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1097 ; GFX11-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 2.0, v1
1098 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc_lo
1099 ; GFX11-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 4.0, v1
1100 ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
1101 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1102 ; GFX11-GISEL-NEXT: s_nop 0
1103 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1104 ; GFX11-GISEL-NEXT: s_endpgm
; Per-workitem addressing: both the input and the output element are indexed
; by the workitem id in x.
1105 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1106 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1107 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
1108 %a = load float, ptr addrspace(1) %gep0
; The nnan flag on this fadd marks the compared value as never NaN.
1109 %a.nnan = fadd nnan float %a, 1.0
; Lower bound: pick 2.0 when the value is <= 2.0 (or unordered), else the value.
1112 %cmp0 = fcmp ule float %a.nnan, 2.0
1113 %max = select i1 %cmp0, float 2.0, float %a.nnan
; Upper bound: pick 4.0 when the result is >= 4.0 (or unordered), else the result.
1116 %cmp1 = fcmp uge float %max, 4.0
1117 %med = select i1 %cmp1, float 4.0, float %max
1119 store float %med, ptr addrspace(1) %outgep
; Median-of-three pattern built from minnum/maxnum on three volatile-loaded
; floats, with the FIRST input negated before use:
;   max(min(-a, b), min(max(-a, b), c))
; The negation is written as "fsub float -0.0, %a". The function uses
; attribute group #2, which is defined outside this chunk.
; NOTE(review): judging by the test name, #2 presumably enables no-NaNs FP
; math for the whole function - confirm against the attribute definitions.
; Expected output differs per selector:
;  - all SelectionDAG runs expect one v_med3_f32 with the negation folded in
;    as a source modifier on the first operand;
;  - the GlobalISel runs expect the negation as a separate instruction in
;    front of an unmodified v_med3_f32: v_mul_f32 by -1.0 for the runs
;    without -mcpu and with tonga, and v_max_f32 of the negated value with
;    itself for gfx900 and gfx1100.
1123 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1124 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1126 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1127 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
1128 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
1129 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1130 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
1131 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
1132 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
1133 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1134 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
1135 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
1136 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
1137 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1138 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1139 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1140 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1141 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1142 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1143 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
1144 ; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4
1145 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1146 ; SI-SDAG-NEXT: s_endpgm
1148 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1149 ; SI-GISEL: ; %bb.0:
1150 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1151 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1152 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
1153 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
1154 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
1155 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1156 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
1157 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1158 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1159 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
1160 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1161 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1162 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
1163 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1164 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1165 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
1166 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
1167 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
1168 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1169 ; SI-GISEL-NEXT: s_endpgm
1171 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1173 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1174 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1175 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1176 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1177 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
1178 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1179 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
1180 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
1181 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1182 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
1183 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
1184 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1185 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
1186 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1187 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
1188 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1189 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
1190 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1191 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1192 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
1193 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1194 ; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3
1195 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1196 ; VI-SDAG-NEXT: s_endpgm
1198 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1199 ; VI-GISEL: ; %bb.0:
1200 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1201 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1202 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1203 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1204 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1205 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1206 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1207 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
1208 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
1209 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
1210 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1211 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
1212 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
1213 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
1214 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1215 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
1216 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1217 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
1218 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1219 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
1220 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1221 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1222 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1223 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1224 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1225 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
1226 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
1227 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
1228 ; VI-GISEL-NEXT: s_endpgm
1230 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1231 ; GFX9-SDAG: ; %bb.0:
1232 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1233 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1234 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1235 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
1236 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1237 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
1238 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1239 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
1240 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1241 ; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
1242 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
1243 ; GFX9-SDAG-NEXT: s_endpgm
1245 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1246 ; GFX9-GISEL: ; %bb.0:
1247 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1248 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1249 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1250 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
1251 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1252 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
1253 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1254 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
1255 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1256 ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
1257 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1258 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
1259 ; GFX9-GISEL-NEXT: s_endpgm
1261 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1262 ; GFX11-SDAG: ; %bb.0:
1263 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1264 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1265 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1266 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1267 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1268 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1269 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1270 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1271 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1272 ; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
1273 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1274 ; GFX11-SDAG-NEXT: s_nop 0
1275 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1276 ; GFX11-SDAG-NEXT: s_endpgm
1278 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1279 ; GFX11-GISEL: ; %bb.0:
1280 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1281 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1282 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1283 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1284 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1285 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1286 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1287 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1288 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1289 ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
1290 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1291 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1292 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1293 ; GFX11-GISEL-NEXT: s_nop 0
1294 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1295 ; GFX11-GISEL-NEXT: s_endpgm
; Per-workitem addressing: all three inputs and the output are indexed by the
; workitem id in x.
1296 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1297 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1298 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1299 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1300 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The loads are volatile; in the expected output each one is followed by its
; own wait on vmcnt before the next load is issued.
1301 %a = load volatile float, ptr addrspace(1) %gep0
1302 %b = load volatile float, ptr addrspace(1) %gep1
1303 %c = load volatile float, ptr addrspace(1) %gep2
; Negation of the first input, spelled as a subtraction from -0.0.
1304 %a.fneg = fsub float -0.0, %a
; med3 = max(min(-a, b), min(max(-a, b), c))
1305 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
1306 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
1307 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
1308 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1309 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern built from minnum/maxnum on three volatile-loaded
; floats, with the SECOND input negated before use:
;   max(min(a, -b), min(max(a, -b), c))
; The negation is written as "fsub float -0.0, %b". The function uses
; attribute group #2, which is defined outside this chunk.
; NOTE(review): judging by the test name, #2 presumably enables no-NaNs FP
; math for the whole function - confirm against the attribute definitions.
; Expected output differs per selector:
;  - all SelectionDAG runs expect one v_med3_f32 with the negation folded in
;    as a source modifier on the second operand;
;  - the GlobalISel runs expect the negation as a separate instruction in
;    front of an unmodified v_med3_f32: v_mul_f32 by -1.0 for the runs
;    without -mcpu and with tonga, and v_max_f32 of the negated value with
;    itself for gfx900 and gfx1100.
1313 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1314 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1316 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1317 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
1318 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
1319 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1320 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
1321 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
1322 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
1323 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1324 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
1325 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
1326 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
1327 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1328 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1329 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1330 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1331 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1332 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1333 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
1334 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, -v3, v4
1335 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1336 ; SI-SDAG-NEXT: s_endpgm
1338 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1339 ; SI-GISEL: ; %bb.0:
1340 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1341 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1342 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
1343 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
1344 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
1345 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1346 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
1347 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1348 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1349 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
1350 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1351 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1352 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
1353 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1354 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1355 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, -1.0, v3
1356 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
1357 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
1358 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1359 ; SI-GISEL-NEXT: s_endpgm
1361 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1363 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1364 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1365 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1366 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1367 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
1368 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1369 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
1370 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
1371 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1372 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
1373 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
1374 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1375 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
1376 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1377 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
1378 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1379 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
1380 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1381 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1382 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
1383 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1384 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, -v2, v3
1385 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1386 ; VI-SDAG-NEXT: s_endpgm
1388 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1389 ; VI-GISEL: ; %bb.0:
1390 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1391 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1392 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1393 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1394 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1395 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1396 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1397 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
1398 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
1399 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
1400 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1401 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
1402 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
1403 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
1404 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1405 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
1406 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1407 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
1408 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1409 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
1410 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1411 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1412 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1413 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1414 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1415 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
1416 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
1417 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
1418 ; VI-GISEL-NEXT: s_endpgm
1420 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1421 ; GFX9-SDAG: ; %bb.0:
1422 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1423 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1424 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1425 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
1426 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1427 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
1428 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1429 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
1430 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1431 ; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, -v2, v3
1432 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
1433 ; GFX9-SDAG-NEXT: s_endpgm
1435 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1436 ; GFX9-GISEL: ; %bb.0:
1437 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1438 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1439 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1440 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
1441 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1442 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
1443 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1444 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
1445 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1446 ; GFX9-GISEL-NEXT: v_max_f32_e64 v2, -v2, -v2
1447 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1448 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
1449 ; GFX9-GISEL-NEXT: s_endpgm
1451 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1452 ; GFX11-SDAG: ; %bb.0:
1453 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1454 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1455 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1456 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1457 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1458 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1459 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1460 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1461 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1462 ; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, -v2, v3
1463 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1464 ; GFX11-SDAG-NEXT: s_nop 0
1465 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1466 ; GFX11-SDAG-NEXT: s_endpgm
1468 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1469 ; GFX11-GISEL: ; %bb.0:
1470 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1471 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1472 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1473 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1474 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1475 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1476 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1477 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1478 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX11-GISEL-NEXT: v_max_f32_e64 v2, -v2, -v2
1480 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1481 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1482 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1483 ; GFX11-GISEL-NEXT: s_nop 0
1484 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1485 ; GFX11-GISEL-NEXT: s_endpgm
; Per-workitem addressing: all three inputs and the output are indexed by the
; workitem id in x.
1486 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1487 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1488 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1489 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1490 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The loads are volatile; in the expected output each one is followed by its
; own wait on vmcnt before the next load is issued.
1491 %a = load volatile float, ptr addrspace(1) %gep0
1492 %b = load volatile float, ptr addrspace(1) %gep1
1493 %c = load volatile float, ptr addrspace(1) %gep2
; Negation of the second input, spelled as a subtraction from -0.0.
1494 %b.fneg = fsub float -0.0, %b
; med3 = max(min(a, -b), min(max(a, -b), c))
1495 %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
1496 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
1497 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
1498 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1499 store float %med3, ptr addrspace(1) %outgep
; Median-of-three min/max pattern where the third operand is negated, i.e. the
; negation would be a source modifier on the last v_med3_f32 operand.
; The SelectionDAG check lines below expect the negation folded into that
; operand. The GlobalISel check lines expect a separate negating instruction
; (v_mul_f32 by -1.0, or v_max_f32 of -x with itself) followed by a v_med3_f32
; without modifiers.
; NOTE(review): attribute group #2 is defined outside this section; the
; function name suggests it enables no-NaNs FP math - confirm.
1503 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1504 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1506 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1507 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
1508 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
1509 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1510 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
1511 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
1512 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
1513 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1514 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
1515 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
1516 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
1517 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1518 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1519 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1520 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1521 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1522 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1523 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
1524 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, -v4
1525 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1526 ; SI-SDAG-NEXT: s_endpgm
1528 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1529 ; SI-GISEL: ; %bb.0:
1530 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1531 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1532 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
1533 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
1534 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
1535 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1536 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
1537 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1538 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1539 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
1540 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1541 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1542 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
1543 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1544 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1545 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v4
1546 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
1547 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
1548 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1549 ; SI-GISEL-NEXT: s_endpgm
1551 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1553 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1554 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1555 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1556 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1557 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
1558 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1559 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
1560 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
1561 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1562 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
1563 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
1564 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1565 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
1566 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1567 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
1568 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1569 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
1570 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1571 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1572 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
1573 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1574 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, -v3
1575 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1576 ; VI-SDAG-NEXT: s_endpgm
1578 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1579 ; VI-GISEL: ; %bb.0:
1580 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1581 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1582 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1583 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1584 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1585 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1586 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1587 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
1588 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
1589 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
1590 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1591 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
1592 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
1593 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
1594 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1595 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
1596 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1597 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
1598 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1599 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
1600 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1601 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1602 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1603 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1604 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1605 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, -1.0, v3
1606 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
1607 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
1608 ; VI-GISEL-NEXT: s_endpgm
1610 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1611 ; GFX9-SDAG: ; %bb.0:
1612 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1613 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1614 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1615 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
1616 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1617 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
1618 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1619 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
1620 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1621 ; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, v2, -v3
1622 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
1623 ; GFX9-SDAG-NEXT: s_endpgm
1625 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1626 ; GFX9-GISEL: ; %bb.0:
1627 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1628 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1629 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1630 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
1631 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1632 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
1633 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
1635 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1636 ; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -v3, -v3
1637 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1638 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
1639 ; GFX9-GISEL-NEXT: s_endpgm
1641 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1642 ; GFX11-SDAG: ; %bb.0:
1643 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1644 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1645 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1646 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1647 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1648 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1649 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1650 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1651 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1652 ; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, v2, -v3
1653 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1654 ; GFX11-SDAG-NEXT: s_nop 0
1655 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1656 ; GFX11-SDAG-NEXT: s_endpgm
1658 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1659 ; GFX11-GISEL: ; %bb.0:
1660 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1661 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1662 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1663 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1664 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1665 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1666 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1667 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1668 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1669 ; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -v3, -v3
1670 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1671 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
1672 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1673 ; GFX11-GISEL-NEXT: s_nop 0
1674 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1675 ; GFX11-GISEL-NEXT: s_endpgm
; Index the three input arrays and the output array by the work-item x id.
1676 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1677 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1678 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1679 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1680 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; Volatile loads; the check lines show each one as its own glc load followed
; by a vmcnt wait.
1681 %a = load volatile float, ptr addrspace(1) %gep0
1682 %b = load volatile float, ptr addrspace(1) %gep1
1683 %c = load volatile float, ptr addrspace(1) %gep2
; -c, written as a subtraction from negative zero.
1684 %c.fneg = fsub float -0.0, %c
; med3 = max(min(a, b), min(max(a, b), -c))
1685 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
1686 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
1687 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
1688 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1689 store float %med3, ptr addrspace(1) %outgep
; Median-of-three min/max pattern with a different source modifier on every
; operand: -a, |b| and -|c|.
; The SelectionDAG check lines below expect all three modifiers folded into a
; single v_med3_f32. The GlobalISel check lines expect only the |b| modifier
; folded; the two negated operands are produced by separate instructions
; (v_mul_f32 by -1.0, or v_max_f32 of the value with itself) first.
; NOTE(review): attribute group #2 is defined outside this section; the
; function name suggests it enables no-NaNs FP math - confirm.
1693 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1694 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1696 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1697 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
1698 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
1699 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1700 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
1701 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
1702 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
1703 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1704 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
1705 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
1706 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
1707 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1708 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1709 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1710 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1711 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1712 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1713 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
1714 ; SI-SDAG-NEXT: v_med3_f32 v2, -v2, |v3|, -|v4|
1715 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1716 ; SI-SDAG-NEXT: s_endpgm
1718 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1719 ; SI-GISEL: ; %bb.0:
1720 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1721 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1722 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
1723 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
1724 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
1725 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1726 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
1727 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1728 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1729 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
1730 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1731 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1732 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
1733 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1734 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1735 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
1736 ; SI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v4|
1737 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, |v3|, v4
1738 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
1739 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1740 ; SI-GISEL-NEXT: s_endpgm
1742 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1744 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1745 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1746 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1747 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1748 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
1749 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1750 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
1751 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
1752 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1753 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
1754 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
1755 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1756 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
1757 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1758 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
1759 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1760 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
1761 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1762 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1763 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
1764 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1765 ; VI-SDAG-NEXT: v_med3_f32 v2, -v7, |v2|, -|v3|
1766 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1767 ; VI-SDAG-NEXT: s_endpgm
1769 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1770 ; VI-GISEL: ; %bb.0:
1771 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1772 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1773 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1774 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1775 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1776 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1777 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1778 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
1779 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
1780 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
1781 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1782 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
1783 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
1784 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
1785 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1786 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
1787 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1788 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
1789 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1790 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
1791 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1792 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1793 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1794 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1795 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1796 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
1797 ; VI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
1798 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, |v2|, v3
1799 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
1800 ; VI-GISEL-NEXT: s_endpgm
1802 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1803 ; GFX9-SDAG: ; %bb.0:
1804 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1805 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1806 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1807 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
1808 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1809 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
1810 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1811 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
1812 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1813 ; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, |v2|, -|v3|
1814 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
1815 ; GFX9-SDAG-NEXT: s_endpgm
1817 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1818 ; GFX9-GISEL: ; %bb.0:
1819 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1820 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1821 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1822 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
1823 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1824 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
1825 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1826 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
1827 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1828 ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
1829 ; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
1830 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, |v2|, v3
1831 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
1832 ; GFX9-GISEL-NEXT: s_endpgm
1834 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1835 ; GFX11-SDAG: ; %bb.0:
1836 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1837 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1838 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1839 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1840 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1841 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1842 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1843 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1844 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1845 ; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, |v2|, -|v3|
1846 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1847 ; GFX11-SDAG-NEXT: s_nop 0
1848 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1849 ; GFX11-SDAG-NEXT: s_endpgm
1851 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1852 ; GFX11-GISEL: ; %bb.0:
1853 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
1854 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1855 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1856 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
1857 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1858 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
1859 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1860 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
1861 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1862 ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
1863 ; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
1864 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1865 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, |v2|, v3
1866 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1867 ; GFX11-GISEL-NEXT: s_nop 0
1868 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1869 ; GFX11-GISEL-NEXT: s_endpgm
; Index the three input arrays and the output array by the work-item x id.
1870 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1871 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1872 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1873 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1874 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; Volatile loads; the check lines show each one as its own glc load followed
; by a vmcnt wait.
1875 %a = load volatile float, ptr addrspace(1) %gep0
1876 %b = load volatile float, ptr addrspace(1) %gep1
1877 %c = load volatile float, ptr addrspace(1) %gep2
; Operand preparation: -a, |b| and -|c|. Negation is written as a subtraction
; from negative zero.
1879 %a.fneg = fsub float -0.0, %a
1880 %b.fabs = call float @llvm.fabs.f32(float %b)
1881 %c.fabs = call float @llvm.fabs.f32(float %c)
1882 %c.fabs.fneg = fsub float -0.0, %c.fabs
; med3 = max(min(-a, |b|), min(max(-a, |b|), -|c|))
1884 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
1885 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
1886 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
1887 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1889 store float %med3, ptr addrspace(1) %outgep
; Median-of-three min/max pattern where every operand is a negated absolute
; value: -|a|, -|b| and -|c|.
; The SelectionDAG check lines below expect all three neg+abs modifiers folded
; into a single v_med3_f32. The GlobalISel check lines expect three separate
; instructions that materialize -|x| (v_mul_f32 by -1.0, or v_max_f32 of the
; value with itself) followed by a v_med3_f32 without modifiers.
; NOTE(review): attribute group #2 is defined outside this section; the
; function name suggests it enables no-NaNs FP math - confirm.
1893 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1894 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1896 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1897 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
1898 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
1899 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1900 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
1901 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
1902 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
1903 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1904 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
1905 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
1906 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
1907 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1908 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1909 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1910 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1911 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1912 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1913 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
1914 ; SI-SDAG-NEXT: v_med3_f32 v2, -|v2|, -|v3|, -|v4|
1915 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1916 ; SI-SDAG-NEXT: s_endpgm
1918 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1919 ; SI-GISEL: ; %bb.0:
1920 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
1921 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1922 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
1923 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
1924 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
1925 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1926 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
1927 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1928 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1929 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
1930 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1931 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1932 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
1933 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1934 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
1935 ; SI-GISEL-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
1936 ; SI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
1937 ; SI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v4|
1938 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
1939 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
1940 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1941 ; SI-GISEL-NEXT: s_endpgm
1943 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1945 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1946 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1947 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1948 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
1949 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
1950 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1951 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
1952 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
1953 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1954 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
1955 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
1956 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1957 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
1958 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1959 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
1960 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1961 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
1962 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1963 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
1964 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
1965 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1966 ; VI-SDAG-NEXT: v_med3_f32 v2, -|v7|, -|v2|, -|v3|
1967 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
1968 ; VI-SDAG-NEXT: s_endpgm
1970 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1971 ; VI-GISEL: ; %bb.0:
1972 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
1973 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
1974 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1975 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1976 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
1977 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1978 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1979 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
1980 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
1981 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
1982 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1983 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
1984 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
1985 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
1986 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
1987 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
1988 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1989 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
1990 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1991 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
1992 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
1993 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
1994 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
1995 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
1996 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1997 ; VI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v7|
1998 ; VI-GISEL-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
1999 ; VI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
2000 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
2001 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2002 ; VI-GISEL-NEXT: s_endpgm
2004 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2005 ; GFX9-SDAG: ; %bb.0:
2006 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2007 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2008 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2009 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
2010 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
2011 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
2012 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
2013 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
2014 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
2015 ; GFX9-SDAG-NEXT: v_med3_f32 v1, -|v1|, -|v2|, -|v3|
2016 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
2017 ; GFX9-SDAG-NEXT: s_endpgm
2019 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2020 ; GFX9-GISEL: ; %bb.0:
2021 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2022 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2023 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2024 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
2025 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
2026 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
2027 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
2028 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
2029 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
2030 ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
2031 ; GFX9-GISEL-NEXT: v_max_f32_e64 v2, -|v2|, -|v2|
2032 ; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
2033 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
2034 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
2035 ; GFX9-GISEL-NEXT: s_endpgm
2037 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2038 ; GFX11-SDAG: ; %bb.0:
2039 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2040 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2041 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2042 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2043 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2044 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2045 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2046 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2047 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2048 ; GFX11-SDAG-NEXT: v_med3_f32 v1, -|v1|, -|v2|, -|v3|
2049 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
2050 ; GFX11-SDAG-NEXT: s_nop 0
2051 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2052 ; GFX11-SDAG-NEXT: s_endpgm
2054 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2055 ; GFX11-GISEL: ; %bb.0:
2056 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2057 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2058 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2059 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2060 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2061 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2062 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2063 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2064 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2065 ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
2066 ; GFX11-GISEL-NEXT: v_max_f32_e64 v2, -|v2|, -|v2|
2067 ; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
2068 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2069 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
2070 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
2071 ; GFX11-GISEL-NEXT: s_nop 0
2072 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2073 ; GFX11-GISEL-NEXT: s_endpgm
; Index the three input arrays and the output array by the work-item x id.
2074 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2075 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2076 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2077 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2078 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; Volatile loads; the check lines show each one as its own glc load followed
; by a vmcnt wait.
2079 %a = load volatile float, ptr addrspace(1) %gep0
2080 %b = load volatile float, ptr addrspace(1) %gep1
2081 %c = load volatile float, ptr addrspace(1) %gep2
; Operand preparation: -|a|, -|b| and -|c|. Negation is written as a
; subtraction from negative zero.
2083 %a.fabs = call float @llvm.fabs.f32(float %a)
2084 %a.fabs.fneg = fsub float -0.0, %a.fabs
2085 %b.fabs = call float @llvm.fabs.f32(float %b)
2086 %b.fabs.fneg = fsub float -0.0, %b.fabs
2087 %c.fabs = call float @llvm.fabs.f32(float %c)
2088 %c.fabs.fneg = fsub float -0.0, %c.fabs
; med3 = max(min(-|a|, -|b|), min(max(-|a|, -|b|), -|c|))
2090 %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
2091 %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
2092 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
2093 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2095 store float %med3, ptr addrspace(1) %outgep
; Median-of-three test in which each of the three operands is produced by an
; `fadd nnan`; the minnum/maxnum calls themselves carry no fast-math flags.
; The IR spells the median as max(min(a, b), min(max(a, b), c)). Every checked
; configuration below expects the adds to be followed by a single v_med3_f32.
2099 define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2100 ; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
2102 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2103 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2104 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2105 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2106 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2107 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2108 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2109 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2110 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2111 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2112 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2113 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2114 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2115 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2116 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2117 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2118 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2119 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2120 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
2121 ; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3
2122 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4
2123 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2124 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2125 ; SI-SDAG-NEXT: s_endpgm
2127 ; SI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0:
2128 ; SI-GISEL: ; %bb.0:
2129 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2130 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2131 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2132 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2133 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2134 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2135 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2136 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2137 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2138 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2139 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2140 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2141 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2142 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2143 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2144 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
2145 ; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3
2146 ; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4
2147 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2148 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2149 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2150 ; SI-GISEL-NEXT: s_endpgm
2152 ; VI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
2154 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2155 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2156 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2157 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2158 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2159 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2160 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2161 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2162 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2163 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2164 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2165 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2166 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2167 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2168 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2169 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2170 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2171 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2172 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2173 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2174 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2175 ; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7
2176 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2
2177 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3
2178 ; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3
2179 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2180 ; VI-SDAG-NEXT: s_endpgm
2182 ; VI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0:
2183 ; VI-GISEL: ; %bb.0:
2184 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2185 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2186 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2187 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2188 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2189 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2190 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2191 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2192 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2193 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2194 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2195 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2196 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2197 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2198 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2199 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2200 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2201 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2202 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2203 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2204 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2205 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2206 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2207 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2208 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2209 ; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7
2210 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2
2211 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3
2212 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
2213 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2214 ; VI-GISEL-NEXT: s_endpgm
2216 ; GFX9-LABEL: v_nnan_inputs_med3_f32_pat0:
2218 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2219 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2220 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2221 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
2222 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2223 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
2224 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2225 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
2226 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2227 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
2228 ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2
2229 ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3
2230 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
2231 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
2232 ; GFX9-NEXT: s_endpgm
2234 ; GFX11-LABEL: v_nnan_inputs_med3_f32_pat0:
2236 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2237 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2238 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2239 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2240 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2241 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2242 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2243 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2244 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2245 ; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
2246 ; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3
2247 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2248 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
2249 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
2250 ; GFX11-NEXT: s_nop 0
2251 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2252 ; GFX11-NEXT: s_endpgm
; Address element %tid (the workitem id in x) of each input buffer and of %out.
2253 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2254 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2255 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2256 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2257 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
2258 %a = load volatile float, ptr addrspace(1) %gep0
2259 %b = load volatile float, ptr addrspace(1) %gep1
2260 %c = load volatile float, ptr addrspace(1) %gep2
; The nnan flag lives on the fadd that produces each operand, not on the
; min/max calls that consume it.
2262 %a.nnan = fadd nnan float %a, 1.0
2263 %b.nnan = fadd nnan float %b, 2.0
2264 %c.nnan = fadd nnan float %c, 4.0
; %med3 = max(min(a, b), min(max(a, b), c))
2266 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
2267 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
2268 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
2269 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2270 store float %med3, ptr addrspace(1) %outgep
; NOTE(review): this listing shows no `ret void` or closing `}` after the store
; above -- confirm against the upstream test.
; Median-of-three test, max(min(a, b), min(max(a, b), c)), fed directly by the
; loaded values. The two minnum calls and the inner maxnum are marked nnan; the
; outermost maxnum carries no fast-math flags. Every checked configuration
; below still expects a single v_med3_f32.
2274 define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2275 ; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
2277 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2278 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2279 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2280 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2281 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2282 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2283 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2284 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2285 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2286 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2287 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2288 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2289 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2290 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2291 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2292 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2293 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2294 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2295 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2296 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2297 ; SI-SDAG-NEXT: s_endpgm
2299 ; SI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0:
2300 ; SI-GISEL: ; %bb.0:
2301 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2302 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2303 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2304 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2305 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2306 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2307 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2308 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2309 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2310 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2311 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2312 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2313 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2314 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2315 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2316 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2317 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2318 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2319 ; SI-GISEL-NEXT: s_endpgm
2321 ; VI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
2323 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2324 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2325 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2326 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2327 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2328 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2329 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2330 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2331 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2332 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2333 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2334 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2335 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2336 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2337 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2338 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2339 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2340 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2341 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2342 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2343 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2344 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
2345 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2346 ; VI-SDAG-NEXT: s_endpgm
2348 ; VI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0:
2349 ; VI-GISEL: ; %bb.0:
2350 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2351 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2352 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2353 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2354 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2355 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2356 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2357 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2358 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2359 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2360 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2361 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2362 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2363 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2364 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2365 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2366 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2367 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2368 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2369 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2370 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2371 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2372 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2373 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2374 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2375 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
2376 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2377 ; VI-GISEL-NEXT: s_endpgm
2379 ; GFX9-LABEL: v_nnan_input_calls_med3_f32_pat0:
2381 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2382 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2383 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2384 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
2385 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2386 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
2387 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2388 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
2389 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
2391 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
2392 ; GFX9-NEXT: s_endpgm
2394 ; GFX11-LABEL: v_nnan_input_calls_med3_f32_pat0:
2396 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2397 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2398 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2399 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2400 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2401 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2402 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2403 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2404 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2405 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
2406 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
2407 ; GFX11-NEXT: s_nop 0
2408 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2409 ; GFX11-NEXT: s_endpgm
; Address element %tid (the workitem id in x) of each input buffer and of %out.
2410 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2411 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2412 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2413 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2414 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
2415 %a = load volatile float, ptr addrspace(1) %gep0
2416 %b = load volatile float, ptr addrspace(1) %gep1
2417 %c = load volatile float, ptr addrspace(1) %gep2
; %med3 = max(min(a, b), min(max(a, b), c)). Only the three calls that feed the
; final maxnum are nnan; the final maxnum itself is unflagged.
2418 %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
2419 %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
2420 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
2421 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2422 store float %med3, ptr addrspace(1) %outgep
; NOTE(review): this listing shows no `ret void` or closing `}` after the store
; above -- confirm against the upstream test.
; Median-of-three test, max(min(a, b), min(max(a, b), c)), fed directly by the
; loaded values, with the nnan flag on all four minnum/maxnum calls. Every
; checked configuration below expects a single v_med3_f32.
2426 define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2427 ; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
2429 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2430 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2431 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2432 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2433 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2434 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2435 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2436 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2437 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2438 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2439 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2440 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2441 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2442 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2443 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2444 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2445 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2446 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2447 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2448 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2449 ; SI-SDAG-NEXT: s_endpgm
2451 ; SI-GISEL-LABEL: v_nnan_call_med3_f32_pat0:
2452 ; SI-GISEL: ; %bb.0:
2453 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2454 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2455 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2456 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2457 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2458 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2459 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2460 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2461 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2462 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2463 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2464 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2465 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2466 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2467 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2468 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2469 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2470 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2471 ; SI-GISEL-NEXT: s_endpgm
2473 ; VI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
2475 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2476 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2477 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2478 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2479 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2480 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2481 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2482 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2483 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2484 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2485 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2486 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2487 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2488 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2489 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2490 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2491 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2492 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2493 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2494 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2495 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2496 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
2497 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2498 ; VI-SDAG-NEXT: s_endpgm
2500 ; VI-GISEL-LABEL: v_nnan_call_med3_f32_pat0:
2501 ; VI-GISEL: ; %bb.0:
2502 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2503 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2504 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2505 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2506 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2507 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2508 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2509 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2510 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2511 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2512 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2513 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2514 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2515 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2516 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2517 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2518 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2519 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2520 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2521 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2522 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2523 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2524 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2525 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2526 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2527 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
2528 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2529 ; VI-GISEL-NEXT: s_endpgm
2531 ; GFX9-LABEL: v_nnan_call_med3_f32_pat0:
2533 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2534 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2535 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2536 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
2537 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2538 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
2539 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2540 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
2541 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2542 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
2543 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
2544 ; GFX9-NEXT: s_endpgm
2546 ; GFX11-LABEL: v_nnan_call_med3_f32_pat0:
2548 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2549 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2550 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2551 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2552 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2553 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2554 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2555 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2556 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2557 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
2558 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
2559 ; GFX11-NEXT: s_nop 0
2560 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2561 ; GFX11-NEXT: s_endpgm
; Address element %tid (the workitem id in x) of each input buffer and of %out.
2562 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2563 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2564 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2565 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2566 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
2567 %a = load volatile float, ptr addrspace(1) %gep0
2568 %b = load volatile float, ptr addrspace(1) %gep1
2569 %c = load volatile float, ptr addrspace(1) %gep2
; %med3 = max(min(a, b), min(max(a, b), c)), with nnan on every call including
; the outermost maxnum.
2570 %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
2571 %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
2572 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
2573 %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2574 store float %med3, ptr addrspace(1) %outgep
; NOTE(review): this listing shows no `ret void` or closing `}` after the store
; above -- confirm against the upstream test.
; Median-of-three test, max(min(a, b), min(max(a, b), c)), fed directly by the
; loaded values, with the `fast` flag on all four minnum/maxnum calls. Every
; checked configuration below expects a single v_med3_f32.
2578 define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2579 ; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
2581 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2582 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2583 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2584 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2585 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2586 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2587 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2588 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2589 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2590 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2591 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2592 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2593 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2594 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2595 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2596 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2597 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2598 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2599 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2600 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2601 ; SI-SDAG-NEXT: s_endpgm
2603 ; SI-GISEL-LABEL: v_fast_call_med3_f32_pat0:
2604 ; SI-GISEL: ; %bb.0:
2605 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2606 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2607 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2608 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2609 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2610 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2611 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2612 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2613 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2614 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2615 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2616 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2617 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2618 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2619 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2620 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2621 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2622 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2623 ; SI-GISEL-NEXT: s_endpgm
2625 ; VI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
2627 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2628 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2629 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2630 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2631 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2632 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2633 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2634 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2635 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2636 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2637 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2638 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2639 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2640 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2641 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2642 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2643 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2644 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2645 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2646 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2647 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2648 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
2649 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2650 ; VI-SDAG-NEXT: s_endpgm
2652 ; VI-GISEL-LABEL: v_fast_call_med3_f32_pat0:
2653 ; VI-GISEL: ; %bb.0:
2654 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2655 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2656 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2657 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2658 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2659 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2660 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2661 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2662 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2663 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2664 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2665 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2666 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2667 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2668 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2669 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2670 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2671 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2672 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2673 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2674 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2675 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2676 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2677 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2678 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2679 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
2680 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2681 ; VI-GISEL-NEXT: s_endpgm
2683 ; GFX9-LABEL: v_fast_call_med3_f32_pat0:
2685 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2686 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2687 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2688 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
2689 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2690 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
2691 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2692 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
2693 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2694 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
2695 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
2696 ; GFX9-NEXT: s_endpgm
2698 ; GFX11-LABEL: v_fast_call_med3_f32_pat0:
2700 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2701 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2702 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2703 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2704 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2705 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2706 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2707 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2708 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2709 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
2710 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
2711 ; GFX11-NEXT: s_nop 0
2712 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2713 ; GFX11-NEXT: s_endpgm
; Address element %tid (the workitem id in x) of each input buffer and of %out.
2714 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2715 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2716 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2717 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2718 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
2719 %a = load volatile float, ptr addrspace(1) %gep0
2720 %b = load volatile float, ptr addrspace(1) %gep1
2721 %c = load volatile float, ptr addrspace(1) %gep2
; %med3 = max(min(a, b), min(max(a, b), c)), with `fast` on every call.
2722 %tmp0 = call fast float @llvm.minnum.f32(float %a, float %b)
2723 %tmp1 = call fast float @llvm.maxnum.f32(float %a, float %b)
2724 %tmp2 = call fast float @llvm.minnum.f32(float %tmp1, float %c)
2725 %med3 = call fast float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2726 store float %med3, ptr addrspace(1) %outgep
; NOTE(review): this listing shows no `ret void` or closing `}` after the store
; above -- confirm against the upstream test.
2732 ; 0: max(min(x, y), min(max(x, y), z))
2733 ; 1: max(min(x, y), min(max(y, x), z))
2734 ; 2: max(min(x, y), min(z, max(x, y)))
2735 ; 3: max(min(x, y), min(z, max(y, x)))
2736 ; 4: max(min(y, x), min(max(x, y), z))
2737 ; 5: max(min(y, x), min(max(y, x), z))
2738 ; 6: max(min(y, x), min(z, max(x, y)))
2739 ; 7: max(min(y, x), min(z, max(y, x)))
2740 ; + commute outermost max
2742 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
2743 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
2745 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2746 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2747 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2748 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2749 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2750 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2751 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2752 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2753 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2754 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2755 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2756 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2757 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2758 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2759 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2760 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2761 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2762 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2763 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2764 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2765 ; SI-SDAG-NEXT: s_endpgm
2767 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
2768 ; SI-GISEL: ; %bb.0:
2769 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2770 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2771 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2772 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2773 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2774 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2775 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2776 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2777 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2778 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2779 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2780 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2781 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2782 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2783 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2784 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2785 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2786 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2787 ; SI-GISEL-NEXT: s_endpgm
2789 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
2791 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2792 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2793 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2794 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2795 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2796 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2797 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2798 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2799 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2800 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2801 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2802 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2803 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2804 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2805 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2806 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2807 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2808 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2809 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2810 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2811 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2812 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
2813 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2814 ; VI-SDAG-NEXT: s_endpgm
2816 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
2817 ; VI-GISEL: ; %bb.0:
2818 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2819 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2820 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2821 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2822 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2823 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2824 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2825 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2826 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2827 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2828 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2829 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2830 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2831 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2832 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2833 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2834 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2835 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2836 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2837 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2838 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2839 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2840 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2841 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2842 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2843 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
2844 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2845 ; VI-GISEL-NEXT: s_endpgm
2847 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0:
2849 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2850 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2851 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2852 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
2853 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2854 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
2855 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2856 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
2857 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2858 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
2859 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
2860 ; GFX9-NEXT: s_endpgm
2862 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat0:
2864 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
2865 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2866 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2867 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
2868 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2869 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
2870 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2871 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
2872 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2873 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
2874 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
2875 ; GFX11-NEXT: s_nop 0
2876 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2877 ; GFX11-NEXT: s_endpgm
2878 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2879 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2880 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2881 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2882 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2883 %a = load volatile float, ptr addrspace(1) %gep0
2884 %b = load volatile float, ptr addrspace(1) %gep1
2885 %c = load volatile float, ptr addrspace(1) %gep2
2886 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
2887 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
2888 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
2889 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2890 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern built from minnum/maxnum:
;   maxnum(minnum(a, b), minnum(maxnum(b, a), c))
; The inner maxnum takes its operands in the opposite order (b, a) from the
; minnum of the same pair. Every check prefix below expects the whole
; expression to be selected as a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the test name suggests it enables no-NaNs FP math -- confirm.
2894 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
2895 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
2897 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2898 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
2899 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
2900 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2901 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
2902 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
2903 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
2904 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2905 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
2906 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
2907 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
2908 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2909 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2910 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2911 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2912 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2913 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
2914 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
2915 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
2916 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2917 ; SI-SDAG-NEXT: s_endpgm
2919 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
2920 ; SI-GISEL: ; %bb.0:
2921 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
2922 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2923 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
2924 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
2925 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
2926 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2927 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
2928 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2929 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2930 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
2931 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2932 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2933 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
2934 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2935 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
2936 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
2937 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
2938 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2939 ; SI-GISEL-NEXT: s_endpgm
2941 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
2943 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2944 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2945 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2946 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
2947 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
2948 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2949 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
2950 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
2951 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2952 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
2953 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
2954 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2955 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
2956 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2957 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
2958 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2959 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
2960 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
2961 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
2962 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
2963 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2964 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
2965 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
2966 ; VI-SDAG-NEXT: s_endpgm
2968 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
2969 ; VI-GISEL: ; %bb.0:
2970 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
2971 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
2972 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2973 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
2974 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
2975 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2976 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2977 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
2978 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
2979 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
2980 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2981 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
2982 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
2983 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
2984 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
2985 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
2986 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2987 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
2988 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2989 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
2990 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
2991 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
2992 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
2993 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2994 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2995 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
2996 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
2997 ; VI-GISEL-NEXT: s_endpgm
2999 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat1:
3001 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3002 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3003 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3004 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3005 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3006 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3007 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3008 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3009 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3010 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
3011 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3012 ; GFX9-NEXT: s_endpgm
3014 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat1:
3016 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3017 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3018 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3019 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3020 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3021 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3022 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3023 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3024 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3025 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
3026 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3027 ; GFX11-NEXT: s_nop 0
3028 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3029 ; GFX11-NEXT: s_endpgm
; Index %aptr, %bptr, %cptr and %out by the workitem id and load the three
; operands with volatile loads.
3030 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3031 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3032 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3033 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3034 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3035 %a = load volatile float, ptr addrspace(1) %gep0
3036 %b = load volatile float, ptr addrspace(1) %gep1
3037 %c = load volatile float, ptr addrspace(1) %gep2
; The pattern under test; note maxnum is called with (%b, %a), not (%a, %b).
3038 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3039 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3040 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
3041 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3042 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern with a negated first operand:
;   minnum(maxnum(-a, b), maxnum(minnum(-a, b), c))
; where -a is written as "fsub float -0.0, %a".
; The SelectionDAG prefixes expect the negation to be folded into v_med3_f32
; as a source modifier (-v). The GlobalISel prefixes expect a separate
; instruction ahead of v_med3_f32 instead: v_mul_f32 by -1.0 on SI/VI and
; v_max_f32_e64 with both sources negated on GFX9/GFX11.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the test name suggests it enables no-NaNs FP math -- confirm.
3046 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3047 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3049 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3050 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3051 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3052 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3053 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3054 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3055 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3056 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3057 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3058 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3059 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3060 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3061 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3062 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3063 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3064 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3065 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3066 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3067 ; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4
3068 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3069 ; SI-SDAG-NEXT: s_endpgm
3071 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3072 ; SI-GISEL: ; %bb.0:
3073 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3074 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3075 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3076 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3077 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3078 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3079 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3080 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3081 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3082 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3083 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3084 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3085 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3086 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3087 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3088 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
3089 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
3090 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3091 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3092 ; SI-GISEL-NEXT: s_endpgm
3094 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3096 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3097 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3098 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3099 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3100 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3101 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3102 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3103 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3104 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3105 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3106 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3107 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3108 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3109 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3110 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3111 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3112 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3113 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3114 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3115 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3116 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3117 ; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3
3118 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3119 ; VI-SDAG-NEXT: s_endpgm
3121 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3122 ; VI-GISEL: ; %bb.0:
3123 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3124 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3125 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3126 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3127 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3128 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3129 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3130 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3131 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3132 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3133 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3134 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3135 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3136 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3137 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3138 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3139 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3140 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3141 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3142 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3143 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3144 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3145 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3146 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3147 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3148 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
3149 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
3150 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3151 ; VI-GISEL-NEXT: s_endpgm
3153 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3154 ; GFX9-SDAG: ; %bb.0:
3155 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3156 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3157 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3158 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
3159 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
3160 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
3161 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
3162 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
3163 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
3164 ; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
3165 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
3166 ; GFX9-SDAG-NEXT: s_endpgm
3168 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3169 ; GFX9-GISEL: ; %bb.0:
3170 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3171 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3172 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3173 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
3174 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
3175 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
3176 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
3177 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
3178 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
3179 ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
3180 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
3181 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
3182 ; GFX9-GISEL-NEXT: s_endpgm
3184 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3185 ; GFX11-SDAG: ; %bb.0:
3186 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3187 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3188 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3189 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3190 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
3191 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3192 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
3193 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3194 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
3195 ; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
3196 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
3197 ; GFX11-SDAG-NEXT: s_nop 0
3198 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3199 ; GFX11-SDAG-NEXT: s_endpgm
3201 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3202 ; GFX11-GISEL: ; %bb.0:
3203 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3204 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3205 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3206 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3207 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
3208 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3209 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
3210 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3211 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
3212 ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
3213 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
3214 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
3215 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
3216 ; GFX11-GISEL-NEXT: s_nop 0
3217 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3218 ; GFX11-GISEL-NEXT: s_endpgm
; Index %aptr, %bptr, %cptr and %out by the workitem id and load the three
; operands with volatile loads.
3219 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3220 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3221 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3222 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3223 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3224 %a = load volatile float, ptr addrspace(1) %gep0
3225 %b = load volatile float, ptr addrspace(1) %gep1
3226 %c = load volatile float, ptr addrspace(1) %gep2
; Negation of %a spelled as a subtraction from -0.0; it feeds both the maxnum
; and the minnum below.
3227 %a.fneg = fsub float -0.0, %a
; The pattern under test: the min/max roles are swapped relative to the
; maxnum(minnum(..), minnum(maxnum(..), c)) form, i.e. the outer call is minnum.
3228 %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
3229 %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
3230 %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
3231 %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
3232 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern built from minnum/maxnum:
;   maxnum(minnum(a, b), minnum(c, maxnum(a, b)))
; Here the second-level minnum takes %c as its first operand and the inner
; maxnum result as its second. Every check prefix below expects the whole
; expression to be selected as a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the test name suggests it enables no-NaNs FP math -- confirm.
3236 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3237 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
3239 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3240 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3241 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3242 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3243 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3244 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3245 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3246 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3247 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3248 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3249 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3250 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3251 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3252 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3253 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3254 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3255 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3256 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3257 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
3258 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3259 ; SI-SDAG-NEXT: s_endpgm
3261 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
3262 ; SI-GISEL: ; %bb.0:
3263 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3264 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3265 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3266 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3267 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3268 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3269 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3270 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3271 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3272 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3273 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3274 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3275 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3276 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3277 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3278 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
3279 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3280 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3281 ; SI-GISEL-NEXT: s_endpgm
3283 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
3285 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3286 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3287 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3288 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3289 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3290 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3291 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3292 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3293 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3294 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3295 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3296 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3297 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3298 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3299 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3300 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3301 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3302 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3303 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3304 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3305 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3306 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
3307 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3308 ; VI-SDAG-NEXT: s_endpgm
3310 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
3311 ; VI-GISEL: ; %bb.0:
3312 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3313 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3314 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3315 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3316 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3317 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3318 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3319 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3320 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3321 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3322 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3323 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3324 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3325 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3326 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3327 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3328 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3329 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3330 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3331 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3332 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3333 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3334 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3335 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3336 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3337 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
3338 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3339 ; VI-GISEL-NEXT: s_endpgm
3341 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat2:
3343 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3344 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3345 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3346 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3347 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3348 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3349 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3350 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3351 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3352 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
3353 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3354 ; GFX9-NEXT: s_endpgm
3356 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat2:
3358 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3359 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3360 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3361 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3362 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3363 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3364 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3365 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3366 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3367 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
3368 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3369 ; GFX11-NEXT: s_nop 0
3370 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3371 ; GFX11-NEXT: s_endpgm
; Index %aptr, %bptr, %cptr and %out by the workitem id and load the three
; operands with volatile loads.
3372 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3373 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3374 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3375 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3376 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3377 %a = load volatile float, ptr addrspace(1) %gep0
3378 %b = load volatile float, ptr addrspace(1) %gep1
3379 %c = load volatile float, ptr addrspace(1) %gep2
; The pattern under test; note the second minnum is called with (%c, %tmp1).
3380 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3381 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
3382 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3383 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3384 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern built from minnum/maxnum:
;   maxnum(minnum(a, b), minnum(c, maxnum(b, a)))
; Both the inner maxnum (b, a) and the second-level minnum (c first) take
; their operands in swapped order. Every check prefix below expects the whole
; expression to be selected as a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the test name suggests it enables no-NaNs FP math -- confirm.
3388 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3389 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
3391 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3392 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3393 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3394 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3395 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3396 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3397 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3398 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3399 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3400 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3401 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3402 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3403 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3404 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3405 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3406 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3407 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3408 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3409 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
3410 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3411 ; SI-SDAG-NEXT: s_endpgm
3413 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
3414 ; SI-GISEL: ; %bb.0:
3415 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3416 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3417 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3418 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3419 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3420 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3421 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3422 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3423 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3424 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3425 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3426 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3427 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3428 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3429 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3430 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
3431 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3432 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3433 ; SI-GISEL-NEXT: s_endpgm
3435 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
3437 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3438 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3439 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3440 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3441 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3442 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3443 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3444 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3445 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3446 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3447 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3448 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3449 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3450 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3451 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3452 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3453 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3454 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3455 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3456 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3457 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3458 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
3459 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3460 ; VI-SDAG-NEXT: s_endpgm
3462 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
3463 ; VI-GISEL: ; %bb.0:
3464 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3465 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3466 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3467 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3468 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3469 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3470 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3471 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3472 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3473 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3474 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3475 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3476 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3477 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3478 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3479 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3480 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3481 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3482 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3483 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3484 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3485 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3486 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3487 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3488 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3489 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
3490 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3491 ; VI-GISEL-NEXT: s_endpgm
3493 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat3:
3495 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3496 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3497 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3498 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3499 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3500 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3501 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3502 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3503 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3504 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
3505 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3506 ; GFX9-NEXT: s_endpgm
3508 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat3:
3510 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3511 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3512 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3513 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3514 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3515 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3516 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3517 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3518 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3519 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
3520 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3521 ; GFX11-NEXT: s_nop 0
3522 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3523 ; GFX11-NEXT: s_endpgm
; Index %aptr, %bptr, %cptr and %out by the workitem id and load the three
; operands with volatile loads.
3524 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3525 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3526 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3527 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3528 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3529 %a = load volatile float, ptr addrspace(1) %gep0
3530 %b = load volatile float, ptr addrspace(1) %gep1
3531 %c = load volatile float, ptr addrspace(1) %gep2
; The pattern under test; note maxnum is called with (%b, %a) and the second
; minnum with (%c, %tmp1).
3532 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3533 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3534 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3535 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3536 store float %med3, ptr addrspace(1) %outgep
; Median-of-3 idiom, operand-order variant 4:
;   med3 = maxnum(minnum(b, a), minnum(maxnum(a, b), c))
; a, b and c are read with volatile loads indexed by the workitem id; the
; expected code for every RUN configuration issues the loads one at a time and
; folds the four min/max calls into a single v_med3_f32.
; This variant must differ from pat7 (maxnum(b, a) and minnum(c, tmp1)); it
; previously used exactly those operand orders, leaving variant 4 untested.
; NOTE(review): the assertions below are kept unchanged on the expectation that
; commuting the operands does not alter the selected code; re-run
; utils/update_llc_test_checks.py to confirm.
; NOTE(review): attribute group #2 is defined outside this excerpt; the test
; name suggests it enables no-NaNs handling -- confirm.
3540 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3541 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
3543 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3544 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3545 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3546 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3547 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3548 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3549 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3550 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3551 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3552 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3553 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3554 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3555 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3556 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3557 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3558 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3559 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3560 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3561 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
3562 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3563 ; SI-SDAG-NEXT: s_endpgm
3565 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
3566 ; SI-GISEL: ; %bb.0:
3567 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3568 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3569 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3570 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3571 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3572 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3573 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3574 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3575 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3576 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3577 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3578 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3579 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3580 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3581 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3582 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
3583 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3584 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3585 ; SI-GISEL-NEXT: s_endpgm
3587 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
3589 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3590 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3591 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3592 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3593 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3594 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3595 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3596 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3597 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3598 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3599 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3600 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3601 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3602 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3603 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3604 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3605 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3606 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3607 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3608 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3609 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3610 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
3611 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3612 ; VI-SDAG-NEXT: s_endpgm
3614 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
3615 ; VI-GISEL: ; %bb.0:
3616 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3617 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3618 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3619 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3620 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3621 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3622 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3623 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3624 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3625 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3626 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3627 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3628 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3629 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3630 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3631 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3632 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3633 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3634 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3635 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3636 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3637 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3638 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3639 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3640 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3641 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
3642 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3643 ; VI-GISEL-NEXT: s_endpgm
3645 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat4:
3647 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3648 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3649 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3650 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3651 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3652 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3653 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3654 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3655 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3656 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
3657 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3658 ; GFX9-NEXT: s_endpgm
3660 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat4:
3662 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3663 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3664 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3665 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3666 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3667 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3668 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3669 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3670 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3671 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
3672 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3673 ; GFX11-NEXT: s_nop 0
3674 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3675 ; GFX11-NEXT: s_endpgm
3676 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3677 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3678 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3679 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3680 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3681 %a = load volatile float, ptr addrspace(1) %gep0
3682 %b = load volatile float, ptr addrspace(1) %gep1
3683 %c = load volatile float, ptr addrspace(1) %gep2
3684 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3685 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
3686 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
3687 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3688 store float %med3, ptr addrspace(1) %outgep
; Median-of-3 idiom, operand-order variant 5:
;   med3 = maxnum(minnum(b, a), minnum(maxnum(b, a), c))
; a, b and c are read with volatile loads indexed by the workitem id; the
; expected code for every RUN configuration issues the loads one at a time and
; folds the four min/max calls into a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this excerpt; the test
; name suggests it enables no-NaNs handling -- confirm.
3692 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3693 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
3695 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3696 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3697 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3698 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3699 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3700 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3701 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3702 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3703 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3704 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3705 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3706 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3707 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3708 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3709 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3710 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3711 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3712 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3713 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
3714 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3715 ; SI-SDAG-NEXT: s_endpgm
3717 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
3718 ; SI-GISEL: ; %bb.0:
3719 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3720 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3721 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3722 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3723 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3724 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3725 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3726 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3727 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3728 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3729 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3730 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3731 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3732 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3733 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3734 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
3735 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3736 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3737 ; SI-GISEL-NEXT: s_endpgm
3739 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
3741 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3742 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3743 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3744 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3745 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3746 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3747 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3748 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3749 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3750 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3751 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3752 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3753 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3754 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3755 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3756 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3757 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3758 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3759 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3760 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3761 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3762 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
3763 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3764 ; VI-SDAG-NEXT: s_endpgm
3766 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
3767 ; VI-GISEL: ; %bb.0:
3768 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3769 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3770 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3771 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3772 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3773 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3774 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3775 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3776 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3777 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3778 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3779 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3780 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3781 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3782 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3783 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3784 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3785 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3786 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3787 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3788 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3789 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3790 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3791 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3792 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3793 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
3794 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3795 ; VI-GISEL-NEXT: s_endpgm
3797 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat5:
3799 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3800 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3801 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3802 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3803 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3804 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3805 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3806 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3807 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3808 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
3809 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3810 ; GFX9-NEXT: s_endpgm
3812 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat5:
3814 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3815 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3816 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3817 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3818 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3819 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3820 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3821 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3822 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3823 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
3824 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3825 ; GFX11-NEXT: s_nop 0
3826 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3827 ; GFX11-NEXT: s_endpgm
3828 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3829 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3830 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3831 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3832 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3833 %a = load volatile float, ptr addrspace(1) %gep0
3834 %b = load volatile float, ptr addrspace(1) %gep1
3835 %c = load volatile float, ptr addrspace(1) %gep2
3836 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3837 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3838 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
3839 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3840 store float %med3, ptr addrspace(1) %outgep
; Median-of-3 idiom, operand-order variant 6:
;   med3 = maxnum(minnum(b, a), minnum(c, maxnum(a, b)))
; a, b and c are read with volatile loads indexed by the workitem id; the
; expected code for every RUN configuration issues the loads one at a time and
; folds the four min/max calls into a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this excerpt; the test
; name suggests it enables no-NaNs handling -- confirm.
3844 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3845 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
3847 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3848 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
3849 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
3850 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3851 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
3852 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
3853 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
3854 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3855 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
3856 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
3857 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
3858 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3859 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3860 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3861 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3862 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3863 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
3864 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
3865 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
3866 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3867 ; SI-SDAG-NEXT: s_endpgm
3869 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
3870 ; SI-GISEL: ; %bb.0:
3871 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
3872 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3873 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
3874 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
3875 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
3876 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3877 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
3878 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3879 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3880 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
3881 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3882 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3883 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
3884 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3885 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
3886 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
3887 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
3888 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3889 ; SI-GISEL-NEXT: s_endpgm
3891 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
3893 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3894 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3895 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
3896 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
3897 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
3898 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3899 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
3900 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
3901 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3902 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
3903 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
3904 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3905 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
3906 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3907 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
3908 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3909 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
3910 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
3911 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
3912 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
3913 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3914 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
3915 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
3916 ; VI-SDAG-NEXT: s_endpgm
3918 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
3919 ; VI-GISEL: ; %bb.0:
3920 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3921 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
3922 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
3923 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
3924 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
3925 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3926 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3927 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
3928 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
3929 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
3930 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3931 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
3932 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
3933 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3934 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
3935 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
3936 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3937 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
3938 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3939 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
3940 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
3941 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
3942 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
3943 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
3944 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3945 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
3946 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
3947 ; VI-GISEL-NEXT: s_endpgm
3949 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat6:
3951 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
3952 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3953 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3954 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
3955 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3956 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
3957 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3958 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
3959 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3960 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
3961 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
3962 ; GFX9-NEXT: s_endpgm
3964 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat6:
3966 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
3967 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3968 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3969 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
3970 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3971 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
3972 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3973 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
3974 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3975 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
3976 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
3977 ; GFX11-NEXT: s_nop 0
3978 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3979 ; GFX11-NEXT: s_endpgm
3980 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3981 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3982 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3983 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3984 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3985 %a = load volatile float, ptr addrspace(1) %gep0
3986 %b = load volatile float, ptr addrspace(1) %gep1
3987 %c = load volatile float, ptr addrspace(1) %gep2
3988 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3989 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
3990 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3991 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3992 store float %med3, ptr addrspace(1) %outgep
; Median-of-3 idiom, operand-order variant 7:
;   med3 = maxnum(minnum(b, a), minnum(c, maxnum(b, a)))
; a, b and c are read with volatile loads indexed by the workitem id; the
; expected code for every RUN configuration issues the loads one at a time and
; folds the four min/max calls into a single v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this excerpt; the test
; name suggests it enables no-NaNs handling -- confirm.
3996 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3997 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
3999 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4000 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4001 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4002 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4003 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4004 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4005 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4006 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4007 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4008 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4009 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4010 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4011 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4012 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4013 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4014 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4015 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4016 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4017 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
4018 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4019 ; SI-SDAG-NEXT: s_endpgm
4021 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
4022 ; SI-GISEL: ; %bb.0:
4023 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4024 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4025 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4026 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4027 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4028 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4029 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4030 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4031 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4032 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4033 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4034 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4035 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4036 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4037 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4038 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
4039 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4040 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4041 ; SI-GISEL-NEXT: s_endpgm
4043 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
4045 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4046 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4047 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4048 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4049 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4050 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4051 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4052 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4053 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4054 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4055 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4056 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4057 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4058 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4059 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4060 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4061 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4062 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4063 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4064 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4065 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4066 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
4067 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4068 ; VI-SDAG-NEXT: s_endpgm
4070 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
4071 ; VI-GISEL: ; %bb.0:
4072 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4073 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4074 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4075 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4076 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4077 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4078 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4079 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4080 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4081 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4082 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4083 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4084 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4085 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4086 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4087 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4088 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4089 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4090 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4091 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4092 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4093 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4094 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4095 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4096 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4097 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
4098 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4099 ; VI-GISEL-NEXT: s_endpgm
4101 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat7:
4103 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4104 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4105 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4106 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4107 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4108 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4109 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4110 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4111 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4112 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
4113 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4114 ; GFX9-NEXT: s_endpgm
4116 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat7:
4118 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4119 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4120 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4121 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4122 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4123 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4124 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4125 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4126 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4127 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
4128 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4129 ; GFX11-NEXT: s_nop 0
4130 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4131 ; GFX11-NEXT: s_endpgm
4132 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4133 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4134 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4135 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4136 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4137 %a = load volatile float, ptr addrspace(1) %gep0
4138 %b = load volatile float, ptr addrspace(1) %gep1
4139 %c = load volatile float, ptr addrspace(1) %gep2
4140 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
4141 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4142 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4143 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
4144 store float %med3, ptr addrspace(1) %outgep
4148 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4149 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
4151 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4152 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4153 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4154 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4155 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4156 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4157 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4158 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4159 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4160 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4161 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4162 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4163 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4164 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4165 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4166 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4167 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4168 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4169 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
4170 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4171 ; SI-SDAG-NEXT: s_endpgm
4173 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
4174 ; SI-GISEL: ; %bb.0:
4175 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4176 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4177 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4178 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4179 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4180 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4181 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4182 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4183 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4184 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4185 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4186 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4187 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4188 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4189 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4190 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
4191 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4192 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4193 ; SI-GISEL-NEXT: s_endpgm
4195 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
4197 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4198 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4199 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4200 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4201 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4202 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4203 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4204 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4205 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4206 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4207 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4208 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4209 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4210 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4211 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4212 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4213 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4214 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4215 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4216 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4217 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4218 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
4219 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4220 ; VI-SDAG-NEXT: s_endpgm
4222 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
4223 ; VI-GISEL: ; %bb.0:
4224 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4225 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4226 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4227 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4228 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4229 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4230 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4231 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4232 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4233 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4234 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4235 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4236 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4237 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4238 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4239 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4240 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4241 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4242 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4243 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4244 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4245 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4246 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4247 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4248 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4249 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
4250 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4251 ; VI-GISEL-NEXT: s_endpgm
4253 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat8:
4255 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4256 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4257 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4258 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4259 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4260 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4261 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4262 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4263 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4264 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
4265 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4266 ; GFX9-NEXT: s_endpgm
4268 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat8:
4270 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4271 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4272 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4273 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4274 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4275 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4276 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4277 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4278 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4279 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
4280 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4281 ; GFX11-NEXT: s_nop 0
4282 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4283 ; GFX11-NEXT: s_endpgm
4284 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4285 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4286 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4287 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4288 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4289 %a = load volatile float, ptr addrspace(1) %gep0
4290 %b = load volatile float, ptr addrspace(1) %gep1
4291 %c = load volatile float, ptr addrspace(1) %gep2
4292 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4293 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
4294 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
4295 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4296 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern 9: max(min(max(b, a), c), min(a, b)), built from
; llvm.minnum.f32 / llvm.maxnum.f32 calls on three volatile global loads
; (a, b, c are loaded from %aptr, %bptr, %cptr at index workitem.id.x).
; The expected output for every check prefix below contains a single
; v_med3_f32 and no separate min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk -- presumably
; it provides the no-NaNs setting the function name refers to; confirm.
4300 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4301 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
4303 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4304 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4305 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4306 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4307 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4308 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4309 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4310 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4311 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4312 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4313 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4314 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4315 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4316 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4317 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4318 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4319 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4320 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4321 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
4322 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4323 ; SI-SDAG-NEXT: s_endpgm
4325 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
4326 ; SI-GISEL: ; %bb.0:
4327 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4328 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4329 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4330 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4331 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4332 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4333 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4334 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4335 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4336 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4337 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4338 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4339 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4340 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4341 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4342 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
4343 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4344 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4345 ; SI-GISEL-NEXT: s_endpgm
4347 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
4349 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4350 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4351 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4352 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4353 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4354 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4355 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4356 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4357 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4358 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4359 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4360 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4361 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4362 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4363 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4364 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4365 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4366 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4367 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4368 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4369 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4370 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
4371 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4372 ; VI-SDAG-NEXT: s_endpgm
4374 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
4375 ; VI-GISEL: ; %bb.0:
4376 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4377 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4378 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4379 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4380 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4381 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4382 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4383 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4384 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4385 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4386 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4387 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4388 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4389 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4390 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4391 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4392 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4393 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4394 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4395 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4396 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4397 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4398 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4399 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4400 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4401 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
4402 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4403 ; VI-GISEL-NEXT: s_endpgm
4405 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat9:
4407 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4408 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4409 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4410 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4411 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4412 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4413 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4414 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4415 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4416 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
4417 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4418 ; GFX9-NEXT: s_endpgm
4420 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat9:
4422 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4423 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4424 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4425 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4426 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4427 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4428 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4429 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4430 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4431 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
4432 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4433 ; GFX11-NEXT: s_nop 0
4434 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4435 ; GFX11-NEXT: s_endpgm
4436 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4437 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4438 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4439 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4440 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4441 %a = load volatile float, ptr addrspace(1) %gep0
4442 %b = load volatile float, ptr addrspace(1) %gep1
4443 %c = load volatile float, ptr addrspace(1) %gep2
4444 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4445 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4446 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
4447 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4448 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern 10: max(min(c, max(a, b)), min(a, b)), built from
; llvm.minnum.f32 / llvm.maxnum.f32 calls on three volatile global loads
; (a, b, c are loaded from %aptr, %bptr, %cptr at index workitem.id.x).
; Here the middle minnum takes %c as its first operand and the inner maxnum
; as its second. The expected output for every check prefix below contains a
; single v_med3_f32 and no separate min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk -- presumably
; it provides the no-NaNs setting the function name refers to; confirm.
4452 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4453 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
4455 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4456 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4457 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4458 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4459 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4460 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4461 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4462 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4463 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4464 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4465 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4466 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4467 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4468 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4469 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4470 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4471 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4472 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4473 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
4474 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4475 ; SI-SDAG-NEXT: s_endpgm
4477 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
4478 ; SI-GISEL: ; %bb.0:
4479 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4480 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4481 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4482 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4483 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4484 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4485 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4486 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4487 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4488 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4489 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4490 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4491 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4492 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4493 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4494 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
4495 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4496 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4497 ; SI-GISEL-NEXT: s_endpgm
4499 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
4501 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4502 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4503 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4504 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4505 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4506 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4507 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4508 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4509 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4510 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4511 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4512 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4513 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4514 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4515 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4516 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4517 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4518 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4519 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4520 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4521 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4522 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
4523 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4524 ; VI-SDAG-NEXT: s_endpgm
4526 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
4527 ; VI-GISEL: ; %bb.0:
4528 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4529 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4530 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4531 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4532 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4533 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4534 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4535 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4536 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4537 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4538 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4539 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4540 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4541 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4542 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4543 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4544 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4545 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4546 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4547 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4548 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4549 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4550 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4551 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4552 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4553 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
4554 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4555 ; VI-GISEL-NEXT: s_endpgm
4557 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat10:
4559 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4560 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4561 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4562 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4563 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4564 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4565 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4566 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4567 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4568 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
4569 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4570 ; GFX9-NEXT: s_endpgm
4572 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat10:
4574 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4575 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4576 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4577 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4578 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4579 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4580 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4581 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4582 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4583 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
4584 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4585 ; GFX11-NEXT: s_nop 0
4586 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4587 ; GFX11-NEXT: s_endpgm
4588 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4589 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4590 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4591 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4592 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4593 %a = load volatile float, ptr addrspace(1) %gep0
4594 %b = load volatile float, ptr addrspace(1) %gep1
4595 %c = load volatile float, ptr addrspace(1) %gep2
4596 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4597 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
4598 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4599 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4600 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern 11: max(min(c, max(b, a)), min(a, b)), built from
; llvm.minnum.f32 / llvm.maxnum.f32 calls on three volatile global loads
; (a, b, c are loaded from %aptr, %bptr, %cptr at index workitem.id.x).
; Here the inner maxnum takes (b, a) and is the second operand of the middle
; minnum. The expected output for every check prefix below contains a single
; v_med3_f32 and no separate min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk -- presumably
; it provides the no-NaNs setting the function name refers to; confirm.
4604 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4605 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
4607 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4608 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4609 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4610 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4611 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4612 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4613 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4614 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4615 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4616 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4617 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4618 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4619 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4620 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4621 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4622 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4623 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4624 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4625 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
4626 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4627 ; SI-SDAG-NEXT: s_endpgm
4629 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
4630 ; SI-GISEL: ; %bb.0:
4631 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4632 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4633 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4634 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4635 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4636 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4637 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4638 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4639 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4640 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4641 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4642 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4643 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4644 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4645 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4646 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
4647 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4648 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4649 ; SI-GISEL-NEXT: s_endpgm
4651 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
4653 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4654 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4655 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4656 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4657 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4658 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4659 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4660 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4661 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4662 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4663 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4664 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4665 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4666 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4667 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4668 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4669 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4670 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4671 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4672 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4673 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4674 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
4675 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4676 ; VI-SDAG-NEXT: s_endpgm
4678 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
4679 ; VI-GISEL: ; %bb.0:
4680 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4681 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4682 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4683 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4684 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4685 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4686 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4687 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4688 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4689 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4690 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4691 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4692 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4693 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4694 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4695 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4696 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4697 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4698 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4699 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4700 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4701 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4702 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4703 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4704 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4705 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
4706 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4707 ; VI-GISEL-NEXT: s_endpgm
4709 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat11:
4711 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4712 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4713 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4714 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4715 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4716 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4717 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4718 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4719 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4720 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
4721 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4722 ; GFX9-NEXT: s_endpgm
4724 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat11:
4726 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4727 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4728 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4729 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4730 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4731 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4732 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4733 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4734 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4735 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
4736 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4737 ; GFX11-NEXT: s_nop 0
4738 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4739 ; GFX11-NEXT: s_endpgm
4740 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4741 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4742 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4743 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4744 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4745 %a = load volatile float, ptr addrspace(1) %gep0
4746 %b = load volatile float, ptr addrspace(1) %gep1
4747 %c = load volatile float, ptr addrspace(1) %gep2
4748 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4749 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4750 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4751 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4752 store float %med3, ptr addrspace(1) %outgep
; Median-of-three pattern 12: max(min(c, max(b, a)), min(b, a)), built from
; llvm.minnum.f32 / llvm.maxnum.f32 calls on three volatile global loads
; (a, b, c are loaded from %aptr, %bptr, %cptr at index workitem.id.x).
; Here both the inner minnum and the inner maxnum take their operands as
; (b, a). The expected output for every check prefix below contains a single
; v_med3_f32 and no separate min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk -- presumably
; it provides the no-NaNs setting the function name refers to; confirm.
4756 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4757 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
4759 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4760 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4761 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4762 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4763 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4764 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4765 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4766 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4767 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4768 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4769 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4770 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4771 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4772 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4773 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4774 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4775 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4776 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4777 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
4778 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4779 ; SI-SDAG-NEXT: s_endpgm
4781 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
4782 ; SI-GISEL: ; %bb.0:
4783 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4784 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4785 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4786 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4787 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4788 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4789 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4790 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4791 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4792 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4793 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4794 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4795 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4796 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4797 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4798 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
4799 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4800 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4801 ; SI-GISEL-NEXT: s_endpgm
4803 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
4805 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4806 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4807 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4808 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4809 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4810 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4811 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4812 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4813 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4814 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4815 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4816 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4817 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4818 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4819 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4820 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4821 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4822 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4823 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4824 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4825 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4826 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
4827 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4828 ; VI-SDAG-NEXT: s_endpgm
4830 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
4831 ; VI-GISEL: ; %bb.0:
4832 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4833 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4834 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4835 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4836 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4837 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4838 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4839 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4840 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4841 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4842 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4843 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4844 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4845 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4846 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4847 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
4848 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4849 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
4850 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4851 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
4852 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
4853 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4854 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4855 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4856 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4857 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
4858 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
4859 ; VI-GISEL-NEXT: s_endpgm
4861 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat12:
4863 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4864 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4865 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4866 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
4867 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4868 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
4869 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4870 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
4871 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4872 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
4873 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
4874 ; GFX9-NEXT: s_endpgm
4876 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat12:
4878 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
4879 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4880 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4881 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
4882 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4883 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
4884 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4885 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
4886 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4887 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
4888 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
4889 ; GFX11-NEXT: s_nop 0
4890 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4891 ; GFX11-NEXT: s_endpgm
4892 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4893 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4894 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4895 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4896 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4897 %a = load volatile float, ptr addrspace(1) %gep0
4898 %b = load volatile float, ptr addrspace(1) %gep1
4899 %c = load volatile float, ptr addrspace(1) %gep2
4900 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
4901 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4902 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4903 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4904 store float %med3, ptr addrspace(1) %outgep
; Median-of-three, variant pat13. The IR below computes
;   maxnum(minnum(maxnum(b, a), c), minnum(b, a))
; and every checked configuration expects the whole min/max tree to be
; selected as one v_med3_f32, with no separate v_min_f32 / v_max_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it supplies the no-NaNs assumption the function name refers to -- confirm.
4908 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4909 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
4911 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4912 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
4913 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
4914 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4915 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
4916 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
4917 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
4918 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4919 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
4920 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
4921 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
4922 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4923 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4924 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4925 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4926 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4927 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
4928 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
4929 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
4930 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4931 ; SI-SDAG-NEXT: s_endpgm
4933 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
4934 ; SI-GISEL: ; %bb.0:
4935 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
4936 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
4937 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
4938 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
4939 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
4940 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4941 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
4942 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4943 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4944 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
4945 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4946 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4947 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
4948 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4949 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
4950 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
4951 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
4952 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4953 ; SI-GISEL-NEXT: s_endpgm
4955 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
4957 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4958 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4959 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
4960 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
4961 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
4962 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4963 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
4964 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
4965 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4966 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
4967 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
4968 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4969 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
4970 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4971 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
4972 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4973 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
4974 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
4975 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
4976 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
4977 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4978 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
4979 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
4980 ; VI-SDAG-NEXT: s_endpgm
4982 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
4983 ; VI-GISEL: ; %bb.0:
4984 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
4985 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
4986 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
4987 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
4988 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
4989 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
4990 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4991 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
4992 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
4993 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
4994 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
4995 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
4996 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
4997 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
4998 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
4999 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5000 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5001 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5002 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5003 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5004 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5005 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5006 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5007 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5008 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5009 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
5010 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5011 ; VI-GISEL-NEXT: s_endpgm
5013 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat13:
5015 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5016 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5017 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5018 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5019 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5020 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5021 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5022 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5023 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5024 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
5025 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5026 ; GFX9-NEXT: s_endpgm
5028 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat13:
5030 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5031 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5032 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5033 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5034 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5035 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5036 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5037 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5038 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5039 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
5040 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
5041 ; GFX11-NEXT: s_nop 0
5042 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5043 ; GFX11-NEXT: s_endpgm
; All four pointers are indexed by the workitem id in x.
5044 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5045 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5046 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5047 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5048 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
5049 %a = load volatile float, ptr addrspace(1) %gep0
5050 %b = load volatile float, ptr addrspace(1) %gep1
5051 %c = load volatile float, ptr addrspace(1) %gep2
; Min/max tree under test; maxnum is the root and takes (%tmp2, %tmp0).
5052 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5053 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
5054 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5055 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5056 store float %med3, ptr addrspace(1) %outgep
; Median-of-three, variant pat14. The IR below computes
;   maxnum(minnum(c, maxnum(a, b)), minnum(b, a))
; i.e. the inner minnum and maxnum take a and b in opposite orders.
; Every checked configuration expects the whole min/max tree to be
; selected as one v_med3_f32, with no separate v_min_f32 / v_max_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it supplies the no-NaNs assumption the function name refers to -- confirm.
5060 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5061 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
5063 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5064 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5065 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5066 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5067 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5068 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5069 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5070 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5071 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5072 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5073 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5074 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5075 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5076 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5077 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5078 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5079 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5080 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5081 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
5082 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5083 ; SI-SDAG-NEXT: s_endpgm
5085 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
5086 ; SI-GISEL: ; %bb.0:
5087 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5088 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5089 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5090 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5091 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5092 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5093 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5094 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5095 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5096 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5097 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5098 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5099 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
5100 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5101 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5102 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
5103 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
5104 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5105 ; SI-GISEL-NEXT: s_endpgm
5107 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
5109 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5110 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5111 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5112 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
5113 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
5114 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5115 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
5116 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
5117 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5118 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
5119 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
5120 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5121 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
5122 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5123 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
5124 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5125 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
5126 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5127 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
5128 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
5129 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5130 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
5131 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5132 ; VI-SDAG-NEXT: s_endpgm
5134 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
5135 ; VI-GISEL: ; %bb.0:
5136 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5137 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5138 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5139 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
5140 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
5141 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5142 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5143 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
5144 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
5145 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
5146 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5147 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
5148 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
5149 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
5150 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5151 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5152 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5153 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5154 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5155 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5156 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5157 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5158 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5159 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5160 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5161 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
5162 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5163 ; VI-GISEL-NEXT: s_endpgm
5165 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat14:
5167 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5168 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5169 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5170 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5171 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5172 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5173 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5174 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5175 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5176 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
5177 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5178 ; GFX9-NEXT: s_endpgm
5180 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat14:
5182 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5183 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5184 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5185 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5186 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5187 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5188 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5189 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5190 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5191 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
5192 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
5193 ; GFX11-NEXT: s_nop 0
5194 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5195 ; GFX11-NEXT: s_endpgm
; All four pointers are indexed by the workitem id in x.
5196 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5197 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5198 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5199 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5200 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
5201 %a = load volatile float, ptr addrspace(1) %gep0
5202 %b = load volatile float, ptr addrspace(1) %gep1
5203 %c = load volatile float, ptr addrspace(1) %gep2
; Min/max tree under test; maxnum is the root and takes (%tmp2, %tmp0).
5204 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5205 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
5206 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
5207 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5208 store float %med3, ptr addrspace(1) %outgep
; Median-of-three, variant pat15. The IR below computes
;   maxnum(minnum(c, maxnum(b, a)), minnum(b, a))
; Every checked configuration expects the whole min/max tree to be
; selected as one v_med3_f32, with no separate v_min_f32 / v_max_f32.
; NOTE(review): the four min/max calls here use exactly the same operand
; order as the ones in v_test_global_nnans_med3_f32_pat12, so this variant
; appears to add no new commutation -- confirm whether a different operand
; order was intended (the assertions would need regenerating if it changes).
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it supplies the no-NaNs assumption the function name refers to -- confirm.
5212 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5213 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
5215 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5216 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5217 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5218 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5219 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5220 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5221 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5222 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5223 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5224 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5225 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5226 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5227 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5228 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5229 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5230 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5231 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5232 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5233 ; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4
5234 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5235 ; SI-SDAG-NEXT: s_endpgm
5237 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
5238 ; SI-GISEL: ; %bb.0:
5239 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5240 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5241 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5242 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5243 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5244 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5245 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5246 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5247 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5248 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5249 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5250 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5251 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
5252 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5253 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5254 ; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4
5255 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
5256 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5257 ; SI-GISEL-NEXT: s_endpgm
5259 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
5261 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5262 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5263 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5264 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
5265 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
5266 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5267 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
5268 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
5269 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5270 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
5271 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
5272 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5273 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
5274 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5275 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
5276 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5277 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
5278 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5279 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
5280 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
5281 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5282 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3
5283 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5284 ; VI-SDAG-NEXT: s_endpgm
5286 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
5287 ; VI-GISEL: ; %bb.0:
5288 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5289 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5290 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5291 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
5292 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
5293 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5294 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5295 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
5296 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
5297 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
5298 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5299 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
5300 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
5301 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
5302 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5303 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5304 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5305 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5306 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5307 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5308 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5309 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5310 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5311 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5312 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5313 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3
5314 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5315 ; VI-GISEL-NEXT: s_endpgm
5317 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat15:
5319 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5320 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5321 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5322 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5323 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5324 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5325 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5326 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5327 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5328 ; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3
5329 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5330 ; GFX9-NEXT: s_endpgm
5332 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat15:
5334 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5335 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5336 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5337 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5338 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5339 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5340 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5341 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5342 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5343 ; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
5344 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
5345 ; GFX11-NEXT: s_nop 0
5346 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5347 ; GFX11-NEXT: s_endpgm
; All four pointers are indexed by the workitem id in x.
5348 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5349 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5350 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5351 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5352 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
5353 %a = load volatile float, ptr addrspace(1) %gep0
5354 %b = load volatile float, ptr addrspace(1) %gep1
5355 %c = load volatile float, ptr addrspace(1) %gep2
; Min/max tree under test; maxnum is the root and takes (%tmp2, %tmp0).
5356 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5357 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
5358 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
5359 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5360 store float %med3, ptr addrspace(1) %outgep
5364 ; Also handle `min` at the root:
5365 ; min(max(x, y), max(min(x, y), z))
; Median-of-three, variant pat16, with minnum (not maxnum) as the root.
; The IR below computes
;   minnum(maxnum(a, b), maxnum(minnum(a, b), c))
; Every checked configuration expects the whole min/max tree to be
; selected as one v_med3_f32, with no separate v_min_f32 / v_max_f32.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it supplies the no-NaNs assumption the function name refers to -- confirm.
5367 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5368 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
5370 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5371 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5372 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5373 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5374 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5375 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5376 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5377 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5378 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5379 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5380 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5381 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5382 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5383 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5384 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5385 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5386 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5387 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5388 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
5389 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5390 ; SI-SDAG-NEXT: s_endpgm
5392 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
5393 ; SI-GISEL: ; %bb.0:
5394 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5395 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5396 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5397 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5398 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5399 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5400 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5401 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5402 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5403 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5404 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5405 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5406 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
5407 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5408 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5409 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
5410 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
5411 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5412 ; SI-GISEL-NEXT: s_endpgm
5414 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
5416 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5417 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5418 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5419 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
5420 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
5421 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5422 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
5423 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
5424 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5425 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
5426 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
5427 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5428 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
5429 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5430 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
5431 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5432 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
5433 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5434 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
5435 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
5436 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5437 ; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3
5438 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5439 ; VI-SDAG-NEXT: s_endpgm
5441 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
5442 ; VI-GISEL: ; %bb.0:
5443 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5444 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5445 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5446 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
5447 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
5448 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5449 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5450 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
5451 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
5452 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
5453 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5454 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
5455 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
5456 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
5457 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5458 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5459 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5460 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5461 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5462 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5463 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5464 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5465 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5466 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5467 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5468 ; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
5469 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5470 ; VI-GISEL-NEXT: s_endpgm
5472 ; GFX9-LABEL: v_test_global_nnans_med3_f32_pat16:
5474 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5475 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5476 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5477 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5478 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5479 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5480 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5481 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5482 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5483 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
5484 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5485 ; GFX9-NEXT: s_endpgm
5487 ; GFX11-LABEL: v_test_global_nnans_med3_f32_pat16:
5489 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5490 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5491 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5492 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5493 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5494 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5495 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5496 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5497 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5498 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
5499 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
5500 ; GFX11-NEXT: s_nop 0
5501 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5502 ; GFX11-NEXT: s_endpgm
; All four pointers are indexed by the workitem id in x.
5503 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5504 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5505 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5506 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5507 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
5508 %a = load volatile float, ptr addrspace(1) %gep0
5509 %b = load volatile float, ptr addrspace(1) %gep1
5510 %c = load volatile float, ptr addrspace(1) %gep2
; Min/max tree under test; minnum is the root and takes (%tmp0, %tmp2).
5511 %tmp0 = call float @llvm.maxnum.f32(float %a, float %b)
5512 %tmp1 = call float @llvm.minnum.f32(float %a, float %b)
5513 %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
5514 %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
5515 store float %med3, ptr addrspace(1) %outgep
5519 ; ---------------------------------------------------------------------
5521 ; ---------------------------------------------------------------------
5523 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5524 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5526 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5527 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5528 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5529 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5530 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5531 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5532 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5533 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5534 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5535 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5536 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5537 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5538 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5539 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5540 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5541 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5542 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5543 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5544 ; SI-SDAG-NEXT: s_mov_b32 s10, -1
5545 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
5546 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
5547 ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
5548 ; SI-SDAG-NEXT: v_min_f32_e32 v5, v2, v3
5549 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
5550 ; SI-SDAG-NEXT: buffer_store_dword v5, off, s[8:11], 0
5551 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5552 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
5553 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
5554 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5555 ; SI-SDAG-NEXT: s_endpgm
5557 ; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5558 ; SI-GISEL: ; %bb.0:
5559 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5560 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5561 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5562 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5563 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5564 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5565 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5566 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5567 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5568 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5569 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5570 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5571 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
5572 ; SI-GISEL-NEXT: s_mov_b32 s3, s11
5573 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
5574 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5575 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5576 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
5577 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
5578 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3
5579 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
5580 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4
5581 ; SI-GISEL-NEXT: buffer_store_dword v5, off, s[0:3], 0
5582 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5583 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
5584 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
5585 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
5586 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5587 ; SI-GISEL-NEXT: s_endpgm
5589 ; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5591 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5592 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5593 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5594 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
5595 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
5596 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5597 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
5598 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
5599 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5600 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
5601 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
5602 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5603 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
5604 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5605 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
5606 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5607 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
5608 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5609 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
5610 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
5611 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5612 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
5613 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
5614 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
5615 ; VI-SDAG-NEXT: v_min_f32_e32 v5, v4, v2
5616 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2
5617 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
5618 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
5619 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v5
5620 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5621 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5622 ; VI-SDAG-NEXT: s_endpgm
5624 ; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5625 ; VI-GISEL: ; %bb.0:
5626 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5627 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5628 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5629 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
5630 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
5631 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5632 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5633 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
5634 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
5635 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
5636 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5637 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
5638 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
5639 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
5640 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5641 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5642 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5643 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5644 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5645 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5646 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5647 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5648 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5649 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5650 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5651 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7
5652 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
5653 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
5654 ; VI-GISEL-NEXT: v_min_f32_e32 v5, v4, v2
5655 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v4, v2
5656 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
5657 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
5658 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v5
5659 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5660 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5661 ; VI-GISEL-NEXT: s_endpgm
5663 ; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5665 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5666 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5667 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5668 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5669 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5670 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5671 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5672 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5673 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5674 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
5675 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
5676 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
5677 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
5678 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
5679 ; GFX9-NEXT: global_store_dword v[0:1], v4, off
5680 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5681 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
5682 ; GFX9-NEXT: v_max_f32_e32 v1, v4, v1
5683 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5684 ; GFX9-NEXT: s_endpgm
5686 ; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5688 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5689 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5690 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5691 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5692 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5693 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5694 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5695 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5696 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5697 ; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
5698 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5699 ; GFX11-NEXT: v_min_f32_e32 v4, v1, v2
5700 ; GFX11-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
5701 ; GFX11-NEXT: v_minmax_f32 v1, v1, v2, v4
5702 ; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
5703 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5704 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
5705 ; GFX11-NEXT: s_nop 0
5706 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5707 ; GFX11-NEXT: s_endpgm
5708 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5709 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5710 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5711 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5712 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
5713 %a = load volatile float, ptr addrspace(1) %gep0
5714 %b = load volatile float, ptr addrspace(1) %gep1
5715 %c = load volatile float, ptr addrspace(1) %gep2
5716 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
5717 store volatile float %tmp0, ptr addrspace(1) undef
5718 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
5719 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5720 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
5721 store float %med3, ptr addrspace(1) %outgep
; Median-of-three min/max pattern in which the intermediate max(a, b) value
; has a second user (a volatile store), in addition to feeding the pattern.
; The expected sequences below keep separate min/max instructions (folded into
; v_minmax_f32 on gfx1100) and contain no v_med3_f32 for any run line.
; Each loaded value is first passed through v_mul_f32 1.0 (SI/VI run lines) or
; v_max_f32 x, x (gfx900/gfx1100 run lines) before the min/max chain.
; NOTE(review): attribute group #1 is defined outside this view; "safe" in the
; name presumably means NaNs are not assumed absent - confirm.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
5725 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5726 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5728 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5729 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5730 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5731 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5732 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5733 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5734 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5735 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5736 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5737 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5738 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5739 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5740 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5741 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5742 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5743 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5744 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5745 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5746 ; SI-SDAG-NEXT: s_mov_b32 s10, -1
5747 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
5748 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
5749 ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
5750 ; SI-SDAG-NEXT: v_min_f32_e32 v5, v2, v3
5751 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
5752 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[8:11], 0
5753 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
5754 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
5755 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
5756 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5757 ; SI-SDAG-NEXT: s_endpgm
5759 ; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5760 ; SI-GISEL: ; %bb.0:
5761 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5762 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5763 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5764 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5765 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5766 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5767 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5768 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5769 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5770 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5771 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5772 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5773 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
5774 ; SI-GISEL-NEXT: s_mov_b32 s3, s11
5775 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
5776 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5777 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5778 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
5779 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
5780 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3
5781 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
5782 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4
5783 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0
5784 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
5785 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
5786 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
5787 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
5788 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5789 ; SI-GISEL-NEXT: s_endpgm
5791 ; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5793 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5794 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5795 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5796 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
5797 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
5798 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5799 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
5800 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
5801 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5802 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
5803 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
5804 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5805 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
5806 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5807 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
5808 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5809 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
5810 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5811 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
5812 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
5813 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5814 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
5815 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
5816 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
5817 ; VI-SDAG-NEXT: v_min_f32_e32 v5, v4, v2
5818 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2
5819 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5820 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
5821 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
5822 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
5823 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
5824 ; VI-SDAG-NEXT: s_endpgm
5826 ; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5827 ; VI-GISEL: ; %bb.0:
5828 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5829 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
5830 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5831 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
5832 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
5833 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5834 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5835 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
5836 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
5837 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
5838 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
5839 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
5840 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
5841 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
5842 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
5843 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
5844 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5845 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
5846 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5847 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
5848 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5849 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
5850 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
5851 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
5852 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5853 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7
5854 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
5855 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
5856 ; VI-GISEL-NEXT: v_min_f32_e32 v5, v4, v2
5857 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v4, v2
5858 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5859 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
5860 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
5861 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
5862 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
5863 ; VI-GISEL-NEXT: s_endpgm
5865 ; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5867 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
5868 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5869 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5870 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
5871 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5872 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
5873 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5874 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
5875 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5876 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
5877 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
5878 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
5879 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
5880 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
5881 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
5882 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5883 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
5884 ; GFX9-NEXT: v_max_f32_e32 v1, v4, v1
5885 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
5886 ; GFX9-NEXT: s_endpgm
5888 ; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5889 ; GFX11-SDAG: ; %bb.0:
5890 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5891 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5892 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5893 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5894 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
5895 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5896 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
5897 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5898 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
5899 ; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
5900 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5901 ; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v4, v1, v2
5902 ; GFX11-SDAG-NEXT: v_min_f32_e32 v3, v4, v3
5903 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
5904 ; GFX11-SDAG-NEXT: v_minmax_f32 v1, v1, v2, v3
5905 ; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v4, off dlc
5906 ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
5907 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
5908 ; GFX11-SDAG-NEXT: s_nop 0
5909 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5910 ; GFX11-SDAG-NEXT: s_endpgm
5912 ; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5913 ; GFX11-GISEL: ; %bb.0:
5914 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
5915 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5916 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5917 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
5918 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
5919 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
5920 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
5921 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
5922 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
5923 ; GFX11-GISEL-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
5924 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5925 ; GFX11-GISEL-NEXT: v_min_f32_e32 v4, v1, v2
5926 ; GFX11-GISEL-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
5927 ; GFX11-GISEL-NEXT: v_minmax_f32 v2, v1, v2, v4
5928 ; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v1, off dlc
5929 ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
5930 ; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1]
5931 ; GFX11-GISEL-NEXT: s_nop 0
5932 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5933 ; GFX11-GISEL-NEXT: s_endpgm
; Per-workitem addressing: every pointer argument is indexed by the workitem id.
5934 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5935 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5936 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5937 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5938 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are loaded with volatile loads.
5939 %a = load volatile float, ptr addrspace(1) %gep0
5940 %b = load volatile float, ptr addrspace(1) %gep1
5941 %c = load volatile float, ptr addrspace(1) %gep2
; med3 = max(min(a, b), min(max(a, b), c))
5942 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
5943 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
; Extra user of max(a, b): this volatile store is what makes %tmp1 multi-use.
5944 store volatile float %tmp1, ptr addrspace(1) undef
5945 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5946 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
5947 store float %med3, ptr addrspace(1) %outgep
; Median-of-three min/max pattern in which the inner min(max(a, b), c) value
; has a second user (a volatile store), in addition to feeding the final max.
; The expected sequences below keep separate min/max instructions (folded into
; v_maxmin_f32 + v_minmax_f32 on gfx1100) and contain no v_med3_f32 for any
; run line.
; Each loaded value is first passed through v_mul_f32 1.0 (SI/VI run lines) or
; v_max_f32 x, x (gfx900/gfx1100 run lines) before the min/max chain.
; NOTE(review): attribute group #1 is defined outside this view; "safe" in the
; name presumably means NaNs are not assumed absent - confirm.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
5951 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5952 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
5954 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5955 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
5956 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
5957 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5958 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
5959 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
5960 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
5961 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
5962 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
5963 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
5964 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5965 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
5966 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5967 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5968 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5969 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5970 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5971 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
5972 ; SI-SDAG-NEXT: s_mov_b32 s10, -1
5973 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
5974 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
5975 ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
5976 ; SI-SDAG-NEXT: v_min_f32_e32 v5, v2, v3
5977 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
5978 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
5979 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[8:11], 0
5980 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
5981 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
5982 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5983 ; SI-SDAG-NEXT: s_endpgm
5985 ; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
5986 ; SI-GISEL: ; %bb.0:
5987 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
5988 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
5989 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
5990 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
5991 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
5992 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
5993 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
5994 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5995 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5996 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
5997 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5998 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
5999 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
6000 ; SI-GISEL-NEXT: s_mov_b32 s3, s11
6001 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6002 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6003 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6004 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
6005 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
6006 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3
6007 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
6008 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4
6009 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
6010 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0
6011 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
6012 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
6013 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6014 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6015 ; SI-GISEL-NEXT: s_endpgm
6017 ; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6019 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6020 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6021 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6022 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6023 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
6024 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6025 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6026 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
6027 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6028 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6029 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
6030 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6031 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
6032 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6033 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6034 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6035 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
6036 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6037 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6038 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
6039 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6040 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
6041 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
6042 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
6043 ; VI-SDAG-NEXT: v_min_f32_e32 v5, v4, v2
6044 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2
6045 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
6046 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6047 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6048 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
6049 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6050 ; VI-SDAG-NEXT: s_endpgm
6052 ; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6053 ; VI-GISEL: ; %bb.0:
6054 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6055 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6056 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6057 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6058 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6059 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6060 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6061 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6062 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6063 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6064 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6065 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6066 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6067 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
6068 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6069 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6070 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6071 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6072 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6073 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
6074 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6075 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6076 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6077 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6078 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6079 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7
6080 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
6081 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
6082 ; VI-GISEL-NEXT: v_min_f32_e32 v5, v4, v2
6083 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v4, v2
6084 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
6085 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6086 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6087 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
6088 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6089 ; VI-GISEL-NEXT: s_endpgm
6091 ; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6093 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6094 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6095 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6096 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
6097 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6098 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
6099 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6100 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
6101 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6102 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
6103 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
6104 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
6105 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
6106 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
6107 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
6108 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
6109 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6110 ; GFX9-NEXT: v_max_f32_e32 v1, v4, v1
6111 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6112 ; GFX9-NEXT: s_endpgm
6114 ; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6116 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6117 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6118 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6119 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6120 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6121 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6122 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6123 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6124 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6125 ; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6126 ; GFX11-NEXT: v_max_f32_e32 v3, v3, v3
6127 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6128 ; GFX11-NEXT: v_maxmin_f32 v3, v1, v2, v3
6129 ; GFX11-NEXT: v_minmax_f32 v1, v1, v2, v3
6130 ; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
6131 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
6132 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
6133 ; GFX11-NEXT: s_nop 0
6134 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6135 ; GFX11-NEXT: s_endpgm
; Per-workitem addressing: every pointer argument is indexed by the workitem id.
6136 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6137 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6138 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6139 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6140 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are loaded with volatile loads.
6141 %a = load volatile float, ptr addrspace(1) %gep0
6142 %b = load volatile float, ptr addrspace(1) %gep1
6143 %c = load volatile float, ptr addrspace(1) %gep2
; med3 = max(min(a, b), min(max(a, b), c))
6144 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
6145 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
6146 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
; Extra user of min(max(a, b), c): this volatile store is what makes %tmp2 multi-use.
6147 store volatile float %tmp2, ptr addrspace(1) undef
6148 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6149 store float %med3, ptr addrspace(1) %outgep
6153 define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6154 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6156 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6157 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
6158 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
6159 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6160 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
6161 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
6162 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6163 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
6164 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
6165 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6166 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6167 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6168 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6169 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[6:7]
6170 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
6171 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc
6172 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6173 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
6174 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
6175 ; SI-SDAG-NEXT: v_min_f32_e32 v5, v2, v3
6176 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
6177 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4
6178 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
6179 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
6180 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6181 ; SI-SDAG-NEXT: s_endpgm
6183 ; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6184 ; SI-GISEL: ; %bb.0:
6185 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6186 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6187 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
6188 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
6189 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
6190 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6191 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
6192 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6193 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6194 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
6195 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6196 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6197 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6198 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6199 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6200 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
6201 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
6202 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3
6203 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
6204 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4
6205 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
6206 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
6207 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6208 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6209 ; SI-GISEL-NEXT: s_endpgm
6211 ; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6213 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6214 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v0
6215 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6216 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6217 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v4
6218 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6219 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6220 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v4
6221 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6222 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6223 ; VI-SDAG-NEXT: flat_load_dword v6, v[0:1] glc
6224 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6225 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6226 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6227 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s6, v4
6228 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc
6229 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] glc
6230 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6231 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v4
6232 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6233 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6234 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6
6235 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
6236 ; VI-SDAG-NEXT: v_min_f32_e32 v5, v4, v2
6237 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2
6238 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
6239 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
6240 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
6241 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6242 ; VI-SDAG-NEXT: s_endpgm
6244 ; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6245 ; VI-GISEL: ; %bb.0:
6246 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6247 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6248 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6249 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6250 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6251 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6252 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6253 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6254 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6255 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6256 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6257 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6258 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6259 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6260 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6261 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6262 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6263 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v6
6264 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc
6265 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] glc
6266 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6267 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6268 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6269 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6270 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6271 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7
6272 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
6273 ; VI-GISEL-NEXT: v_min_f32_e32 v5, v4, v2
6274 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v4, v2
6275 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
6276 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
6277 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
6278 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6279 ; VI-GISEL-NEXT: s_endpgm
6281 ; GFX9-LABEL: v_test_safe_med3_f32_pat0:
6283 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6284 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6285 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6286 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
6287 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6288 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
6289 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6290 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
6291 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6292 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
6293 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
6294 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
6295 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
6296 ; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
6297 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v2
6298 ; GFX9-NEXT: v_max_f32_e32 v1, v4, v1
6299 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6300 ; GFX9-NEXT: s_endpgm
6302 ; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6303 ; GFX11-SDAG: ; %bb.0:
6304 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6305 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6306 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6307 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6308 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
6309 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6310 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
6311 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6312 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
6313 ; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6314 ; GFX11-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
6315 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6316 ; GFX11-SDAG-NEXT: v_maxmin_f32 v3, v1, v2, v3
6317 ; GFX11-SDAG-NEXT: v_minmax_f32 v1, v1, v2, v3
6318 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
6319 ; GFX11-SDAG-NEXT: s_nop 0
6320 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6321 ; GFX11-SDAG-NEXT: s_endpgm
6323 ; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6324 ; GFX11-GISEL: ; %bb.0:
6325 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6326 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6327 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6328 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6329 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
6330 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6331 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
6332 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6333 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
6334 ; GFX11-GISEL-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6335 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6336 ; GFX11-GISEL-NEXT: v_min_f32_e32 v4, v1, v2
6337 ; GFX11-GISEL-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
6338 ; GFX11-GISEL-NEXT: v_minmax_f32 v1, v1, v2, v4
6339 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
6340 ; GFX11-GISEL-NEXT: s_nop 0
6341 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6342 ; GFX11-GISEL-NEXT: s_endpgm
6343 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6344 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6345 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6346 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6347 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6348 %a = load volatile float, ptr addrspace(1) %gep0
6349 %b = load volatile float, ptr addrspace(1) %gep1
6350 %c = load volatile float, ptr addrspace(1) %gep2
6351 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
6352 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
6353 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
6354 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6355 store float %med3, ptr addrspace(1) %outgep
; Median-of-three kernel: volatile-loads a, b and c from %aptr, %bptr and %cptr
; at the workitem-id index, adds 1.0 / 2.0 / 4.0 to them, evaluates
;   maxnum(minnum(a, b), minnum(maxnum(a, b), c))
; and stores the result to %out at the same index.
; In this variant the fadd producing %a.nnan carries no nnan flag, while the
; fadds producing %b.nnan and %c.nnan do. The autogenerated assertions below
; expect a single v_med3_f32 under every check prefix listed in this block.
6359 define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6360 ; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6362 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6363 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
6364 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
6365 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6366 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
6367 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
6368 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
6369 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6370 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
6371 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
6372 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
6373 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6374 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6375 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6376 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6377 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6378 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6379 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
6380 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
6381 ; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3
6382 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4
6383 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
6384 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6385 ; SI-SDAG-NEXT: s_endpgm
6387 ; SI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6388 ; SI-GISEL: ; %bb.0:
6389 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6390 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6391 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
6392 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
6393 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
6394 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6395 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
6396 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6397 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6398 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
6399 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6400 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6401 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6402 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6403 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6404 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
6405 ; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3
6406 ; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4
6407 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
6408 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6409 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6410 ; SI-GISEL-NEXT: s_endpgm
6412 ; VI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6414 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6415 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6416 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6417 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6418 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
6419 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6420 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6421 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
6422 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6423 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6424 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
6425 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6426 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
6427 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6428 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6429 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6430 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
6431 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6432 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6433 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
6434 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6435 ; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7
6436 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2
6437 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3
6438 ; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3
6439 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6440 ; VI-SDAG-NEXT: s_endpgm
6442 ; VI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6443 ; VI-GISEL: ; %bb.0:
6444 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6445 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6446 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6447 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6448 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6449 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6450 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6451 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6452 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6453 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6454 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6455 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6456 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6457 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
6458 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6459 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6460 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6461 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6462 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6463 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
6464 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6465 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6466 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6467 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6468 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6469 ; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7
6470 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2
6471 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3
6472 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
6473 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6474 ; VI-GISEL-NEXT: s_endpgm
6476 ; GFX9-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6478 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6479 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6480 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6481 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
6482 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6483 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
6484 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6485 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
6486 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6487 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
6488 ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2
6489 ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3
6490 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
6491 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6492 ; GFX9-NEXT: s_endpgm
6494 ; GFX11-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6496 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6497 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6498 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6499 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6500 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6501 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6502 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6503 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6504 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6505 ; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6506 ; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3
6507 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6508 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
6509 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
6510 ; GFX11-NEXT: s_nop 0
6511 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6512 ; GFX11-NEXT: s_endpgm
6513 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6514 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6515 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6516 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6517 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6518 %a = load volatile float, ptr addrspace(1) %gep0
6519 %b = load volatile float, ptr addrspace(1) %gep1
6520 %c = load volatile float, ptr addrspace(1) %gep2
; Despite its name, %a.nnan comes from a plain fadd here; only the %b and %c
; adds are flagged nnan (presumably the "missing0" case in the function name).
6522 %a.nnan = fadd float %a, 1.0
6523 %b.nnan = fadd nnan float %b, 2.0
6524 %c.nnan = fadd nnan float %c, 4.0
; Median of three written with minnum/maxnum; the calls themselves carry no
; fast-math flags.
6526 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6527 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6528 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6529 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6530 store float %med3, ptr addrspace(1) %outgep
; Median-of-three kernel: volatile-loads a, b and c from %aptr, %bptr and %cptr
; at the workitem-id index, adds 1.0 / 2.0 / 4.0 to them, evaluates
;   maxnum(minnum(a, b), minnum(maxnum(a, b), c))
; and stores the result to %out at the same index.
; In this variant the fadd producing %b.nnan carries no nnan flag, while the
; fadds producing %a.nnan and %c.nnan do. The autogenerated assertions below
; expect a single v_med3_f32 under every check prefix listed in this block.
6534 define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6535 ; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6537 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6538 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
6539 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
6540 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6541 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
6542 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
6543 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
6544 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6545 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
6546 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
6547 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
6548 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6549 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6550 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6551 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6552 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6553 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6554 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
6555 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
6556 ; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3
6557 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4
6558 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
6559 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6560 ; SI-SDAG-NEXT: s_endpgm
6562 ; SI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6563 ; SI-GISEL: ; %bb.0:
6564 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6565 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6566 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
6567 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
6568 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
6569 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6570 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
6571 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6572 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6573 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
6574 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6575 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6576 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6577 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6578 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6579 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
6580 ; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3
6581 ; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4
6582 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
6583 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6584 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6585 ; SI-GISEL-NEXT: s_endpgm
6587 ; VI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6589 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6590 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6591 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6592 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6593 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
6594 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6595 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6596 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
6597 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6598 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6599 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
6600 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6601 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
6602 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6603 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6604 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6605 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
6606 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6607 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6608 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
6609 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6610 ; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7
6611 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2
6612 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3
6613 ; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3
6614 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6615 ; VI-SDAG-NEXT: s_endpgm
6617 ; VI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6618 ; VI-GISEL: ; %bb.0:
6619 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6620 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6621 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6622 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6623 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6624 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6625 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6626 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6627 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6628 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6629 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6630 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6631 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6632 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
6633 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6634 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6635 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6636 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6637 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6638 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
6639 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6640 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6641 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6642 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6643 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6644 ; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7
6645 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2
6646 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3
6647 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
6648 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6649 ; VI-GISEL-NEXT: s_endpgm
6651 ; GFX9-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6653 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6654 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6655 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6656 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
6657 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6658 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
6659 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6660 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
6661 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6662 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
6663 ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2
6664 ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3
6665 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
6666 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6667 ; GFX9-NEXT: s_endpgm
6669 ; GFX11-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6671 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6672 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6673 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6674 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6675 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6676 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6677 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6678 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6679 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6680 ; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6681 ; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3
6682 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6683 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
6684 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
6685 ; GFX11-NEXT: s_nop 0
6686 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6687 ; GFX11-NEXT: s_endpgm
6688 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6689 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6690 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6691 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6692 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6693 %a = load volatile float, ptr addrspace(1) %gep0
6694 %b = load volatile float, ptr addrspace(1) %gep1
6695 %c = load volatile float, ptr addrspace(1) %gep2
; Despite its name, %b.nnan comes from a plain fadd here; only the %a and %c
; adds are flagged nnan (presumably the "missing1" case in the function name).
6697 %a.nnan = fadd nnan float %a, 1.0
6698 %b.nnan = fadd float %b, 2.0
6699 %c.nnan = fadd nnan float %c, 4.0
; Median of three written with minnum/maxnum; the calls themselves carry no
; fast-math flags.
6701 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6702 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6703 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6704 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6705 store float %med3, ptr addrspace(1) %outgep
; Median-of-three kernel: volatile-loads a, b and c from %aptr, %bptr and %cptr
; at the workitem-id index, adds 1.0 / 2.0 / 4.0 to them, evaluates
;   maxnum(minnum(a, b), minnum(maxnum(a, b), c))
; and stores the result to %out at the same index.
; In this variant the fadd producing %c.nnan carries no nnan flag, while the
; fadds producing %a.nnan and %b.nnan do. The autogenerated assertions below
; expect a single v_med3_f32 under every check prefix listed in this block.
6709 define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6710 ; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6712 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6713 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
6714 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
6715 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6716 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
6717 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
6718 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
6719 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6720 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
6721 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
6722 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
6723 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6724 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6725 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6726 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6727 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6728 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6729 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
6730 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
6731 ; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3
6732 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4
6733 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
6734 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6735 ; SI-SDAG-NEXT: s_endpgm
6737 ; SI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6738 ; SI-GISEL: ; %bb.0:
6739 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6740 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6741 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
6742 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
6743 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
6744 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6745 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
6746 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6747 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6748 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
6749 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6750 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6751 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6752 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6753 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6754 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2
6755 ; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3
6756 ; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4
6757 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
6758 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6759 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6760 ; SI-GISEL-NEXT: s_endpgm
6762 ; VI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6764 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6765 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6766 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6767 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6768 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
6769 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6770 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6771 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
6772 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6773 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6774 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
6775 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6776 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
6777 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6778 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6779 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6780 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
6781 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6782 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6783 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
6784 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6785 ; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7
6786 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2
6787 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3
6788 ; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3
6789 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6790 ; VI-SDAG-NEXT: s_endpgm
6792 ; VI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6793 ; VI-GISEL: ; %bb.0:
6794 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6795 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6796 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6797 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6798 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6799 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6800 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6801 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6802 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6803 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6804 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6805 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6806 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6807 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
6808 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6809 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6810 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6811 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6812 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6813 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
6814 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6815 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6816 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6817 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6818 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6819 ; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7
6820 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2
6821 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3
6822 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
6823 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6824 ; VI-GISEL-NEXT: s_endpgm
6826 ; GFX9-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6828 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6829 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6830 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6831 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
6832 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6833 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
6834 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6835 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
6836 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6837 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
6838 ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2
6839 ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3
6840 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
6841 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6842 ; GFX9-NEXT: s_endpgm
6844 ; GFX11-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6846 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
6847 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6848 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6849 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
6850 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6851 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
6852 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6853 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
6854 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6855 ; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6856 ; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3
6857 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6858 ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
6859 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
6860 ; GFX11-NEXT: s_nop 0
6861 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
6862 ; GFX11-NEXT: s_endpgm
6863 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6864 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6865 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6866 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6867 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6868 %a = load volatile float, ptr addrspace(1) %gep0
6869 %b = load volatile float, ptr addrspace(1) %gep1
6870 %c = load volatile float, ptr addrspace(1) %gep2
; Despite its name, %c.nnan comes from a plain fadd here; only the %a and %b
; adds are flagged nnan (presumably the "missing2" case in the function name).
6872 %a.nnan = fadd nnan float %a, 1.0
6873 %b.nnan = fadd nnan float %b, 2.0
6874 %c.nnan = fadd float %c, 4.0
; Median of three written with minnum/maxnum; the calls themselves carry no
; fast-math flags.
6876 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6877 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6878 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6879 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6880 store float %med3, ptr addrspace(1) %outgep
6884 define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6885 ; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6887 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6888 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
6889 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
6890 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6891 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
6892 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
6893 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
6894 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6895 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
6896 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
6897 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
6898 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6899 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6900 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6901 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6902 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6903 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
6904 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
6905 ; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4
6906 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6907 ; SI-SDAG-NEXT: s_endpgm
6909 ; SI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6910 ; SI-GISEL: ; %bb.0:
6911 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
6912 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6913 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
6914 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
6915 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
6916 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6917 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
6918 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6919 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6920 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
6921 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6922 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6923 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
6924 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6925 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
6926 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
6927 ; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
6928 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6929 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6930 ; SI-GISEL-NEXT: s_endpgm
6932 ; VI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6934 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6935 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6936 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6937 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
6938 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
6939 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6940 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
6941 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
6942 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6943 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
6944 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
6945 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6946 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
6947 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6948 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
6949 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6950 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
6951 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
6952 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
6953 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
6954 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6955 ; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3
6956 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
6957 ; VI-SDAG-NEXT: s_endpgm
6959 ; VI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6960 ; VI-GISEL: ; %bb.0:
6961 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6962 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
6963 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6964 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
6965 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
6966 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6967 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6968 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
6969 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
6970 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
6971 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
6972 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
6973 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
6974 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
6975 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
6976 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
6977 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6978 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
6979 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6980 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
6981 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
6982 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
6983 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
6984 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
6985 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
6986 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
6987 ; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
6988 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
6989 ; VI-GISEL-NEXT: s_endpgm
6991 ; GFX9-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6992 ; GFX9-SDAG: ; %bb.0:
6993 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
6994 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
6995 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6996 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
6997 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
6998 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
6999 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
7000 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
7001 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
7002 ; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
7003 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
7004 ; GFX9-SDAG-NEXT: s_endpgm
7006 ; GFX9-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7007 ; GFX9-GISEL: ; %bb.0:
7008 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7009 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7010 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7011 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
7012 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7013 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
7014 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7015 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
7016 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7017 ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
7018 ; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
7019 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
7020 ; GFX9-GISEL-NEXT: s_endpgm
7022 ; GFX11-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7023 ; GFX11-SDAG: ; %bb.0:
7024 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7025 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7026 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7027 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
7028 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7029 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
7030 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7031 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
7032 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7033 ; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3
7034 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
7035 ; GFX11-SDAG-NEXT: s_nop 0
7036 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7037 ; GFX11-SDAG-NEXT: s_endpgm
7039 ; GFX11-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7040 ; GFX11-GISEL: ; %bb.0:
7041 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7042 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7043 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7044 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
7045 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7046 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
7047 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7048 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
7049 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7050 ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
7051 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
7052 ; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
7053 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
7054 ; GFX11-GISEL-NEXT: s_nop 0
7055 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7056 ; GFX11-GISEL-NEXT: s_endpgm
7057 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7058 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7059 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7060 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7061 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7062 %a = load volatile float, ptr addrspace(1) %gep0
7063 %b = load volatile float, ptr addrspace(1) %gep1
7064 %c = load volatile float, ptr addrspace(1) %gep2
7065 %a.fneg = fsub float -0.0, %a
7066 %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
7067 %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
7068 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
7069 %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
7070 store float %med3, ptr addrspace(1) %outgep
; Negative test for the med3 min/max pattern with a source modifier.
; %tmp0 takes the minimum of the negated input (%a.fneg) and %b, while %tmp1
; takes the maximum of the un-negated %a and %b, so the two inner operations do
; not share the same first operand. None of the check blocks below expect
; v_med3_f32; they expect individual min/max instructions instead (with the
; fused v_maxmin_f32 / v_minmax_f32 forms appearing for gfx1100).
; NOTE(review): attribute group #2 is defined outside this excerpt; the
; function name suggests it enables no-NaNs globally -- confirm.
7074 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
7075 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7077 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7078 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
7079 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
7080 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7081 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7082 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
7083 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7084 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
7085 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
7086 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7087 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7088 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
7089 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7090 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[6:7]
7091 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc
7092 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7093 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
7094 ; SI-SDAG-NEXT: v_min_f32_e64 v5, -v2, v3
7095 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
7096 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
7097 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2
7098 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7099 ; SI-SDAG-NEXT: s_endpgm
7101 ; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7102 ; SI-GISEL: ; %bb.0:
7103 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7104 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7105 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7106 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
7107 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
7108 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7109 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
7110 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7111 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7112 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
7113 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
7114 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7115 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
7116 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
7117 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7118 ; SI-GISEL-NEXT: v_mul_f32_e32 v5, -1.0, v2
7119 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v5, v3
7120 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
7121 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
7122 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2
7123 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
7124 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7125 ; SI-GISEL-NEXT: s_endpgm
7127 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7129 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7130 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v0
7131 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7132 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7133 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v4
7134 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7135 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
7136 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v4
7137 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7138 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
7139 ; VI-SDAG-NEXT: flat_load_dword v6, v[0:1] glc
7140 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7141 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
7142 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7143 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s6, v4
7144 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc
7145 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] glc
7146 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7147 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v4
7148 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7149 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7150 ; VI-SDAG-NEXT: v_min_f32_e64 v4, -v6, v2
7151 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v6, v2
7152 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
7153 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2
7154 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
7155 ; VI-SDAG-NEXT: s_endpgm
7157 ; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7158 ; VI-GISEL: ; %bb.0:
7159 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7160 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
7161 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7162 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7163 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7164 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7165 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7166 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
7167 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
7168 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
7169 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7170 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
7171 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
7172 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
7173 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7174 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
7175 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7176 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v6
7177 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc
7178 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] glc
7179 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7180 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7181 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7182 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7183 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7184 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
7185 ; VI-GISEL-NEXT: v_max_f32_e32 v5, v7, v2
7186 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v4, v2
7187 ; VI-GISEL-NEXT: v_min_f32_e32 v3, v5, v3
7188 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
7189 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
7190 ; VI-GISEL-NEXT: s_endpgm
7192 ; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7193 ; GFX9-SDAG: ; %bb.0:
7194 ; GFX9-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7195 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7196 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7197 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
7198 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
7199 ; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[4:5] glc
7200 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
7201 ; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[6:7] glc
7202 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
7203 ; GFX9-SDAG-NEXT: v_min_f32_e64 v4, -v1, v2
7204 ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
7205 ; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
7206 ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v4, v1
7207 ; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
7208 ; GFX9-SDAG-NEXT: s_endpgm
7210 ; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7211 ; GFX9-GISEL: ; %bb.0:
7212 ; GFX9-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7213 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7214 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7215 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
7216 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7217 ; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[4:5] glc
7218 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7219 ; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[6:7] glc
7220 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
7221 ; GFX9-GISEL-NEXT: v_max_f32_e64 v4, -v1, -v1
7222 ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
7223 ; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v4, v2
7224 ; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
7225 ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v2, v1
7226 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
7227 ; GFX9-GISEL-NEXT: s_endpgm
7229 ; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7230 ; GFX11-SDAG: ; %bb.0:
7231 ; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7232 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7233 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7234 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
7235 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7236 ; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
7237 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7238 ; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
7239 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7240 ; GFX11-SDAG-NEXT: v_maxmin_f32 v3, v1, v2, v3
7241 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
7242 ; GFX11-SDAG-NEXT: v_minmax_f32 v1, -v1, v2, v3
7243 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
7244 ; GFX11-SDAG-NEXT: s_nop 0
7245 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7246 ; GFX11-SDAG-NEXT: s_endpgm
7248 ; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7249 ; GFX11-GISEL: ; %bb.0:
7250 ; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7251 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7252 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7253 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
7254 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7255 ; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
7256 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7257 ; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
7258 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7259 ; GFX11-GISEL-NEXT: v_max_f32_e64 v4, -v1, -v1
7260 ; GFX11-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
7261 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7262 ; GFX11-GISEL-NEXT: v_min_f32_e32 v4, v4, v2
7263 ; GFX11-GISEL-NEXT: v_minmax_f32 v1, v1, v3, v4
7264 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
7265 ; GFX11-GISEL-NEXT: s_nop 0
7266 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7267 ; GFX11-GISEL-NEXT: s_endpgm
7268 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7269 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7270 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7271 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7272 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7273 %a = load volatile float, ptr addrspace(1) %gep0
7274 %b = load volatile float, ptr addrspace(1) %gep1
7275 %c = load volatile float, ptr addrspace(1) %gep2
7276 %a.fneg = fsub float -0.0, %a
7277 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
; The maxnum below reads %a, not the %a.fneg operand that %tmp0 uses.
7278 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
7279 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
7280 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
7281 store float %med3, ptr addrspace(1) %outgep
7285 ; A simple min and max is not sufficient to form med3.
; The IR below only computes minnum(maxnum(%a, %b), %c). The check blocks
; therefore expect v_max_f32 followed by v_min_f32 (a single v_maxmin_f32 for
; gfx1100) and never v_med3_f32.
; NOTE(review): attribute group #2 is defined outside this excerpt; the
; function name suggests it enables no-NaNs globally -- confirm.
7286 define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
7287 ; SI-SDAG-LABEL: v_test_global_nnans_min_max_f32:
7289 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7290 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
7291 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
7292 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7293 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7294 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
7295 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
7296 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7297 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
7298 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
7299 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
7300 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7301 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7302 ; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
7303 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7304 ; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
7305 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7306 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
7307 ; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
7308 ; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
7309 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7310 ; SI-SDAG-NEXT: s_endpgm
7312 ; SI-GISEL-LABEL: v_test_global_nnans_min_max_f32:
7313 ; SI-GISEL: ; %bb.0:
7314 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7315 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7316 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7317 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
7318 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
7319 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7320 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
7321 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7322 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7323 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
7324 ; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
7325 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7326 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
7327 ; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
7328 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7329 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
7330 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
7331 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
7332 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7333 ; SI-GISEL-NEXT: s_endpgm
7335 ; VI-SDAG-LABEL: v_test_global_nnans_min_max_f32:
7337 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7338 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
7339 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7340 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7341 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
7342 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7343 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
7344 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
7345 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7346 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
7347 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
7348 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
7349 ; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
7350 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7351 ; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
7352 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7353 ; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
7354 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7355 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7356 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
7357 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7358 ; VI-SDAG-NEXT: v_max_f32_e32 v2, v7, v2
7359 ; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
7360 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
7361 ; VI-SDAG-NEXT: s_endpgm
7363 ; VI-GISEL-LABEL: v_test_global_nnans_min_max_f32:
7364 ; VI-GISEL: ; %bb.0:
7365 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7366 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
7367 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7368 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7369 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7370 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7371 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7372 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
7373 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
7374 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
7375 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7376 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
7377 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
7378 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
7379 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
7380 ; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
7381 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7382 ; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
7383 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7384 ; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
7385 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7386 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7387 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7388 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7389 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7390 ; VI-GISEL-NEXT: v_max_f32_e32 v2, v7, v2
7391 ; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
7392 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
7393 ; VI-GISEL-NEXT: s_endpgm
7395 ; GFX9-LABEL: v_test_global_nnans_min_max_f32:
7397 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7398 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7399 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7400 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
7401 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7402 ; GFX9-NEXT: global_load_dword v2, v0, s[4:5] glc
7403 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7404 ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc
7405 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7406 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
7407 ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
7408 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
7409 ; GFX9-NEXT: s_endpgm
7411 ; GFX11-LABEL: v_test_global_nnans_min_max_f32:
7413 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7414 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7415 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7416 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
7417 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7418 ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
7419 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7420 ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
7421 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7422 ; GFX11-NEXT: v_maxmin_f32 v1, v1, v2, v3
7423 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
7424 ; GFX11-NEXT: s_nop 0
7425 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7426 ; GFX11-NEXT: s_endpgm
7427 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7428 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7429 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7430 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7431 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7432 %a = load volatile float, ptr addrspace(1) %gep0
7433 %b = load volatile float, ptr addrspace(1) %gep1
7434 %c = load volatile float, ptr addrspace(1) %gep2
7435 %max = call float @llvm.maxnum.f32(float %a, float %b)
7436 %minmax = call float @llvm.minnum.f32(float %max, float %c)
7437 store float %minmax, ptr addrspace(1) %outgep
; Half-precision clamp of a value to the constants 2.0 and 4.0.
; %a.add comes from an fadd that carries the nnan flag; it is then passed
; through maxnum(..., 2.0) and minnum(..., 4.0). Per the check blocks below,
; the expected selection is v_med3_f32 (after converting f16 to f32) for the
; default-target SelectionDAG run, v_med3_f16 for gfx900 and gfx1100, and
; separate max/min instructions for the default-target GlobalISel run and for
; both tonga runs.
; NOTE(review): attribute group #1 is defined outside this excerpt.
7441 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
7442 ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7444 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7445 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
7446 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
7447 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7448 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7449 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7450 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
7451 ; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
7452 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
7453 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7454 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7455 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
7456 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
7457 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7458 ; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7459 ; SI-SDAG-NEXT: s_endpgm
7461 ; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7462 ; SI-GISEL: ; %bb.0:
7463 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7464 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7465 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7466 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
7467 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
7468 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7469 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
7470 ; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
7471 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 1.0
7472 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, 2.0
7473 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7474 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7475 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
7476 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7477 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7478 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
7479 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7480 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7481 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 4.0
7482 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
7483 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7484 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
7485 ; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7486 ; SI-GISEL-NEXT: s_endpgm
7488 ; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7490 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7491 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 1, v0
7492 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7493 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7494 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
7495 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7496 ; VI-SDAG-NEXT: flat_load_ushort v3, v[0:1]
7497 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
7498 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7499 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7500 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7501 ; VI-SDAG-NEXT: v_add_f16_e32 v2, 1.0, v3
7502 ; VI-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v2
7503 ; VI-SDAG-NEXT: v_min_f16_e32 v2, 4.0, v2
7504 ; VI-SDAG-NEXT: flat_store_short v[0:1], v2
7505 ; VI-SDAG-NEXT: s_endpgm
7507 ; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7508 ; VI-GISEL: ; %bb.0:
7509 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7510 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v0
7511 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7512 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7513 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7514 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7515 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7516 ; VI-GISEL-NEXT: flat_load_ushort v3, v[0:1]
7517 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7518 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7519 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7520 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7521 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7522 ; VI-GISEL-NEXT: v_add_f16_e32 v2, 1.0, v3
7523 ; VI-GISEL-NEXT: v_max_f16_e32 v2, 2.0, v2
7524 ; VI-GISEL-NEXT: v_min_f16_e32 v2, 4.0, v2
7525 ; VI-GISEL-NEXT: flat_store_short v[0:1], v2
7526 ; VI-GISEL-NEXT: s_endpgm
7528 ; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7530 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7531 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7532 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7533 ; GFX9-NEXT: global_load_ushort v1, v0, s[2:3]
7534 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7535 ; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
7536 ; GFX9-NEXT: v_med3_f16 v1, v1, 2.0, 4.0
7537 ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
7538 ; GFX9-NEXT: s_endpgm
7540 ; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7542 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
7543 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7544 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7545 ; GFX11-NEXT: global_load_u16 v1, v0, s[2:3]
7546 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7547 ; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
7548 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7549 ; GFX11-NEXT: v_med3_f16 v1, v1, 2.0, 4.0
7550 ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
7551 ; GFX11-NEXT: s_nop 0
7552 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7553 ; GFX11-NEXT: s_endpgm
7554 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7555 %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
7556 %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
7557 %a = load half, ptr addrspace(1) %gep0
; Only this fadd carries nnan; the maxnum/minnum calls below have no flags.
7558 %a.add = fadd nnan half %a, 1.0
7559 %max = call half @llvm.maxnum.f16(half %a.add, half 2.0)
7560 %med = call half @llvm.minnum.f16(half %max, half 4.0)
7562 store half %med, ptr addrspace(1) %outgep
7566 define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
7567 ; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
7569 ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7570 ; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
7571 ; SI-SDAG-NEXT: s_mov_b32 s10, 0
7572 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7573 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7574 ; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
7575 ; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
7576 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7577 ; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
7578 ; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
7579 ; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
7580 ; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[8:11], 0 addr64 glc
7581 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7582 ; SI-SDAG-NEXT: buffer_load_ushort v3, v[0:1], s[12:15], 0 addr64 glc
7583 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7584 ; SI-SDAG-NEXT: buffer_load_ushort v4, v[0:1], s[16:19], 0 addr64 glc
7585 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7586 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
7587 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7588 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
7589 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
7590 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2
7591 ; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3
7592 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4
7593 ; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4
7594 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7595 ; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7596 ; SI-SDAG-NEXT: s_endpgm
7598 ; SI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0:
7599 ; SI-GISEL: ; %bb.0:
7600 ; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
7601 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7602 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7603 ; SI-GISEL-NEXT: s_mov_b32 s10, 0
7604 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
7605 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 1.0
7606 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 2.0
7607 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7608 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
7609 ; SI-GISEL-NEXT: buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 glc
7610 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7611 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v5, 4.0
7612 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
7613 ; SI-GISEL-NEXT: buffer_load_ushort v6, v[0:1], s[8:11], 0 addr64 glc
7614 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7615 ; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
7616 ; SI-GISEL-NEXT: buffer_load_ushort v7, v[0:1], s[8:11], 0 addr64 glc
7617 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7618 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
7619 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2
7620 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
7621 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7622 ; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
7623 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v7
7624 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
7625 ; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
7626 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7627 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
7628 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
7629 ; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3
7630 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
7631 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
7632 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v5
7633 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7634 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7635 ; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
7636 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7637 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
7638 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7639 ; SI-GISEL-NEXT: v_max_f32_e32 v2, v3, v2
7640 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7641 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
7642 ; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7643 ; SI-GISEL-NEXT: s_endpgm
7645 ; VI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
7647 ; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7648 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 1, v0
7649 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7650 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7651 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
7652 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7653 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
7654 ; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
7655 ; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7656 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
7657 ; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
7658 ; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
7659 ; VI-SDAG-NEXT: flat_load_ushort v7, v[0:1] glc
7660 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7661 ; VI-SDAG-NEXT: flat_load_ushort v2, v[2:3] glc
7662 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7663 ; VI-SDAG-NEXT: flat_load_ushort v3, v[4:5] glc
7664 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7665 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7666 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
7667 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7668 ; VI-SDAG-NEXT: v_add_f16_e32 v4, 1.0, v7
7669 ; VI-SDAG-NEXT: v_add_f16_e32 v2, 2.0, v2
7670 ; VI-SDAG-NEXT: v_add_f16_e32 v3, 4.0, v3
7671 ; VI-SDAG-NEXT: v_min_f16_e32 v5, v4, v2
7672 ; VI-SDAG-NEXT: v_max_f16_e32 v2, v4, v2
7673 ; VI-SDAG-NEXT: v_min_f16_e32 v2, v2, v3
7674 ; VI-SDAG-NEXT: v_max_f16_e32 v2, v5, v2
7675 ; VI-SDAG-NEXT: flat_store_short v[0:1], v2
7676 ; VI-SDAG-NEXT: s_endpgm
7678 ; VI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0:
7679 ; VI-GISEL: ; %bb.0:
7680 ; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7681 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 1, v0
7682 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7683 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7684 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7685 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7686 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7687 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
7688 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
7689 ; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
7690 ; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
7691 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
7692 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
7693 ; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
7694 ; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
7695 ; VI-GISEL-NEXT: flat_load_ushort v7, v[0:1] glc
7696 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7697 ; VI-GISEL-NEXT: flat_load_ushort v2, v[2:3] glc
7698 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7699 ; VI-GISEL-NEXT: flat_load_ushort v3, v[4:5] glc
7700 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7701 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7702 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7703 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
7704 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7705 ; VI-GISEL-NEXT: v_add_f16_e32 v4, 1.0, v7
7706 ; VI-GISEL-NEXT: v_add_f16_e32 v2, 2.0, v2
7707 ; VI-GISEL-NEXT: v_add_f16_e32 v3, 4.0, v3
7708 ; VI-GISEL-NEXT: v_min_f16_e32 v5, v4, v2
7709 ; VI-GISEL-NEXT: v_max_f16_e32 v2, v4, v2
7710 ; VI-GISEL-NEXT: v_min_f16_e32 v2, v2, v3
7711 ; VI-GISEL-NEXT: v_max_f16_e32 v2, v5, v2
7712 ; VI-GISEL-NEXT: flat_store_short v[0:1], v2
7713 ; VI-GISEL-NEXT: s_endpgm
7715 ; GFX9-LABEL: v_nnan_inputs_med3_f16_pat0:
7717 ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
7718 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7719 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7720 ; GFX9-NEXT: global_load_ushort v1, v0, s[2:3] glc
7721 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7722 ; GFX9-NEXT: global_load_ushort v2, v0, s[4:5] glc
7723 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7724 ; GFX9-NEXT: global_load_ushort v3, v0, s[6:7] glc
7725 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7726 ; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
7727 ; GFX9-NEXT: v_add_f16_e32 v2, 2.0, v2
7728 ; GFX9-NEXT: v_add_f16_e32 v3, 4.0, v3
7729 ; GFX9-NEXT: v_med3_f16 v1, v1, v2, v3
7730 ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
7731 ; GFX9-NEXT: s_endpgm
7733 ; GFX11-LABEL: v_nnan_inputs_med3_f16_pat0:
7735 ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
7736 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
7737 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7738 ; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
7739 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7740 ; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
7741 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7742 ; GFX11-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc
7743 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7744 ; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
7745 ; GFX11-NEXT: v_add_f16_e32 v2, 2.0, v2
7746 ; GFX11-NEXT: v_add_f16_e32 v3, 4.0, v3
7747 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7748 ; GFX11-NEXT: v_med3_f16 v1, v1, v2, v3
7749 ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
7750 ; GFX11-NEXT: s_nop 0
7751 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7752 ; GFX11-NEXT: s_endpgm
7753 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7754 %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
7755 %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid
7756 %gep2 = getelementptr half, ptr addrspace(1) %cptr, i32 %tid
7757 %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
7758 %a = load volatile half, ptr addrspace(1) %gep0
7759 %b = load volatile half, ptr addrspace(1) %gep1
7760 %c = load volatile half, ptr addrspace(1) %gep2
7762 %a.nnan = fadd nnan half %a, 1.0
7763 %b.nnan = fadd nnan half %b, 2.0
7764 %c.nnan = fadd nnan half %c, 4.0
7766 %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan)
7767 %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan)
7768 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan)
7769 %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
7770 store half %med3, ptr addrspace(1) %outgep
; Clamp of an nnan value between two constants, 8.0 and 16.0, written as
; maxnum followed by minnum. Both bounds show up in the expected output as
; 32-bit literals (0x41000000 = 8.0, 0x41800000 = 16.0) rather than as inline
; operands such as the 0.5 used by the add.
; Expected output below: the SI, VI and GFX9 runs keep a separate v_max_f32 and
; v_min_f32, while the GFX11 runs use a single v_maxmin_f32.
; NOTE(review): presumably v_med3_f32 is avoided here because it would need two
; literal operands in one instruction - confirm against the backend.
7774 define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
7775 ; SI-SDAG-LABEL: two_non_inline_constant:
7777 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7778 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
7779 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
7780 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7781 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7782 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7783 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
7784 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7785 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
7786 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7787 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v2
7788 ; SI-SDAG-NEXT: v_max_f32_e32 v2, 0x41000000, v2
7789 ; SI-SDAG-NEXT: v_min_f32_e32 v2, 0x41800000, v2
7790 ; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7791 ; SI-SDAG-NEXT: s_endpgm
7793 ; SI-GISEL-LABEL: two_non_inline_constant:
7794 ; SI-GISEL: ; %bb.0:
7795 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7796 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7797 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7798 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
7799 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
7800 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7801 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
7802 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7803 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7804 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v2
7805 ; SI-GISEL-NEXT: v_max_f32_e32 v2, 0x41000000, v2
7806 ; SI-GISEL-NEXT: v_min_f32_e32 v2, 0x41800000, v2
7807 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
7808 ; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7809 ; SI-GISEL-NEXT: s_endpgm
7811 ; VI-SDAG-LABEL: two_non_inline_constant:
7813 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7814 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
7815 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7816 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7817 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
7818 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7819 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
7820 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
7821 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7822 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7823 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7824 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3
7825 ; VI-SDAG-NEXT: v_max_f32_e32 v2, 0x41000000, v2
7826 ; VI-SDAG-NEXT: v_min_f32_e32 v2, 0x41800000, v2
7827 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
7828 ; VI-SDAG-NEXT: s_endpgm
7830 ; VI-GISEL-LABEL: two_non_inline_constant:
7831 ; VI-GISEL: ; %bb.0:
7832 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7833 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
7834 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7835 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7836 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7837 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7838 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7839 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
7840 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7841 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7842 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7843 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7844 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7845 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3
7846 ; VI-GISEL-NEXT: v_max_f32_e32 v2, 0x41000000, v2
7847 ; VI-GISEL-NEXT: v_min_f32_e32 v2, 0x41800000, v2
7848 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
7849 ; VI-GISEL-NEXT: s_endpgm
7851 ; GFX9-LABEL: two_non_inline_constant:
7853 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7854 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7855 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7856 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
7857 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7858 ; GFX9-NEXT: v_add_f32_e32 v1, 0.5, v1
7859 ; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v1
7860 ; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v1
7861 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
7862 ; GFX9-NEXT: s_endpgm
7864 ; GFX11-SDAG-LABEL: two_non_inline_constant:
7865 ; GFX11-SDAG: ; %bb.0:
7866 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
7867 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7868 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7869 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
7870 ; GFX11-SDAG-NEXT: s_mov_b32 s2, 0x41000000
7871 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
7872 ; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 0.5, v1
7873 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
7874 ; GFX11-SDAG-NEXT: v_maxmin_f32 v1, v1, s2, 0x41800000
7875 ; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
7876 ; GFX11-SDAG-NEXT: s_nop 0
7877 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7878 ; GFX11-SDAG-NEXT: s_endpgm
7880 ; GFX11-GISEL-LABEL: two_non_inline_constant:
7881 ; GFX11-GISEL: ; %bb.0:
7882 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
7883 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7884 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x41800000
7885 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7886 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
7887 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
7888 ; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 0.5, v1
7889 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
7890 ; GFX11-GISEL-NEXT: v_maxmin_f32 v1, v1, 0x41000000, v2
7891 ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
7892 ; GFX11-GISEL-NEXT: s_nop 0
7893 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7894 ; GFX11-GISEL-NEXT: s_endpgm
; Each work item reads element %tid of %aptr and writes element %tid of %out.
7895 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7896 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7897 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7898 %a = load float, ptr addrspace(1) %gep0
; The function attributes (#1) leave no-nans-fp-math off, so the nnan flag on
; this fadd is the only no-NaN guarantee on the clamped value.
7899 %add = fadd nnan float %a, 0.5
; max(%add, 8.0) followed by min(..., 16.0): clamps %add to [8.0, 16.0].
7900 %max = call float @llvm.maxnum.f32(float %add, float 8.0)
7901 %med = call float @llvm.minnum.f32(float %max, float 16.0)
7903 store float %med, ptr addrspace(1) %out.gep
; Clamp of an nnan value between 1.0 and 16.0, written as maxnum followed by
; minnum. Only the upper bound (16.0 = 0x41800000) appears as a 32-bit literal
; in the expected output; 1.0 is used as an inline operand.
; The constant 16.0 also gets a second use through an unrelated fadd whose
; result is stored with a volatile store.
; Expected output below: every run line folds the clamp into one v_med3_f32.
; The SI, VI and GFX9 runs first move 16.0 into a register, while the GFX11
; runs pass the literal directly to v_med3_f32.
7907 ; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
7908 define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
7909 ; SI-SDAG-LABEL: one_non_inline_constant:
7911 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7912 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
7913 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
7914 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7915 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
7916 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7917 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
7918 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7919 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000
7920 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
7921 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
7922 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7923 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 0.5, v2
7924 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 0x41800000, v2
7925 ; SI-SDAG-NEXT: v_med3_f32 v3, v4, 1.0, v3
7926 ; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
7927 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0
7928 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
7929 ; SI-SDAG-NEXT: s_endpgm
7931 ; SI-GISEL-LABEL: one_non_inline_constant:
7932 ; SI-GISEL: ; %bb.0:
7933 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
7934 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
7935 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
7936 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
7937 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
7938 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7939 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
7940 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7941 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x41800000
7942 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
7943 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
7944 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7945 ; SI-GISEL-NEXT: v_add_f32_e32 v3, 0.5, v2
7946 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x41800000, v2
7947 ; SI-GISEL-NEXT: v_med3_f32 v3, v3, 1.0, s4
7948 ; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
7949 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0
7950 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
7951 ; SI-GISEL-NEXT: s_endpgm
7953 ; VI-SDAG-LABEL: one_non_inline_constant:
7955 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7956 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
7957 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000
7958 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
7959 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
7960 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
7961 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7962 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
7963 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
7964 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
7965 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7966 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7967 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3
7968 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, 1.0, v4
7969 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 0x41800000, v3
7970 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
7971 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v3
7972 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
7973 ; VI-SDAG-NEXT: s_endpgm
7975 ; VI-GISEL-LABEL: one_non_inline_constant:
7976 ; VI-GISEL: ; %bb.0:
7977 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
7978 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
7979 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
7980 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
7981 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
7982 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7983 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7984 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
7985 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
7986 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x41800000
7987 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
7988 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
7989 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
7990 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7991 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3
7992 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, 1.0, s2
7993 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 0x41800000, v3
7994 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
7995 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v3
7996 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
7997 ; VI-GISEL-NEXT: s_endpgm
7999 ; GFX9-SDAG-LABEL: one_non_inline_constant:
8000 ; GFX9-SDAG: ; %bb.0:
8001 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
8002 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8003 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x41800000
8004 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
8005 ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
8006 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
8007 ; GFX9-SDAG-NEXT: v_add_f32_e32 v3, 0.5, v1
8008 ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 0x41800000, v1
8009 ; GFX9-SDAG-NEXT: v_med3_f32 v2, v3, 1.0, v2
8010 ; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[0:1]
8011 ; GFX9-SDAG-NEXT: global_store_dword v[0:1], v1, off
8012 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
8013 ; GFX9-SDAG-NEXT: s_endpgm
8015 ; GFX9-GISEL-LABEL: one_non_inline_constant:
8016 ; GFX9-GISEL: ; %bb.0:
8017 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
8018 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8019 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
8020 ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
8021 ; GFX9-GISEL-NEXT: s_mov_b32 s2, 0x41800000
8022 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
8023 ; GFX9-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v1
8024 ; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 0x41800000, v1
8025 ; GFX9-GISEL-NEXT: v_med3_f32 v2, v2, 1.0, s2
8026 ; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[0:1]
8027 ; GFX9-GISEL-NEXT: global_store_dword v[0:1], v1, off
8028 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
8029 ; GFX9-GISEL-NEXT: s_endpgm
8031 ; GFX11-LABEL: one_non_inline_constant:
8033 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8034 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8035 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8036 ; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
8037 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8038 ; GFX11-NEXT: v_add_f32_e32 v2, 0.5, v1
8039 ; GFX11-NEXT: v_add_f32_e32 v1, 0x41800000, v1
8040 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
8041 ; GFX11-NEXT: v_med3_f32 v2, v2, 1.0, 0x41800000
8042 ; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
8043 ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
8044 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8045 ; GFX11-NEXT: s_nop 0
8046 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
8047 ; GFX11-NEXT: s_endpgm
; Each work item reads element %tid of %aptr and writes element %tid of %out.
8048 %tid = call i32 @llvm.amdgcn.workitem.id.x()
8049 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
8050 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
8051 %a = load float, ptr addrspace(1) %gep0
; The function attributes (#1) leave no-nans-fp-math off, so the nnan flag on
; this fadd is the only no-NaN guarantee on the clamped value.
8052 %add = fadd nnan float %a, 0.5
; max(%add, 1.0) followed by min(..., 16.0): clamps %add to [1.0, 16.0].
8053 %max = call float @llvm.maxnum.f32(float %add, float 1.0)
8054 %med = call float @llvm.minnum.f32(float %max, float 16.0)
8056 store float %med, ptr addrspace(1) %out.gep
; Second use of the constant 16.0. It is an fadd (without nnan) rather than a
; direct store of the constant; see the FIXME above the function. The volatile
; store keeps the otherwise unused result alive.
8058 %extra.use = fadd float %a, 16.0
8059 store volatile float %extra.use, ptr addrspace(1) undef
; Same clamp as @two_non_inline_constant (nnan value clamped between 8.0 and
; 16.0 via maxnum then minnum), but here both constants have an additional use
; through separate fadd instructions whose results are stored with volatile
; stores.
; Expected output below: every run line folds the clamp into one v_med3_f32.
; 8.0 appears as 0x41000000 and 16.0 as 0x41800000; which of the two is placed
; in a register and which stays a literal differs between run lines.
8063 define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
8064 ; SI-SDAG-LABEL: two_non_inline_constant_multi_use:
8066 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
8067 ; SI-SDAG-NEXT: s_mov_b32 s6, 0
8068 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
8069 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8070 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
8071 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
8072 ; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3]
8073 ; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8074 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x41000000
8075 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000
8076 ; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7]
8077 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
8078 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
8079 ; SI-SDAG-NEXT: v_add_f32_e32 v4, 0.5, v2
8080 ; SI-SDAG-NEXT: v_add_f32_e32 v5, 0x41800000, v2
8081 ; SI-SDAG-NEXT: v_add_f32_e32 v2, 0x41000000, v2
8082 ; SI-SDAG-NEXT: v_med3_f32 v3, v4, s4, v3
8083 ; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8084 ; SI-SDAG-NEXT: buffer_store_dword v5, off, s[4:7], 0
8085 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
8086 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0
8087 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
8088 ; SI-SDAG-NEXT: s_endpgm
8090 ; SI-GISEL-LABEL: two_non_inline_constant_multi_use:
8091 ; SI-GISEL: ; %bb.0:
8092 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
8093 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8094 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
8095 ; SI-GISEL-NEXT: s_mov_b32 s6, 0
8096 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
8097 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
8098 ; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
8099 ; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8100 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x41000000
8101 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000
8102 ; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
8103 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
8104 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
8105 ; SI-GISEL-NEXT: v_add_f32_e32 v4, 0.5, v2
8106 ; SI-GISEL-NEXT: v_add_f32_e32 v5, 0x41800000, v2
8107 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x41000000, v2
8108 ; SI-GISEL-NEXT: v_med3_f32 v3, v4, s4, v3
8109 ; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8110 ; SI-GISEL-NEXT: buffer_store_dword v5, off, s[4:7], 0
8111 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
8112 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0
8113 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
8114 ; SI-GISEL-NEXT: s_endpgm
8116 ; VI-SDAG-LABEL: two_non_inline_constant_multi_use:
8118 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
8119 ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
8120 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000
8121 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
8122 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
8123 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2
8124 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
8125 ; VI-SDAG-NEXT: flat_load_dword v3, v[0:1]
8126 ; VI-SDAG-NEXT: s_mov_b32 s2, 0x41000000
8127 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
8128 ; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2
8129 ; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
8130 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
8131 ; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3
8132 ; VI-SDAG-NEXT: v_med3_f32 v2, v2, s2, v4
8133 ; VI-SDAG-NEXT: v_add_f32_e32 v5, 0x41800000, v3
8134 ; VI-SDAG-NEXT: v_add_f32_e32 v3, 0x41000000, v3
8135 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
8136 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v5
8137 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
8138 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v3
8139 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
8140 ; VI-SDAG-NEXT: s_endpgm
8142 ; VI-GISEL-LABEL: two_non_inline_constant_multi_use:
8143 ; VI-GISEL: ; %bb.0:
8144 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
8145 ; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
8146 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41800000
8147 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
8148 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
8149 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
8150 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
8151 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
8152 ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
8153 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
8154 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x41000000
8155 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
8156 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
8157 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
8158 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
8159 ; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3
8160 ; VI-GISEL-NEXT: v_med3_f32 v2, v2, s2, v4
8161 ; VI-GISEL-NEXT: v_add_f32_e32 v5, 0x41800000, v3
8162 ; VI-GISEL-NEXT: v_add_f32_e32 v3, 0x41000000, v3
8163 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
8164 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v5
8165 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
8166 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v3
8167 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
8168 ; VI-GISEL-NEXT: s_endpgm
8170 ; GFX9-LABEL: two_non_inline_constant_multi_use:
8172 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
8173 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8174 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x41800000
8175 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8176 ; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
8177 ; GFX9-NEXT: s_mov_b32 s2, 0x41000000
8178 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8179 ; GFX9-NEXT: v_add_f32_e32 v3, 0.5, v1
8180 ; GFX9-NEXT: v_add_f32_e32 v4, 0x41800000, v1
8181 ; GFX9-NEXT: v_add_f32_e32 v1, 0x41000000, v1
8182 ; GFX9-NEXT: v_med3_f32 v2, v3, s2, v2
8183 ; GFX9-NEXT: global_store_dword v0, v2, s[0:1]
8184 ; GFX9-NEXT: global_store_dword v[0:1], v4, off
8185 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8186 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
8187 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8188 ; GFX9-NEXT: s_endpgm
8190 ; GFX11-SDAG-LABEL: two_non_inline_constant_multi_use:
8191 ; GFX11-SDAG: ; %bb.0:
8192 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8193 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8194 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
8195 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
8196 ; GFX11-SDAG-NEXT: s_mov_b32 s2, 0x41000000
8197 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
8198 ; GFX11-SDAG-NEXT: v_add_f32_e32 v3, 0x41800000, v1
8199 ; GFX11-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v1
8200 ; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 0x41000000, v1
8201 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
8202 ; GFX11-SDAG-NEXT: v_med3_f32 v2, v2, s2, 0x41800000
8203 ; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1]
8204 ; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v3, off dlc
8205 ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
8206 ; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v1, off dlc
8207 ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
8208 ; GFX11-SDAG-NEXT: s_nop 0
8209 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
8210 ; GFX11-SDAG-NEXT: s_endpgm
8212 ; GFX11-GISEL-LABEL: two_non_inline_constant_multi_use:
8213 ; GFX11-GISEL: ; %bb.0:
8214 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8215 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
8216 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
8217 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
8218 ; GFX11-GISEL-NEXT: s_mov_b32 s2, 0x41800000
8219 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
8220 ; GFX11-GISEL-NEXT: v_add_f32_e32 v3, 0x41800000, v1
8221 ; GFX11-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v1
8222 ; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 0x41000000, v1
8223 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
8224 ; GFX11-GISEL-NEXT: v_med3_f32 v2, v2, 0x41000000, s2
8225 ; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1]
8226 ; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v3, off dlc
8227 ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
8228 ; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v1, off dlc
8229 ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
8230 ; GFX11-GISEL-NEXT: s_nop 0
8231 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
8232 ; GFX11-GISEL-NEXT: s_endpgm
; Each work item reads element %tid of %aptr and writes element %tid of %out.
8233 %tid = call i32 @llvm.amdgcn.workitem.id.x()
8234 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
8235 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
8236 %a = load float, ptr addrspace(1) %gep0
; The function attributes (#1) leave no-nans-fp-math off, so the nnan flag on
; this fadd is the only no-NaN guarantee on the clamped value.
8237 %add = fadd nnan float %a, 0.5
; max(%add, 8.0) followed by min(..., 16.0): clamps %add to [8.0, 16.0].
8238 %max = call float @llvm.maxnum.f32(float %add, float 8.0)
8239 %med = call float @llvm.minnum.f32(float %max, float 16.0)
8241 store float %med, ptr addrspace(1) %out.gep
; Additional uses of both clamp constants (16.0, then 8.0). Each result is
; written with a volatile store so the fadd is not dropped as dead code.
8243 %extra.use0 = fadd float %a, 16.0
8244 store volatile float %extra.use0, ptr addrspace(1) undef
8245 %extra.use1 = fadd float %a, 8.0
8246 store volatile float %extra.use1, ptr addrspace(1) undef
; Intrinsics referenced by the tests in this file. All of them carry attribute
; group #0.
8250 declare i32 @llvm.amdgcn.workitem.id.x() #0
8251 declare float @llvm.fabs.f32(float) #0
8252 declare float @llvm.minnum.f32(float, float) #0
8253 declare float @llvm.maxnum.f32(float, float) #0
8254 declare double @llvm.minnum.f64(double, double) #0
8255 declare double @llvm.maxnum.f64(double, double) #0
8256 declare half @llvm.fabs.f16(half) #0
8257 declare half @llvm.minnum.f16(half, half) #0
8258 declare half @llvm.maxnum.f16(half, half) #0
; Attribute groups:
;   #0 - attributes for the intrinsic declarations above.
;   #1 - kernels compiled with no-nans-fp-math disabled; any no-NaN assumption
;        has to come from per-instruction flags such as nnan.
;   #2 - same as #1 except that no-nans-fp-math is enabled for the function.
8260 attributes #0 = { nounwind readnone }
8261 attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
8262 attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
8263 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: