1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
6 ; Make sure the memory operand information is preserved.
7 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s
8 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s
; Returning form in an amdgpu_ps shader: pointer and value are both passed
; inreg and the call uses the pointer directly (no getelementptr). The expected
; MIR copies $sgpr2/$sgpr3 into vgpr_32 registers before the DS instruction,
; and only the tonga run expects m0 to be written with -1.
11 define amdgpu_ps float @ds_fmax_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) {
12 ; GFX8-LABEL: ds_fmax_f32_ss:
14 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
15 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
16 ; GFX8-NEXT: s_mov_b32 m0, -1
17 ; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1
18 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
19 ; GFX8-NEXT: ; return to shader part epilog
21 ; GFX9-LABEL: ds_fmax_f32_ss:
23 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
24 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
25 ; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1
26 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
27 ; GFX9-NEXT: ; return to shader part epilog
28 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
29 ; GFX8-MIR: bb.1 (%ir-block.0):
30 ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
31 ; GFX8-MIR-NEXT: {{ $}}
32 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
33 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
34 ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
35 ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
36 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
37 ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
38 ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
39 ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
40 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
41 ; GFX9-MIR: bb.1 (%ir-block.0):
42 ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
43 ; GFX9-MIR-NEXT: {{ $}}
44 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
45 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
46 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
47 ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
48 ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
49 ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
50 ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
51 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Returning form in an amdgpu_ps shader with inreg operands, where the address
; is %ptr advanced by 128 floats. The expected output carries that displacement
; on the DS instruction itself (offset:512 in the ISA, 512 as the MIR
; immediate), and the memory operand refers to %ir.gep.
55 define amdgpu_ps float @ds_fmax_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) {
56 ; GFX8-LABEL: ds_fmax_f32_ss_offset:
58 ; GFX8-NEXT: v_mov_b32_e32 v0, s3
59 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
60 ; GFX8-NEXT: s_mov_b32 m0, -1
61 ; GFX8-NEXT: ds_max_rtn_f32 v0, v1, v0 offset:512
62 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
63 ; GFX8-NEXT: ; return to shader part epilog
65 ; GFX9-LABEL: ds_fmax_f32_ss_offset:
67 ; GFX9-NEXT: v_mov_b32_e32 v0, s3
68 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
69 ; GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 offset:512
70 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
71 ; GFX9-NEXT: ; return to shader part epilog
72 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
73 ; GFX8-MIR: bb.1 (%ir-block.0):
74 ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
75 ; GFX8-MIR-NEXT: {{ $}}
76 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
77 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
78 ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
79 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
80 ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
81 ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
82 ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
83 ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
84 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
85 ; GFX9-MIR: bb.1 (%ir-block.0):
86 ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
87 ; GFX9-MIR-NEXT: {{ $}}
88 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
89 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
90 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
91 ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
92 ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
93 ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
94 ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; 128 float elements past %ptr, matching the 512 expected on the DS instruction.
95 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
96 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; No-return form in an amdgpu_ps shader with inreg operands: the function is
; void and the call result is bound to %unused. The expected instruction is
; ds_max_f32 / DS_MAX_F32 (no destination register) instead of the _rtn
; variant, and the shader ends with s_endpgm.
100 define amdgpu_ps void @ds_fmax_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) {
101 ; GFX8-LABEL: ds_fmax_f32_ss_nortn:
103 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
104 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
105 ; GFX8-NEXT: s_mov_b32 m0, -1
106 ; GFX8-NEXT: ds_max_f32 v0, v1
107 ; GFX8-NEXT: s_endpgm
109 ; GFX9-LABEL: ds_fmax_f32_ss_nortn:
111 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
112 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
113 ; GFX9-NEXT: ds_max_f32 v0, v1
114 ; GFX9-NEXT: s_endpgm
115 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
116 ; GFX8-MIR: bb.1 (%ir-block.0):
117 ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
118 ; GFX8-MIR-NEXT: {{ $}}
119 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
120 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
121 ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
122 ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
123 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
124 ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
125 ; GFX8-MIR-NEXT: S_ENDPGM 0
126 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
127 ; GFX9-MIR: bb.1 (%ir-block.0):
128 ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
129 ; GFX9-MIR-NEXT: {{ $}}
130 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
131 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
132 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
133 ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
134 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
135 ; GFX9-MIR-NEXT: S_ENDPGM 0
136 %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; No-return form in an amdgpu_ps shader with inreg operands and an address
; 128 floats past %ptr. The expected output uses ds_max_f32 / DS_MAX_F32 with
; the displacement on the instruction (offset:512 in the ISA, 512 as the MIR
; immediate) and a memory operand on %ir.gep.
140 define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) {
141 ; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn:
143 ; GFX8-NEXT: v_mov_b32_e32 v0, s3
144 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
145 ; GFX8-NEXT: s_mov_b32 m0, -1
146 ; GFX8-NEXT: ds_max_f32 v1, v0 offset:512
147 ; GFX8-NEXT: s_endpgm
149 ; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn:
151 ; GFX9-NEXT: v_mov_b32_e32 v0, s3
152 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
153 ; GFX9-NEXT: ds_max_f32 v1, v0 offset:512
154 ; GFX9-NEXT: s_endpgm
155 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
156 ; GFX8-MIR: bb.1 (%ir-block.0):
157 ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
158 ; GFX8-MIR-NEXT: {{ $}}
159 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
160 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
161 ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
162 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
163 ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
164 ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
165 ; GFX8-MIR-NEXT: S_ENDPGM 0
166 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
167 ; GFX9-MIR: bb.1 (%ir-block.0):
168 ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
169 ; GFX9-MIR-NEXT: {{ $}}
170 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
171 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
172 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
173 ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
174 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
175 ; GFX9-MIR-NEXT: S_ENDPGM 0
; 128 float elements past %ptr, matching the 512 expected on the DS instruction.
176 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
177 %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Returning form in a plain (non-amdgpu_ps) function whose arguments are not
; inreg. The expected MIR takes the operands from $vgpr0/$vgpr1 with no
; SGPR-to-VGPR copies, and the function returns through s_setpc_b64 /
; SI_RETURN instead of the shader epilog.
181 define float @ds_fmax_f32_vv(ptr addrspace(3) %ptr, float %val) {
182 ; GFX8-LABEL: ds_fmax_f32_vv:
184 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX8-NEXT: s_mov_b32 m0, -1
186 ; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1
187 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
188 ; GFX8-NEXT: s_setpc_b64 s[30:31]
190 ; GFX9-LABEL: ds_fmax_f32_vv:
192 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1
194 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
195 ; GFX9-NEXT: s_setpc_b64 s[30:31]
196 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
197 ; GFX8-MIR: bb.1 (%ir-block.0):
198 ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1
199 ; GFX8-MIR-NEXT: {{ $}}
200 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
201 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
202 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
203 ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
204 ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
205 ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0
206 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
207 ; GFX9-MIR: bb.1 (%ir-block.0):
208 ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1
209 ; GFX9-MIR-NEXT: {{ $}}
210 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
211 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
212 ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
213 ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
214 ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0
215 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Returning form in a plain function with non-inreg arguments and an address
; 128 floats past %ptr. The expected output carries the displacement on the DS
; instruction (offset:512 in the ISA, 512 as the MIR immediate) with a memory
; operand on %ir.gep.
219 define float @ds_fmax_f32_vv_offset(ptr addrspace(3) %ptr, float %val) {
220 ; GFX8-LABEL: ds_fmax_f32_vv_offset:
222 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX8-NEXT: s_mov_b32 m0, -1
224 ; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512
225 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
226 ; GFX8-NEXT: s_setpc_b64 s[30:31]
228 ; GFX9-LABEL: ds_fmax_f32_vv_offset:
230 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231 ; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512
232 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
233 ; GFX9-NEXT: s_setpc_b64 s[30:31]
234 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
235 ; GFX8-MIR: bb.1 (%ir-block.0):
236 ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1
237 ; GFX8-MIR-NEXT: {{ $}}
238 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
239 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
240 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
241 ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
242 ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
243 ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0
244 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
245 ; GFX9-MIR: bb.1 (%ir-block.0):
246 ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1
247 ; GFX9-MIR-NEXT: {{ $}}
248 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
249 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
250 ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
251 ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
252 ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0
; 128 float elements past %ptr, matching the 512 expected on the DS instruction.
253 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
254 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; No-return form in a plain function with non-inreg arguments. The function is
; declared void, so the call result cannot be returned; the expected
; instruction is ds_max_f32 / DS_MAX_F32 (no destination register) and the MIR
; ends in SI_RETURN with no implicit $vgpr0.
; (review) The result is named %ret here, while the amdgpu_ps no-return tests
; in this file name it %unused; consider aligning the names.
258 define void @ds_fmax_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) {
259 ; GFX8-LABEL: ds_fmax_f32_vv_nortn:
261 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GFX8-NEXT: s_mov_b32 m0, -1
263 ; GFX8-NEXT: ds_max_f32 v0, v1
264 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
265 ; GFX8-NEXT: s_setpc_b64 s[30:31]
267 ; GFX9-LABEL: ds_fmax_f32_vv_nortn:
269 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270 ; GFX9-NEXT: ds_max_f32 v0, v1
271 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
272 ; GFX9-NEXT: s_setpc_b64 s[30:31]
273 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
274 ; GFX8-MIR: bb.1 (%ir-block.0):
275 ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1
276 ; GFX8-MIR-NEXT: {{ $}}
277 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
278 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
279 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
280 ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
281 ; GFX8-MIR-NEXT: SI_RETURN
282 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
283 ; GFX9-MIR: bb.1 (%ir-block.0):
284 ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1
285 ; GFX9-MIR-NEXT: {{ $}}
286 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
287 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
288 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
289 ; GFX9-MIR-NEXT: SI_RETURN
290 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; No-return form in a plain function with non-inreg arguments and an address
; 128 floats past %ptr. The expected output uses ds_max_f32 / DS_MAX_F32 with
; the displacement on the instruction (offset:512 in the ISA, 512 as the MIR
; immediate) and a memory operand on %ir.gep.
; (review) The result is named %ret here, while the amdgpu_ps no-return tests
; in this file name it %unused; consider aligning the names.
294 define void @ds_fmax_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) {
295 ; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn:
297 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX8-NEXT: s_mov_b32 m0, -1
299 ; GFX8-NEXT: ds_max_f32 v0, v1 offset:512
300 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX8-NEXT: s_setpc_b64 s[30:31]
303 ; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn:
305 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; GFX9-NEXT: ds_max_f32 v0, v1 offset:512
307 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
308 ; GFX9-NEXT: s_setpc_b64 s[30:31]
309 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
310 ; GFX8-MIR: bb.1 (%ir-block.0):
311 ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1
312 ; GFX8-MIR-NEXT: {{ $}}
313 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
314 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
315 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
316 ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
317 ; GFX8-MIR-NEXT: SI_RETURN
318 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
319 ; GFX9-MIR: bb.1 (%ir-block.0):
320 ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1
321 ; GFX9-MIR-NEXT: {{ $}}
322 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
323 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
324 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
325 ; GFX9-MIR-NEXT: SI_RETURN
; 128 float elements past %ptr, matching the 512 expected on the DS instruction.
326 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
327 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Same shape as the plain returning vv test, except that the final i1 operand
; of the call is true. The expected ISA is unchanged; the difference shows up
; in the MIR memory operand, which is expected to read "volatile load store".
331 define float @ds_fmax_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
332 ; GFX8-LABEL: ds_fmax_f32_vv_volatile:
334 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX8-NEXT: s_mov_b32 m0, -1
336 ; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1
337 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
338 ; GFX8-NEXT: s_setpc_b64 s[30:31]
340 ; GFX9-LABEL: ds_fmax_f32_vv_volatile:
342 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1
344 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
345 ; GFX9-NEXT: s_setpc_b64 s[30:31]
346 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
347 ; GFX8-MIR: bb.1 (%ir-block.0):
348 ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1
349 ; GFX8-MIR-NEXT: {{ $}}
350 ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
351 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
352 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
353 ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
354 ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
355 ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0
356 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
357 ; GFX9-MIR: bb.1 (%ir-block.0):
358 ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1
359 ; GFX9-MIR-NEXT: {{ $}}
360 ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
361 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
362 ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
363 ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
364 ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0
365 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)
; Intrinsic under test. It takes an addrspace(3) pointer, a float, and three
; immediate operands (two i32, one i1) and returns a float. Every call visible
; in this file passes 0 for both i32 immediates; only the i1 varies.
; (review) presumably the i32 immediates select ordering and scope - confirm
; against the intrinsic definition before relying on that.
369 declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
371 attributes #0 = { argmemonly nounwind willreturn }