; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s

; Make sure the memory operand information is preserved.
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s
; Shader entry point with both operands passed `inreg` (SGPRs s2/s3).
; The checks expect each operand to be copied into a VGPR before the DS
; atomic, and the returned value to be live out in v0. The GFX8 checks
; additionally expect `s_mov_b32 m0, -1` ahead of the DS instruction; the
; GFX9 checks do not. The MIR checks pin the memory operand
; (load store (s32) on %ir.ptr, addrspace 3).
define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmax_f32_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $sgpr2, $sgpr3
; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $sgpr2, $sgpr3
; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}
; Same as the plain `inreg` case, but the pointer is first advanced by
; 128 floats with a GEP. The checks expect that constant to be folded into
; the DS instruction's immediate offset field (offset:512 in the assembly,
; operand 512 in the MIR) and the memory operand to refer to %ir.gep.
define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmax_f32_ss_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $sgpr2, $sgpr3
; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $sgpr2, $sgpr3
; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}
; `inreg` operands with the intrinsic's result left unused (void shader).
; The checks still expect the returning form of the instruction
; (ds_max_rtn_f32 / DS_MAX_RTN_F32), and the program ends with s_endpgm
; without a trailing wait on the DS result.
define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmax_f32_ss_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_endpgm
; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $sgpr2, $sgpr3
; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR:   S_ENDPGM 0
; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $sgpr2, $sgpr3
; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR:   S_ENDPGM 0
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}
; `inreg` operands, pointer advanced by 128 floats, result unused.
; Combines the two previous variations: the checks expect the GEP constant
; as the immediate offset (512) and the returning instruction form even
; though the value is dead.
define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_endpgm
; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $sgpr2, $sgpr3
; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR:   S_ENDPGM 0
; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $sgpr2, $sgpr3
; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR:   S_ENDPGM 0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}
; Ordinary (non-shader) function: the operands arrive in v0/v1 and are used
; by the DS atomic directly, with no SGPR-to-VGPR copies. The checks expect
; the result in v0 and a return through s[30:31]. As in the shader tests,
; only the GFX8 checks expect `s_mov_b32 m0, -1`.
define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}
; VGPR operands with the pointer advanced by 128 floats. The checks expect
; the GEP constant to appear as the instruction's immediate offset (512)
; rather than as a separate add, and the memory operand to name %ir.gep.
define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}
; VGPR operands in a void function, so the intrinsic's result is never used.
; The checks still expect the returning instruction form followed by
; `s_waitcnt lgkmcnt(0)`, and the MIR return carries no implicit $vgpr0 use.
define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}
; VGPR operands, pointer advanced by 128 floats, result unused.
; The checks expect the immediate offset of 512 together with the returning
; instruction form, and a MIR return with no implicit $vgpr0 use.
define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}
; VGPR operands with the intrinsic's final i1 argument set to true.
; The expected assembly is the same as in the non-volatile VGPR test; the
; difference this test pins down is in the MIR, where the memory operand
; must carry the `volatile` flag.
define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_volatile:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_volatile:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR:   $m0 = S_MOV_B32 -1
; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
  ret float %ret
}
; Intrinsic under test. Operands: LDS pointer (addrspace 3), float value, and
; three immediates. The tests in this file pass 0 for both i32 immediates and
; set the trailing i1 to true only in the volatile test, where the MIR memory
; operand is expected to be marked volatile.
declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0

attributes #0 = { argmemonly nounwind willreturn }