1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -global-isel -stop-after=irtranslator -verify-machineinstrs -o - %s | FileCheck %s
; Convergent inline asm in a kernel. The expected INLINEASM carries extra-info
; 33, annotated as sideeffect + isconvergent, and keeps the !srcloc operand (!0).
4 define amdgpu_kernel void @asm_convergent() convergent{
5 ; CHECK-LABEL: name: asm_convergent
6 ; CHECK: bb.1 (%ir-block.0):
7 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
9 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
10 ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !0
11 ; CHECK-NEXT: S_ENDPGM 0
12 call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0
; Empty asm with a "~{memory}" clobber, followed by an empty asm with no
; constraints. The first INLINEASM is expected with extra-info 25 (sideeffect
; mayload maystore); the second with extra-info 1 (sideeffect only).
16 define amdgpu_kernel void @asm_simple_memory_clobber() {
17 ; CHECK-LABEL: name: asm_simple_memory_clobber
18 ; CHECK: bb.1 (%ir-block.0):
19 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
21 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
22 ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0
23 ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0
24 ; CHECK-NEXT: S_ENDPGM 0
25 call void asm sideeffect "", "~{memory}"(), !srcloc !0
26 call void asm sideeffect "", ""(), !srcloc !0
; Clobber of a specific VGPR ("~{v0}"). The expected INLINEASM has a clobber
; operand (12) followed by an implicit-def early-clobber of $vgpr0.
30 define amdgpu_kernel void @asm_simple_vgpr_clobber() {
31 ; CHECK-LABEL: name: asm_simple_vgpr_clobber
32 ; CHECK: bb.1 (%ir-block.0):
33 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
35 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
36 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !0
37 ; CHECK-NEXT: S_ENDPGM 0
38 call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0
; Clobber of a specific SGPR ("~{s0}"). The expected INLINEASM has a clobber
; operand (12) followed by an implicit-def early-clobber of $sgpr0.
42 define amdgpu_kernel void @asm_simple_sgpr_clobber() {
43 ; CHECK-LABEL: name: asm_simple_sgpr_clobber
44 ; CHECK: bb.1 (%ir-block.0):
45 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
47 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
48 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !0
49 ; CHECK-NEXT: S_ENDPGM 0
50 call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0
; Clobber of a specific AGPR ("~{a0}"). The expected INLINEASM has a clobber
; operand (12) followed by an implicit-def early-clobber of $agpr0.
54 define amdgpu_kernel void @asm_simple_agpr_clobber() {
55 ; CHECK-LABEL: name: asm_simple_agpr_clobber
56 ; CHECK: bb.1 (%ir-block.0):
57 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
59 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
60 ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !0
61 ; CHECK-NEXT: S_ENDPGM 0
62 call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0
; Two early-clobber VGPR outputs ("=&v,=&v"). Each is expected as a
; regdef-ec:VGPR_32 operand with a "def early-clobber" virtual register; the
; two results are then copied out as s32 and summed with G_ADD.
66 define i32 @asm_vgpr_early_clobber() {
67 ; CHECK-LABEL: name: asm_vgpr_early_clobber
68 ; CHECK: bb.1 (%ir-block.0):
69 ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2490379 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2490379 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0
70 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
71 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
72 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
73 ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
74 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
; The asm call result is left unnamed; the extractvalue lines below refer to it as %1.
75 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0
76 %asmresult = extractvalue { i32, i32 } %1, 0
77 %asmresult1 = extractvalue { i32, i32 } %1, 1
78 %add = add i32 %asmresult, %asmresult1
; Output pinned to a specific physical VGPR ("={v1}"). The expected INLINEASM
; has a regdef operand (10) with an implicit-def of $vgpr1, which is then
; copied into $vgpr0 for the return. No sideeffect flag is expected (extra-info 0).
82 define i32 @test_specific_vgpr_output() nounwind {
83 ; CHECK-LABEL: name: test_specific_vgpr_output
85 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1
86 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1
87 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
88 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
90 %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind
; Single output with the generic VGPR constraint ("=v"). The expected INLINEASM
; defines a virtual register through a regdef:VGPR_32 operand, and the result
; is copied into $vgpr0 for the return.
94 define i32 @test_single_vgpr_output() nounwind {
95 ; CHECK-LABEL: name: test_single_vgpr_output
97 ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2490378 /* regdef:VGPR_32 */, def %8
98 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
99 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
100 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
102 %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind
; Single 32-bit output with the generic SGPR constraint ("=s"). The expected
; INLINEASM defines a virtual register through a regdef:SReg_32 operand, and
; the result is copied into $vgpr0 for the return.
106 define i32 @test_single_sgpr_output_s32() nounwind {
107 ; CHECK-LABEL: name: test_single_sgpr_output_s32
109 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8
110 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
111 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
112 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
114 %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
118 ; Check support for returning several floats
; Two "=v" float outputs are expected as two regdef:VGPR_32 defs (%8, %9); the
; results are copied out as s32 and combined with G_FADD.
119 define float @test_multiple_register_outputs_same() #0 {
120 ; CHECK-LABEL: name: test_multiple_register_outputs_same
121 ; CHECK: bb.1 (%ir-block.0):
122 ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2490378 /* regdef:VGPR_32 */, def %8, 2490378 /* regdef:VGPR_32 */, def %9
123 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
124 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
125 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
126 ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
127 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
128 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"()
129 %asmresult = extractvalue { float, float } %1, 0
130 %asmresult1 = extractvalue { float, float } %1, 1
131 %add = fadd float %asmresult, %asmresult1
135 ; Check support for returning outputs of different types (float and double)
; The float output is expected as regdef:VGPR_32 and the double output as
; regdef:VReg_64. Only the double (struct element 1) is returned, unmerged
; into two s32 halves placed in $vgpr0 and $vgpr1.
136 define double @test_multiple_register_outputs_mixed() #0 {
137 ; CHECK-LABEL: name: test_multiple_register_outputs_mixed
138 ; CHECK: bb.1 (%ir-block.0):
139 ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2490378 /* regdef:VGPR_32 */, def %8, 3801098 /* regdef:VReg_64 */, def %9
140 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
141 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
142 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
143 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
144 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
145 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
146 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"()
147 %asmresult = extractvalue { float, double } %1, 1
148 ret double %asmresult
; Vector output pinned to a physical register pair ("={v[14:15]}"). The
; expected INLINEASM has a regdef operand (10) with an implicit-def of
; $vgpr14_vgpr15, copied out as <2 x s32>; element 0 is then extracted and
; returned in $vgpr0.
152 define float @test_vector_output() nounwind {
153 ; CHECK-LABEL: name: test_vector_output
154 ; CHECK: bb.1 (%ir-block.0):
155 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
156 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15
157 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15
158 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
159 ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32)
160 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
161 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind
162 %2 = extractelement <2 x float> %1, i32 0
; Constant passed through a VGPR input constraint ("v"). The constant 42 is
; expected to be materialized with G_CONSTANT, copied into a vgpr_32 virtual
; register, and passed as a reguse:VGPR_32 operand.
166 define amdgpu_kernel void @test_input_vgpr_imm() {
167 ; CHECK-LABEL: name: test_input_vgpr_imm
168 ; CHECK: bb.1 (%ir-block.0):
169 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
171 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
172 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
173 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
174 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 2490377 /* reguse:VGPR_32 */, [[COPY1]]
175 ; CHECK-NEXT: S_ENDPGM 0
176 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42)
; Constant passed through an SGPR input constraint ("s"). The constant 42 is
; expected to be materialized with G_CONSTANT, copied into an sreg_32 virtual
; register, and passed as a reguse:SReg_32 operand.
180 define amdgpu_kernel void @test_input_sgpr_imm() {
181 ; CHECK-LABEL: name: test_input_sgpr_imm
182 ; CHECK: bb.1 (%ir-block.0):
183 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
185 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
186 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
187 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
188 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[COPY1]]
189 ; CHECK-NEXT: S_ENDPGM 0
190 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42)
; Immediate input constraint ("i") with an i32 and an i64 value. Both are
; expected as a plain imm operand (13) carrying 42, with no G_CONSTANT or
; register copy in between. The expected extra-info is 9 (sideeffect mayload).
194 define amdgpu_kernel void @test_input_imm() {
195 ; CHECK-LABEL: name: test_input_imm
196 ; CHECK: bb.1 (%ir-block.0):
197 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
199 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
200 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
201 ; CHECK-NEXT: INLINEASM &"s_mov_b64 s[0:1], $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
202 ; CHECK-NEXT: S_ENDPGM 0
203 call void asm sideeffect "s_mov_b32 s0, $0", "i"(i32 42)
204 call void asm sideeffect "s_mov_b64 s[0:1], $0", "i"(i64 42)
; One VGPR output and one VGPR input ("=v,v"). The function argument arriving
; in $vgpr0 is expected to be copied into a vgpr_32 virtual register and
; passed as reguse:VGPR_32; the output is a regdef:VGPR_32 def.
208 define float @test_input_vgpr(i32 %src) nounwind {
209 ; CHECK-LABEL: name: test_input_vgpr
211 ; CHECK-NEXT: liveins: $vgpr0
213 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
214 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
215 ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2490378 /* regdef:VGPR_32 */, def %9, 2490377 /* reguse:VGPR_32 */, [[COPY1]]
216 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
217 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
218 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
220 %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind
; Indirect memory input constraint ("*m") on an addrspace(3) pointer. The
; pointer is expected to be passed directly as a mem:m operand ([[COPY]](p3)),
; and the INLINEASM extra-info is 8 (mayload).
224 define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind {
225 ; CHECK-LABEL: name: test_memory_constraint
226 ; CHECK: bb.1 (%ir-block.0):
227 ; CHECK-NEXT: liveins: $vgpr0
229 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
230 ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2490378 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
231 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
232 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
233 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
234 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a)
; Matching (tied) input constraint ("=v,0"). The input is expected to be copied
; into a vgpr_32 virtual register and passed as "reguse tiedto:$0" with
; (tied-def 3), tying it to the regdef:VGPR_32 output.
; NOTE(review): %and used by the asm call is not defined in the lines visible
; here; the CHECK lines expect it to be a G_AND of the argument with constant 1.
238 define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
239 ; CHECK-LABEL: name: test_vgpr_matching_constraint
240 ; CHECK: bb.1 (%ir-block.0):
241 ; CHECK-NEXT: liveins: $vgpr0
243 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
244 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
245 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
246 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
247 ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2490378 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
248 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11
249 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
250 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
252 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and)
; Matching constraint with SGPRs ("=s,s,0"). Two earlier asm results feed a
; third asm: the first is expected as a plain reguse:SReg_32 operand, the
; second as "reguse tiedto:$0" with (tied-def 3). Both inputs are first
; copied into sreg_32 virtual registers.
256 define i32 @test_sgpr_matching_constraint() nounwind {
257 ; CHECK-LABEL: name: test_sgpr_matching_constraint
259 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8
260 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
261 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %10
262 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10
263 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
264 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
265 ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %12, 2621449 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
266 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12
267 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
268 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
270 %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
271 %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
272 %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1) nounwind
; Three outputs with three matching inputs given out of order ("0,2,1").
; The inputs (%c, %a, %b) are expected as "reguse tiedto:$0", "tiedto:$2" and
; "tiedto:$1" operands with (tied-def 3), (tied-def 7) and (tied-def 5)
; respectively. Each of the three results is then stored to an undef
; addrspace(1) pointer.
276 define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
277 ; CHECK-LABEL: name: test_many_matching_constraints
278 ; CHECK: bb.1 (%ir-block.0):
279 ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
281 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
282 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
283 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
284 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
285 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
286 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
287 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
288 ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2490378 /* regdef:VGPR_32 */, def %11, 2490378 /* regdef:VGPR_32 */, def %12, 2490378 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
289 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11
290 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12
291 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13
292 ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
293 ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
294 ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
295 ; CHECK-NEXT: SI_RETURN
296 %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b)
297 %asmresult0 = extractvalue {i32, i32, i32} %asm, 0
298 store i32 %asmresult0, ptr addrspace(1) undef
299 %asmresult1 = extractvalue {i32, i32, i32} %asm, 1
300 store i32 %asmresult1, ptr addrspace(1) undef
301 %asmresult2 = extractvalue {i32, i32, i32} %asm, 2
302 store i32 %asmresult2, ptr addrspace(1) undef
; Value produced by an "=s" asm and consumed by a second asm through a
; matching constraint tied to a "=v" output ("=v,0"). The SGPR result is
; expected to be copied into a vgpr_32 virtual register before being passed
; as "reguse tiedto:$0" with (tied-def 3).
306 define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
307 ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
309 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8
310 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
311 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
312 ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2490378 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
313 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
314 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
315 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
317 %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
318 %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
; "n" input constraint combined with the ${0:n} operand modifier in the asm
; string. The value 10 is expected as a plain imm operand (13), and the asm
; string is carried through unchanged.
322 define amdgpu_kernel void @asm_constraint_n_n() {
323 ; CHECK-LABEL: name: asm_constraint_n_n
324 ; CHECK: bb.1 (%ir-block.0):
325 ; CHECK-NEXT: liveins: $sgpr8_sgpr9
327 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
328 ; CHECK-NEXT: INLINEASM &"s_trap ${0:n}", 1 /* sideeffect attdialect */, 13 /* imm */, 10
329 ; CHECK-NEXT: S_ENDPGM 0
330 tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1