1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
2 ; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s
; External callees taking one small integer argument (i1, i8, i16), each
; declared in plain, signext and zeroext form.
6 declare hidden void @external_void_func_i1(i1) #0
7 declare hidden void @external_void_func_i1_signext(i1 signext) #0
8 declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
10 declare hidden void @external_void_func_i8(i8) #0
11 declare hidden void @external_void_func_i8_signext(i8 signext) #0
12 declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
14 declare hidden void @external_void_func_i16(i16) #0
15 declare hidden void @external_void_func_i16_signext(i16 signext) #0
16 declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
; External callees taking 32-bit and 64-bit integers (scalar and vectors of
; i64), followed by the floating-point scalar and vector variants.
18 declare hidden void @external_void_func_i32(i32) #0
19 declare hidden void @external_void_func_i64(i64) #0
20 declare hidden void @external_void_func_v2i64(<2 x i64>) #0
21 declare hidden void @external_void_func_v3i64(<3 x i64>) #0
22 declare hidden void @external_void_func_v4i64(<4 x i64>) #0
24 declare hidden void @external_void_func_f16(half) #0
25 declare hidden void @external_void_func_f32(float) #0
26 declare hidden void @external_void_func_f64(double) #0
27 declare hidden void @external_void_func_v2f32(<2 x float>) #0
28 declare hidden void @external_void_func_v2f64(<2 x double>) #0
29 declare hidden void @external_void_func_v3f32(<3 x float>) #0
30 declare hidden void @external_void_func_v3f64(<3 x double>) #0
31 declare hidden void @external_void_func_v5f32(<5 x float>) #0
; External callees taking vectors with 16-bit elements (i16 / half), followed
; by vectors of i32 from 2 up to 32 elements. The *_i32 variants take one
; extra trailing i32 argument after the vector.
33 declare hidden void @external_void_func_v2i16(<2 x i16>) #0
34 declare hidden void @external_void_func_v2f16(<2 x half>) #0
35 declare hidden void @external_void_func_v3i16(<3 x i16>) #0
36 declare hidden void @external_void_func_v3f16(<3 x half>) #0
37 declare hidden void @external_void_func_v4i16(<4 x i16>) #0
38 declare hidden void @external_void_func_v4f16(<4 x half>) #0
40 declare hidden void @external_void_func_v2i32(<2 x i32>) #0
41 declare hidden void @external_void_func_v3i32(<3 x i32>) #0
42 declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
43 declare hidden void @external_void_func_v4i32(<4 x i32>) #0
44 declare hidden void @external_void_func_v5i32(<5 x i32>) #0
45 declare hidden void @external_void_func_v8i32(<8 x i32>) #0
46 declare hidden void @external_void_func_v16i32(<16 x i32>) #0
47 declare hidden void @external_void_func_v32i32(<32 x i32>) #0
48 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
50 ; Callee with both a return value and an argument.
; Callee returning i32, then the aggregate callees: a { i8, i32 } struct passed
; directly, passed byval through a private (addrspace 5) pointer, and passed
; byval together with an sret result pointer; finally a <16 x i8> callee.
51 declare hidden i32 @external_i32_func_i32(i32) #0
54 declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
55 declare hidden void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval) #0
56 declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret, { i8, i32 } addrspace(5)* byval) #0
58 declare hidden void @external_void_func_v16i8(<16 x i8>) #0
61 ; FIXME: Should be passing -1 (the check below currently expects v0 = 1).
62 ; GCN-LABEL: {{^}}test_call_external_void_func_i1_imm:
63 ; MESA: s_mov_b32 s36, SCRATCH_RSRC_DWORD
65 ; MESA-DAG: s_mov_b64 s[0:1], s[36:37]
67 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
68 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
69 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+4
70 ; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
71 ; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
73 ; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that calls @external_void_func_i1 with the constant i1 true.
75 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
76 call void @external_void_func_i1(i1 true)
80 ; GCN-LABEL: {{^}}test_call_external_void_func_i1_signext:
81 ; MESA: s_mov_b32 s33, s3{{$}}
82 ; HSA: s_mov_b32 s33, s9{{$}}
84 ; HSA: buffer_load_ubyte [[VAR:v[0-9]+]]
85 ; HSA: s_mov_b32 s32, s33
86 ; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
87 ; MESA-DAG: s_mov_b32 s32, s33{{$}}
90 ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
91 ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
92 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4
94 ; GCN: s_waitcnt vmcnt(0)
95 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
96 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i1 from an undef global pointer and passes it
; to the signext callee. The unnamed i32 kernel argument is never read.
98 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
99 %var = load volatile i1, i1 addrspace(1)* undef
100 call void @external_void_func_i1_signext(i1 %var)
104 ; FIXME: The load should be scheduled before the s_getpc_b64 sequence.
105 ; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext:
106 ; MESA: s_mov_b32 s33, s3{{$}}
108 ; HSA: buffer_load_ubyte v0
109 ; HSA-DAG: s_mov_b32 s32, s33{{$}}
111 ; MESA: buffer_load_ubyte v0
112 ; MESA-DAG: s_mov_b32 s32, s33{{$}}
114 ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
115 ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
116 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4
119 ; GCN: s_waitcnt vmcnt(0)
120 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
121 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i1 from an undef global pointer and passes it
; to the zeroext callee. The unnamed i32 kernel argument is never read.
123 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
124 %var = load volatile i1, i1 addrspace(1)* undef
125 call void @external_void_func_i1_zeroext(i1 %var)
129 ; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
130 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
132 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
133 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
134 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4
135 ; GCN-DAG: v_mov_b32_e32 v0, 0x7b
137 ; GCN-DAG: s_mov_b32 s32, s33{{$}}
139 ; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that calls @external_void_func_i8 with the constant 123 (0x7b).
141 define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
142 call void @external_void_func_i8(i8 123)
146 ; FIXME: don't wait before call
147 ; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext:
148 ; HSA-DAG: s_mov_b32 s33, s9{{$}}
149 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
151 ; GCN-DAG: buffer_load_sbyte v0
152 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
153 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
154 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4
156 ; GCN-DAG: s_mov_b32 s32, s33{{$}}
159 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i8 from an undef global pointer and passes it
; to the signext callee. The stack-pointer check above names s33 in full and
; anchors the end of line, like the sibling tests, so a copy from any other
; register beginning with "s3" can no longer satisfy it.
161 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
162 %var = load volatile i8, i8 addrspace(1)* undef
163 call void @external_void_func_i8_signext(i8 %var)
167 ; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext:
168 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
169 ; HSA-DAG: s_mov_b32 s33, s9{{$}}
171 ; GCN-DAG: buffer_load_ubyte v0
172 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
173 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
174 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4
176 ; GCN-DAG: s_mov_b32 s32, s33
179 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i8 from an undef global pointer and passes it
; to the zeroext callee.
181 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
182 %var = load volatile i8, i8 addrspace(1)* undef
183 call void @external_void_func_i8_zeroext(i8 %var)
187 ; GCN-LABEL: {{^}}test_call_external_void_func_i16_imm:
188 ; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
190 ; GCN-DAG: s_mov_b32 s32, s33
; Kernel that calls @external_void_func_i16 with the constant 123 (0x7b).
193 define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
194 call void @external_void_func_i16(i16 123)
198 ; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext:
199 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
201 ; GCN-DAG: buffer_load_sshort v0
202 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
203 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
204 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4
206 ; GCN-DAG: s_mov_b32 s32, s33
209 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i16 from an undef global pointer and passes it
; to the signext callee.
211 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
212 %var = load volatile i16, i16 addrspace(1)* undef
213 call void @external_void_func_i16_signext(i16 %var)
217 ; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext:
218 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
221 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
222 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
223 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4
225 ; GCN-DAG: s_mov_b32 s32, s33
228 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that volatile-loads an i16 from an undef global pointer and passes it
; to the zeroext callee.
230 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
231 %var = load volatile i16, i16 addrspace(1)* undef
232 call void @external_void_func_i16_zeroext(i16 %var)
236 ; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
237 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
239 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
240 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
241 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4
242 ; GCN-DAG: v_mov_b32_e32 v0, 42
243 ; GCN-DAG: s_mov_b32 s32, s33
245 ; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that calls @external_void_func_i32 with the constant 42.
247 define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
248 call void @external_void_func_i32(i32 42)
252 ; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm:
253 ; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
254 ; GCN-DAG: v_mov_b32_e32 v1, 0{{$}}
255 ; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
256 ; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4
257 ; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+4
258 ; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; Kernel that calls @external_void_func_i64 with the constant 123; the checks
; above expect the low dword (0x7b) in v0 and the high dword (0) in v1.
260 define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
261 call void @external_void_func_i64(i64 123)
265 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i64:
266 ; GCN: buffer_load_dwordx4 v[0:3]
; Kernel that loads a <2 x i64> from the null global pointer and passes it to
; the callee.
269 define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
270 %val = load <2 x i64>, <2 x i64> addrspace(1)* null
271 call void @external_void_func_v2i64(<2 x i64> %val)
275 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
276 ; GCN-DAG: v_mov_b32_e32 v0, 1
277 ; GCN-DAG: v_mov_b32_e32 v1, 2
278 ; GCN-DAG: v_mov_b32_e32 v2, 3
279 ; GCN-DAG: v_mov_b32_e32 v3, 4
; Kernel that passes a constant <2 x i64>. The elements are
; 0x0000000200000001 and 0x0000000400000003, so the four dwords are 1, 2, 3, 4.
281 define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
282 call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
286 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
287 ; GCN: buffer_load_dwordx4 v[0:3]
288 ; GCN: v_mov_b32_e32 v4, 1
289 ; GCN: v_mov_b32_e32 v5, 2
; Kernel that builds a <3 x i64> from a loaded <2 x i64> (elements 0 and 1)
; plus the constant 0x0000000200000001 (shuffle index 2) and passes it on.
292 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
293 %load = load <2 x i64>, <2 x i64> addrspace(1)* null
294 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
296 call void @external_void_func_v3i64(<3 x i64> %val)
300 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
301 ; GCN: buffer_load_dwordx4 v[0:3]
302 ; GCN-DAG: v_mov_b32_e32 v4, 1
303 ; GCN-DAG: v_mov_b32_e32 v5, 2
304 ; GCN-DAG: v_mov_b32_e32 v6, 3
305 ; GCN-DAG: v_mov_b32_e32 v7, 4
; Kernel that builds a <4 x i64> from a loaded <2 x i64> (elements 0 and 1)
; followed by two constant elements (shuffle indices 2 and 3) and passes it on.
309 define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
310 %load = load <2 x i64>, <2 x i64> addrspace(1)* null
311 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312 call void @external_void_func_v4i64(<4 x i64> %val)
316 ; GCN-LABEL: {{^}}test_call_external_void_func_f16_imm:
317 ; VI: v_mov_b32_e32 v0, 0x4400
318 ; CI: v_mov_b32_e32 v0, 4.0
; Kernel that calls @external_void_func_f16 with the half constant 4.0.
321 define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
322 call void @external_void_func_f16(half 4.0)
326 ; GCN-LABEL: {{^}}test_call_external_void_func_f32_imm:
327 ; GCN: v_mov_b32_e32 v0, 4.0
; Kernel that calls @external_void_func_f32 with the float constant 4.0.
330 define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
331 call void @external_void_func_f32(float 4.0)
335 ; GCN-LABEL: {{^}}test_call_external_void_func_v2f32_imm:
336 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
337 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
; Kernel that passes the constant vector <1.0, 2.0> as <2 x float>.
339 define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
340 call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
344 ; GCN-LABEL: {{^}}test_call_external_void_func_v3f32_imm:
345 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
346 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
347 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
; Kernel that passes the constant vector <1.0, 2.0, 4.0> as <3 x float>.
350 define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
351 call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
355 ; GCN-LABEL: {{^}}test_call_external_void_func_v5f32_imm:
356 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
357 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
358 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
359 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
360 ; GCN-DAG: v_mov_b32_e32 v4, 0.5
; Kernel that passes the constant vector <1.0, 2.0, 4.0, -1.0, 0.5> as
; <5 x float>.
363 define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
364 call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
368 ; GCN-LABEL: {{^}}test_call_external_void_func_f64_imm:
369 ; GCN: v_mov_b32_e32 v0, 0{{$}}
370 ; GCN: v_mov_b32_e32 v1, 0x40100000
; Kernel that calls @external_void_func_f64 with the double constant 4.0; the
; checks above expect the low dword 0 in v0 and the high dword 0x40100000 in v1.
372 define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
373 call void @external_void_func_f64(double 4.0)
377 ; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
378 ; GCN: v_mov_b32_e32 v0, 0{{$}}
379 ; GCN: v_mov_b32_e32 v1, 2.0
380 ; GCN: v_mov_b32_e32 v2, 0{{$}}
381 ; GCN: v_mov_b32_e32 v3, 0x40100000
; Kernel that passes the constant vector <2.0, 4.0> as <2 x double>.
383 define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
384 call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
388 ; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
389 ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
390 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
391 ; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
392 ; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
393 ; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
394 ; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
395 ; GCN-DAG: s_swappc_b64
; Kernel that passes the constant vector <2.0, 4.0, 8.0> as <3 x double>.
396 define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
397 call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
401 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
402 ; GFX9: buffer_load_dword v0
; Kernel that loads a <2 x i16> from an undef global pointer and passes it to
; the callee.
405 define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
406 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
407 call void @external_void_func_v2i16(<2 x i16> %val)
411 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i16:
412 ; GFX9: buffer_load_dwordx2 v[0:1]
; Kernel that loads a <3 x i16> from an undef global pointer and passes it to
; the callee.
416 define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
417 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
418 call void @external_void_func_v3i16(<3 x i16> %val)
422 ; GCN-LABEL: {{^}}test_call_external_void_func_v3f16:
423 ; GFX9: buffer_load_dwordx2 v[0:1]
; Kernel that loads a <3 x half> from an undef global pointer and passes it to
; the callee.
427 define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
428 %val = load <3 x half>, <3 x half> addrspace(1)* undef
429 call void @external_void_func_v3f16(<3 x half> %val)
433 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
434 ; GFX9: v_mov_b32_e32 v0, 0x20001
435 ; GFX9: v_mov_b32_e32 v1, 3
; Kernel that passes the constant vector <1, 2, 3> as <3 x i16>; the gfx900
; checks above expect elements 0 and 1 packed as 0x20001 in v0 and 3 in v1.
437 define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
438 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
442 ; GCN-LABEL: {{^}}test_call_external_void_func_v3f16_imm:
443 ; GFX9: v_mov_b32_e32 v0, 0x40003c00
444 ; GFX9: v_mov_b32_e32 v1, 0x4400
; Kernel that passes the constant vector <1.0, 2.0, 4.0> as <3 x half>; the
; gfx900 checks above expect 0x40003c00 in v0 and 0x4400 in v1.
446 define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
447 call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
451 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i16:
452 ; GFX9: buffer_load_dwordx2 v[0:1]
; Kernel that loads a <4 x i16> from an undef global pointer and passes it to
; the callee.
456 define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
457 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
458 call void @external_void_func_v4i16(<4 x i16> %val)
462 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i16_imm:
463 ; GFX9-DAG: v_mov_b32_e32 v0, 0x20001
464 ; GFX9-DAG: v_mov_b32_e32 v1, 0x40003
; Kernel that passes the constant vector <1, 2, 3, 4> as <4 x i16>; the gfx900
; checks above expect the pairs packed as 0x20001 and 0x40003.
466 define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
467 call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
471 ; GCN-LABEL: {{^}}test_call_external_void_func_v2f16:
472 ; GFX9: buffer_load_dword v0
; Kernel that loads a <2 x half> from an undef global pointer and passes it to
; the callee.
475 define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
476 %val = load <2 x half>, <2 x half> addrspace(1)* undef
477 call void @external_void_func_v2f16(<2 x half> %val)
481 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i32:
482 ; GCN: buffer_load_dwordx2 v[0:1]
; Kernel that loads a <2 x i32> from an undef global pointer and passes it to
; the callee.
485 define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
486 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
487 call void @external_void_func_v2i32(<2 x i32> %val)
491 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i32_imm:
492 ; GCN-DAG: v_mov_b32_e32 v0, 1
493 ; GCN-DAG: v_mov_b32_e32 v1, 2
; Kernel that passes the constant vector <1, 2> as <2 x i32>.
495 define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
496 call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
500 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_imm:
501 ; HSA-DAG: s_mov_b32 s33, s9
502 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
505 ; GCN-DAG: v_mov_b32_e32 v0, 3
506 ; GCN-DAG: v_mov_b32_e32 v1, 4
507 ; GCN-DAG: v_mov_b32_e32 v2, 5
; Kernel that passes the constant vector <3, 4, 5> as <3 x i32>.
510 define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
511 call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
515 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_i32:
516 ; GCN-DAG: v_mov_b32_e32 v0, 3
517 ; GCN-DAG: v_mov_b32_e32 v1, 4
518 ; GCN-DAG: v_mov_b32_e32 v2, 5
519 ; GCN-DAG: v_mov_b32_e32 v3, 6
; Kernel that passes the constant <3 x i32> <3, 4, 5> followed by the scalar
; i32 6; the checks above expect the scalar in v3, directly after the vector.
520 define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
521 call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
525 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i32:
526 ; GCN: buffer_load_dwordx4 v[0:3]
; Kernel that loads a <4 x i32> from an undef global pointer and passes it to
; the callee.
529 define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
530 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
531 call void @external_void_func_v4i32(<4 x i32> %val)
535 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i32_imm:
536 ; GCN-DAG: v_mov_b32_e32 v0, 1
537 ; GCN-DAG: v_mov_b32_e32 v1, 2
538 ; GCN-DAG: v_mov_b32_e32 v2, 3
539 ; GCN-DAG: v_mov_b32_e32 v3, 4
; Kernel that passes the constant vector <1, 2, 3, 4> as <4 x i32>.
541 define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
542 call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
546 ; GCN-LABEL: {{^}}test_call_external_void_func_v5i32_imm:
547 ; GCN-DAG: v_mov_b32_e32 v0, 1
548 ; GCN-DAG: v_mov_b32_e32 v1, 2
549 ; GCN-DAG: v_mov_b32_e32 v2, 3
550 ; GCN-DAG: v_mov_b32_e32 v3, 4
551 ; GCN-DAG: v_mov_b32_e32 v4, 5
; Kernel that passes the constant vector <1, 2, 3, 4, 5> as <5 x i32>.
554 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
555 call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
559 ; GCN-LABEL: {{^}}test_call_external_void_func_v8i32:
560 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
561 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; Kernel that loads a global pointer from an undef constant-address-space
; (addrspace 4) location, loads an <8 x i32> through it and passes the vector
; to the callee.
564 define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
565 %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
566 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
567 call void @external_void_func_v8i32(<8 x i32> %val)
571 ; GCN-LABEL: {{^}}test_call_external_void_func_v8i32_imm:
572 ; GCN-DAG: v_mov_b32_e32 v0, 1
573 ; GCN-DAG: v_mov_b32_e32 v1, 2
574 ; GCN-DAG: v_mov_b32_e32 v2, 3
575 ; GCN-DAG: v_mov_b32_e32 v3, 4
576 ; GCN-DAG: v_mov_b32_e32 v4, 5
577 ; GCN-DAG: v_mov_b32_e32 v5, 6
578 ; GCN-DAG: v_mov_b32_e32 v6, 7
579 ; GCN-DAG: v_mov_b32_e32 v7, 8
; Kernel that passes the constant vector <1, 2, ..., 8> as <8 x i32>.
581 define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
582 call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
586 ; GCN-LABEL: {{^}}test_call_external_void_func_v16i32:
587 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
588 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
589 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
590 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; Kernel that loads a global pointer from an undef addrspace(4) location,
; loads a <16 x i32> through it and passes the vector to the callee.
593 define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
594 %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
595 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
596 call void @external_void_func_v16i32(<16 x i32> %val)
600 ; GCN-LABEL: {{^}}test_call_external_void_func_v32i32:
601 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
602 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
603 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
604 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
605 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
606 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
607 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
608 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; Kernel that loads a global pointer from an undef addrspace(4) location,
; loads a <32 x i32> through it and passes the vector to the callee; the
; checks above expect it to occupy v0 through v31.
611 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
612 %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
613 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
614 call void @external_void_func_v32i32(<32 x i32> %val)
618 ; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32:
619 ; HSA-DAG: s_mov_b32 s33, s9
620 ; HSA-NOT: s_add_u32 s32
622 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
623 ; MESA-NOT: s_add_u32 s32
625 ; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
626 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
627 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
628 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
629 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
630 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
631 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
632 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
633 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
636 ; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32{{$}}
; Kernel that passes a loaded <32 x i32> plus one extra loaded i32. The checks
; above expect the vector in v0 through v31 and the extra i32 stored to the
; stack at s32.
639 define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
640 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
641 %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
642 %val1 = load i32, i32 addrspace(1)* undef
643 call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
647 ; GCN-LABEL: {{^}}test_call_external_i32_func_i32_imm:
648 ; GCN: v_mov_b32_e32 v0, 42
649 ; GCN: s_swappc_b64 s[30:31],
651 ; GCN: buffer_store_dword v0, off, s[36:39], 0
; Kernel that calls @external_i32_func_i32 with the constant 42 and
; volatile-stores the returned i32 to %out.
652 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
653 %val = call i32 @external_i32_func_i32(i32 42)
654 store volatile i32 %val, i32 addrspace(1)* %out
658 ; GCN-LABEL: {{^}}test_call_external_void_func_struct_i8_i32:
659 ; GCN: buffer_load_ubyte v0, off
660 ; GCN: buffer_load_dword v1, off
; Kernel that loads a { i8, i32 } struct through a pointer read from an undef
; addrspace(4) location and passes the struct by value; the checks above
; expect the i8 field in v0 and the i32 field in v1.
663 define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
664 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
665 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
666 call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
670 ; GCN-LABEL: {{^}}test_call_external_void_func_byval_struct_i8_i32:
671 ; GCN-DAG: s_add_u32 [[SP:s[0-9]+]], s33, 0x400{{$}}
673 ; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
674 ; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
675 ; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], s33 offset:8
676 ; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], s33 offset:12
678 ; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8
679 ; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12
681 ; GCN-NOT: s_add_u32 [[SP]],
683 ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
684 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
686 ; HSA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]]{{$}}
687 ; HSA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:4
690 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
691 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12
693 ; MESA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]]{{$}}
694 ; MESA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:4
696 ; GCN-NEXT: s_swappc_b64
; Kernel that allocates a { i8, i32 } in private memory (addrspace 5), stores
; 3 into the i8 field and 8 into the i32 field, then passes the alloca to the
; byval callee.
698 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
699 %val = alloca { i8, i32 }, align 4, addrspace(5)
700 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
701 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
702 store i8 3, i8 addrspace(5)* %gep0
703 store i32 8, i32 addrspace(5)* %gep1
704 call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
708 ; GCN-LABEL: {{^}}test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
709 ; MESA-DAG: s_add_u32 [[SP:s[0-9]+]], [[FP_REG:s[0-9]+]], 0x800{{$}}
710 ; HSA-DAG: s_add_u32 [[SP:s[0-9]+]], [[FP_REG:s[0-9]+]], 0x800{{$}}
712 ; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
713 ; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
714 ; GCN-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
715 ; GCN-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
717 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
718 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
720 ; GCN-NOT: s_add_u32 [[SP]]
721 ; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]]{{$}}
722 ; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
724 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
725 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
726 ; GCN-NOT: s_sub_u32 [[SP]]
728 ; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
729 ; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
; Kernel that combines an sret result with a byval argument. %in.val is
; initialised to { 3, 8 } and passed byval; %out.val is passed as the sret
; slot. After the call both fields of %out.val are loaded and volatile-stored
; to undef global pointers so the loads stay live.
730 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
731 %in.val = alloca { i8, i32 }, align 4, addrspace(5)
732 %out.val = alloca { i8, i32 }, align 4, addrspace(5)
733 %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
734 %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
735 store i8 3, i8 addrspace(5)* %in.gep0
736 store i32 8, i32 addrspace(5)* %in.gep1
737 call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
738 %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
739 %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
740 %out.val0 = load i8, i8 addrspace(5)* %out.gep0
741 %out.val1 = load i32, i32 addrspace(5)* %out.gep1
743 store volatile i8 %out.val0, i8 addrspace(1)* undef
744 store volatile i32 %out.val1, i32 addrspace(1)* undef
748 ; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
; Kernel that loads a <16 x i8> through a pointer read from an undef
; addrspace(4) location and passes it to the callee. Only the label is
; checked for this function.
749 define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
750 %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
751 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
752 call void @external_void_func_v16i8(<16 x i8> %val)
756 ; GCN-LABEL: {{^}}stack_passed_arg_alignment_v32i32_f64:
757 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32{{$}}
758 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4
; Kernel that forwards its <32 x i32> and double arguments to
; @stack_passed_f64_arg (declared outside this view). The checks above expect
; the double's two dwords stored at s32 and s32 offset:4.
760 define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
762 call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
766 ; GCN-LABEL: {{^}}tail_call_byval_align16:
768 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
769 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
770 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16
771 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20
775 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4
776 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
777 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
778 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; Non-kernel function that tail-calls @byval_align16_f64_arg (declared outside
; this view), passing %val and a local double alloca as a byval argument with
; align 16. The incoming %tmp is not used.
781 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
783 %alloca = alloca double, align 8, addrspace(5)
784 tail call void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval align 16 %alloca)
788 ; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64:
790 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
791 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
792 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4
793 ; GCN: buffer_load_dword v33, off, s[0:3], s32{{$}}
795 ; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}}
796 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
797 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
798 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; Non-kernel function that tail-calls @stack_passed_f64_arg (declared outside
; this view), forwarding both of its own arguments unchanged.
801 define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
803 tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
807 ; GCN-LABEL: {{^}}stack_12xv3i32:
808 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
809 ; GCN: buffer_store_dword [[REG12]], {{.*$}}
810 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
811 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
812 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
813 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
814 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
815 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
816 ; GCN: v_mov_b32_e32 v31, 11
; Passes twelve constant <3 x i32> arguments (36 dwords) to
; @external_void_func_12xv3i32 (declared outside this view). The checks above
; expect the values 12 through 15 to be stored to the stack and 11 to be in v31.
818 define void @stack_12xv3i32() #0 {
820 call void @external_void_func_12xv3i32(
821 <3 x i32><i32 0, i32 0, i32 0>,
822 <3 x i32><i32 1, i32 1, i32 1>,
823 <3 x i32><i32 2, i32 2, i32 2>,
824 <3 x i32><i32 3, i32 3, i32 3>,
825 <3 x i32><i32 4, i32 4, i32 4>,
826 <3 x i32><i32 5, i32 5, i32 5>,
827 <3 x i32><i32 6, i32 6, i32 6>,
828 <3 x i32><i32 7, i32 7, i32 7>,
829 <3 x i32><i32 8, i32 8, i32 8>,
830 <3 x i32><i32 9, i32 9, i32 9>,
831 <3 x i32><i32 10, i32 11, i32 12>,
832 <3 x i32><i32 13, i32 14, i32 15>)
836 ; GCN-LABEL: {{^}}stack_12xv3f32:
837 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
838 ; GCN: buffer_store_dword [[REG12]], {{.*$}}
839 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
840 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
841 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
842 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
843 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
844 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
845 ; GCN: v_mov_b32_e32 v31, 0x41300000
; Floating-point twin of the 12 x <3 x i32> test: passes twelve constant
; <3 x float> arguments to @external_void_func_12xv3f32 (declared outside this
; view). The checks above expect 12.0 through 15.0 on the stack and 11.0 in v31.
847 define void @stack_12xv3f32() #0 {
849 call void @external_void_func_12xv3f32(
850 <3 x float><float 0.0, float 0.0, float 0.0>,
851 <3 x float><float 1.0, float 1.0, float 1.0>,
852 <3 x float><float 2.0, float 2.0, float 2.0>,
853 <3 x float><float 3.0, float 3.0, float 3.0>,
854 <3 x float><float 4.0, float 4.0, float 4.0>,
855 <3 x float><float 5.0, float 5.0, float 5.0>,
856 <3 x float><float 6.0, float 6.0, float 6.0>,
857 <3 x float><float 7.0, float 7.0, float 7.0>,
858 <3 x float><float 8.0, float 8.0, float 8.0>,
859 <3 x float><float 9.0, float 9.0, float 9.0>,
860 <3 x float><float 10.0, float 11.0, float 12.0>,
861 <3 x float><float 13.0, float 14.0, float 15.0>)
; Calls @external_void_func_8xv5i32 with eight constant <5 x i32> arguments
; (40 scalar elements in total). The first six vectors are splats of 0..5; the
; last two hold the distinct values 6..15 so individual elements can be
; identified in the output.
; Expected by the checks below: the element with value 7 (the 32nd scalar in
; argument order) is moved into v31, and the last eight elements (8..15) are
; each materialized in a VGPR and written with buffer_store_dword. Offsets
; 4, 8, ..., 28 are pinned for values 9..15; the operands of the first store
; are fully wildcarded, so its offset is not checked.
; NOTE(review): the store base is hidden by the wildcards - presumably the
; outgoing stack argument area; confirm against the llc output.
865 ; GCN-LABEL: {{^}}stack_8xv5i32:
867 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
868 ; GCN: buffer_store_dword [[REG8]], {{.*$}}
869 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
870 ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
871 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
872 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
873 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
874 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
875 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
876 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
877 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
878 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
879 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
880 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
881 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
882 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
884 ; GCN: v_mov_b32_e32 v31, 7
886 define void @stack_8xv5i32() #0 {
888 call void @external_void_func_8xv5i32(
889 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
890 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
891 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
892 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
893 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
894 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
895 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
896 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
; Floating-point counterpart of the 8 x <5 x i32> test: calls
; @external_void_func_8xv5f32 with eight constant <5 x float> arguments
; (40 scalar elements). The first six vectors are splats of 0.0..5.0; the last
; two hold the distinct values 6.0..15.0.
; The hex immediates in the checks are IEEE-754 single-precision bit patterns:
;   0x40e00000 = 7.0,  0x41000000 = 8.0,  0x41100000 = 9.0,
;   0x41200000 = 10.0, 0x41300000 = 11.0, 0x41400000 = 12.0,
;   0x41500000 = 13.0, 0x41600000 = 14.0, 0x41700000 = 15.0
; Expected by the checks below: 7.0 (the 32nd scalar in argument order) is
; moved into v31, and 8.0..15.0 are each materialized in a VGPR and written
; with buffer_store_dword. Offsets 4, 8, ..., 28 are pinned for 9.0..15.0; the
; operands of the first store are fully wildcarded, so its offset is not
; checked.
900 ; GCN-LABEL: {{^}}stack_8xv5f32:
901 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
902 ; GCN: buffer_store_dword [[REG8]], {{.*$}}
903 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
904 ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
905 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
906 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
907 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
908 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
909 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
910 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
911 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
912 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
913 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
914 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
915 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
916 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
918 ; GCN: v_mov_b32_e32 v31, 0x40e00000
920 define void @stack_8xv5f32() #0 {
922 call void @external_void_func_8xv5f32(
923 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
924 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
925 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
926 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
927 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
928 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
929 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
930 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; Declarations of hidden external functions. All return void and carry
; attribute group #0.
; Takes a <32 x i32> followed by a byval, 16-byte-aligned pointer to a double
; in address space 5.
934 declare hidden void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval align 16) #0
; Takes a <32 x i32> followed by a double passed by value.
935 declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
; Twelve <3 x i32> parameters (36 scalar elements).
936 declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
937 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
; Eight <5 x i32> parameters (40 scalar elements).
938 declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
939 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
; Twelve <3 x float> parameters (36 scalar elements).
940 declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
941 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
; Eight <5 x float> parameters (40 scalar elements).
942 declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
943 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute groups; functions and declarations refer to them by their #N
; suffix. Every group includes nounwind; #1 adds readnone and #2 adds noinline.
944 attributes #0 = { nounwind }
945 attributes #1 = { nounwind readnone }
946 attributes #2 = { nounwind noinline }