1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s
4 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -filetype=obj -amdgpu-use-divergent-register-indexing < %s | llvm-readobj -r - | FileCheck --check-prefix=RELS %s
5 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,MUBUF,GFX10_W32-MUBUF,GFX9_10-MUBUF %s
6 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global,+wavefrontsize64 -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,MUBUF,GFX10_W64-MUBUF,GFX9_10-MUBUF %s
7 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,FLATSCR,GFX9-FLATSCR %s
8 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,FLATSCR,GFX10-FLATSCR %s
9 ; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,FLATSCR,GFX9-FLATSCR-PAL %s
10 ; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,FLATSCR,GFX10-FLATSCR-PAL %s
12 ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0
13 ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1
15 ; This used to fail due to a v_add_i32 instruction with an illegal immediate
16 ; operand that was created during Local Stack Slot Allocation. Test case derived
17 ; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
19 ; GCN-LABEL: {{^}}ps_main:
21 ; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2
22 ; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0
23 ; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
25 ; GFX10-FLATSCR: s_add_u32 s0, s0, s2
26 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
27 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
28 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
30 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
31 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
32 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
33 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
34 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
35 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
36 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
37 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
38 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
39 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
40 ; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
42 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
43 ; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
44 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
45 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
46 ; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
47 ; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
48 ; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
49 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
50 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
52 ; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
53 ; MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
54 ; MUBUF-DAG: s_mov_b32 s2, -1
55 ; SI-DAG: s_mov_b32 s3, 0xe8f000
56 ; VI-DAG: s_mov_b32 s3, 0xe80000
57 ; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000
58 ; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000
59 ; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000
61 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
63 ; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
64 ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
66 ; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:
68 ; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
69 ; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
70 ; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
71 ; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
72 ; GCN-NOT: s_mov_b32 s0
74 ; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
75 ; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]]
77 ; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
78 ; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
79 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off
80 define amdgpu_ps float @ps_main(i32 %idx) {
81 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
82 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
83 %r = fadd float %v1, %v2
87 ; GCN-LABEL: {{^}}vs_main:
88 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
89 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
91 ; GFX10-FLATSCR: s_add_u32 s0, s0, s2
92 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
93 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
94 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
96 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
97 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
98 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
99 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
100 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
101 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
102 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
103 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
104 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
105 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
107 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
108 ; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
109 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
110 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
111 ; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
112 ; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
113 ; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
114 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
115 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
117 ; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
118 ; GCN-NOT: s_mov_b32 s0
120 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
122 ; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
123 ; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
125 ; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
126 ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
128 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
129 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
131 define amdgpu_vs float @vs_main(i32 %idx) {
132 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
133 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
134 %r = fadd float %v1, %v2
138 ; GCN-LABEL: {{^}}cs_main:
139 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
140 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
142 ; GFX10-FLATSCR: s_add_u32 s0, s0, s2
143 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
144 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
145 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
147 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
148 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
149 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10
150 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
151 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
152 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
153 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
154 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
155 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
156 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
158 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
159 ; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
160 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10
161 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
162 ; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
163 ; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
164 ; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
165 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
166 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
168 ; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
170 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
172 ; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
173 ; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
175 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
176 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
177 define amdgpu_cs float @cs_main(i32 %idx) {
178 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
179 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
180 %r = fadd float %v1, %v2
184 ; GCN-LABEL: {{^}}hs_main:
185 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
186 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
188 ; GFX10-FLATSCR: s_add_u32 s0, s0, s5
189 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
190 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
191 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
193 ; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
194 ; SIVI-NOT: s_mov_b32 s0
195 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
196 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
198 ; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
199 ; GFX9_10-NOT: s_mov_b32 s5
200 ; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
201 ; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
203 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
204 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
205 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
206 define amdgpu_hs float @hs_main(i32 %idx) {
207 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
208 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
209 %r = fadd float %v1, %v2
213 ; GCN-LABEL: {{^}}gs_main:
214 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
215 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
217 ; GFX10-FLATSCR: s_add_u32 s0, s0, s5
218 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
219 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
220 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
222 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
223 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
224 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
225 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
226 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
227 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
228 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
229 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
230 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
231 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
233 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
234 ; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
235 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
236 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
237 ; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
238 ; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
239 ; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
240 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
241 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
243 ; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
244 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
245 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
247 ; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
248 ; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
249 ; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
251 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
252 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
253 ; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
254 define amdgpu_gs float @gs_main(i32 %idx) {
255 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
256 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
257 %r = fadd float %v1, %v2
261 ; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at
262 ; SGPR5, and the inreg implementation is used to reference it in the IR. The
263 ; following tests confirm the shader and anything inserted after the return
264 ; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset.
266 ; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
267 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
268 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
270 ; GFX10-FLATSCR: s_add_u32 s0, s0, s5
271 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
272 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
273 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
275 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
276 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
277 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
278 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
279 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
280 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
281 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
282 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
283 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
284 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
286 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
287 ; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
288 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
289 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
290 ; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
291 ; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
292 ; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
293 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
294 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
296 ; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
297 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
299 ; SIVI-NOT: s_mov_b32 s6
300 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
301 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
303 ; GFX9_10-NOT: s_mov_b32 s5
304 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
305 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
307 ; MUBUF-DAG: s_mov_b32 s2, s5
309 ; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
310 ; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
311 define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
312 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
313 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
314 %f = fadd float %v1, %v2
315 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
316 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
317 ret <{i32, i32, i32, float}> %r2
320 ; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
321 ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
322 ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
324 ; GFX10-FLATSCR: s_add_u32 s0, s0, s5
325 ; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
326 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
327 ; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
329 ; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
330 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
331 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
332 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
333 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
334 ; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
335 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
336 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
337 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
338 ; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
340 ; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
341 ; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
342 ; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
343 ; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
344 ; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
345 ; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
346 ; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
347 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
348 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
350 ; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
351 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
353 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
354 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
356 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
357 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
359 ; MUBUF-DAG: s_mov_b32 s2, s5
361 ; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
362 ; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
363 define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
364 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
365 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
366 %f = fadd float %v1, %v2
367 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
368 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
369 ret <{i32, i32, i32, float}> %r2