[DAGCombiner] Add target hook function to decide folding (mul (add x, c1), c2)
[llvm-project.git] / llvm / test / MC / AMDGPU / sopk.s
blobfb9731ce36f97b62d295a35c89cbe4a5bb2ff828
1 // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
2 // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
3 // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,VI %s
4 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,GFX9 %s
5 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GCN,GFX10 %s
7 // RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
8 // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
9 // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
10 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s
12 //===----------------------------------------------------------------------===//
13 // Instructions
14 //===----------------------------------------------------------------------===//
16 s_movk_i32 s2, 0x6
17 // GCN: s_movk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb0]
19 s_cmovk_i32 s2, 0x6
20 // SICI: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
21 // VI9: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb0]
22 // GFX10: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
24 s_cmpk_eq_i32 s2, 0x6
25 // SICI: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
26 // VI9: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
27 // GFX10: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
29 s_cmpk_lg_i32 s2, 0x6
30 // SICI: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
31 // VI9: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
32 // GFX10: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
34 s_cmpk_gt_i32 s2, 0x6
35 // SICI: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
36 // VI9: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
37 // GFX10: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
39 s_cmpk_ge_i32 s2, 0x6
40 // SICI: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
41 // VI9: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
42 // GFX10: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
44 s_cmpk_lt_i32 s2, 0x6
45 // SICI: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
46 // VI9: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
47 // GFX10: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
49 s_cmpk_le_i32 s2, 0x6
50 // SICI: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
51 // VI9: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
52 // GFX10: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
54 s_cmpk_eq_u32 s2, 0x6
55 // SICI: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
56 // VI9: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
57 // GFX10: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
59 s_cmpk_lg_u32 s2, 0x6
60 // SICI: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
61 // VI9: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
62 // GFX10: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
64 s_cmpk_gt_u32 s2, 0x6
65 // SICI: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
66 // VI9: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
67 // GFX10: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
69 s_cmpk_ge_u32 s2, 0x6
70 // SICI: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
71 // VI9: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
72 // GFX10: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
74 s_cmpk_lt_u32 s2, 0x6
75 // SICI: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
76 // VI9: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
77 // GFX10: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
79 s_cmpk_le_u32 s2, 0x6
80 // SICI: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
81 // VI9: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
82 // GFX10: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
84 s_cmpk_le_u32 s2, 0xFFFF
85 // SICI: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb7]
86 // VI9: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb6]
87 // GFX10: s_cmpk_le_u32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb7]
89 s_addk_i32 s2, 0x6
90 // SICI: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
91 // VI9: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
92 // GFX10: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
94 s_mulk_i32 s2, 0x6
95 // SICI: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8]
96 // VI9: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
97 // GFX10: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8]
99 s_mulk_i32 s2, -1
100 // SICI: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8]
101 // VI9: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb7]
102 // GFX10: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8]
104 s_mulk_i32 s2, 0xFFFF
105 // SICI: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8]
106 // VI9: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x82,0xb7]
107 // GFX10: s_mulk_i32 s2, 0xffff ; encoding: [0xff,0xff,0x02,0xb8]
109 s_cbranch_i_fork s[2:3], 0x6
110 // SICI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x82,0xb8]
111 // VI9: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8]
113 //===----------------------------------------------------------------------===//
114 // getreg/setreg and hwreg macro
115 //===----------------------------------------------------------------------===//
117 // raw number mapped to known HW register
118 s_getreg_b32 s2, 0x6
119 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
120 // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
121 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
123 // HW register identifier, non-default offset/width
124 s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31)
125 // SICI: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x02,0xb9]
126 // VI9: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x82,0xb8]
127 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 1, 31) ; encoding: [0x45,0xf0,0x02,0xb9]
129 // HW register code of unknown HW register, non-default offset/width
130 s_getreg_b32 s2, hwreg(51, 1, 31)
131 // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
132 // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8]
133 // GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
135 // HW register code of unknown HW register, default offset/width
136 s_getreg_b32 s2, hwreg(51)
137 // SICI: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x02,0xb9]
138 // VI9: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x82,0xb8]
139 // GFX10: s_getreg_b32 s2, hwreg(51) ; encoding: [0x33,0xf8,0x02,0xb9]
141 // HW register code of unknown HW register, valid symbolic name range but no name available
142 s_getreg_b32 s2, hwreg(10)
143 // SICI: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x02,0xb9]
144 // VI9: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x82,0xb8]
145 // GFX10: s_getreg_b32 s2, hwreg(10) ; encoding: [0x0a,0xf8,0x02,0xb9]
147 // HW_REG_SH_MEM_BASES valid starting from GFX9
148 s_getreg_b32 s2, hwreg(15)
149 // SICI: s_getreg_b32 s2, hwreg(15) ; encoding: [0x0f,0xf8,0x02,0xb9]
150 // VI: s_getreg_b32 s2, hwreg(15) ; encoding: [0x0f,0xf8,0x82,0xb8]
151 // GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8]
152 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x02,0xb9]
154 // GFX10+ registers
155 s_getreg_b32 s2, hwreg(16)
156 // SICI: s_getreg_b32 s2, hwreg(16) ; encoding: [0x10,0xf8,0x02,0xb9]
157 // VI9: s_getreg_b32 s2, hwreg(16) ; encoding: [0x10,0xf8,0x82,0xb8]
158 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) ; encoding: [0x10,0xf8,0x02,0xb9]
160 s_getreg_b32 s2, hwreg(17)
161 // SICI: s_getreg_b32 s2, hwreg(17) ; encoding: [0x11,0xf8,0x02,0xb9]
162 // VI9: s_getreg_b32 s2, hwreg(17) ; encoding: [0x11,0xf8,0x82,0xb8]
163 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) ; encoding: [0x11,0xf8,0x02,0xb9]
165 s_getreg_b32 s2, hwreg(18)
166 // SICI: s_getreg_b32 s2, hwreg(18) ; encoding: [0x12,0xf8,0x02,0xb9]
167 // VI9: s_getreg_b32 s2, hwreg(18) ; encoding: [0x12,0xf8,0x82,0xb8]
168 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) ; encoding: [0x12,0xf8,0x02,0xb9]
170 s_getreg_b32 s2, hwreg(19)
171 // SICI: s_getreg_b32 s2, hwreg(19) ; encoding: [0x13,0xf8,0x02,0xb9]
172 // VI9: s_getreg_b32 s2, hwreg(19) ; encoding: [0x13,0xf8,0x82,0xb8]
173 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x02,0xb9]
175 s_getreg_b32 s2, hwreg(20)
176 // SICI: s_getreg_b32 s2, hwreg(20) ; encoding: [0x14,0xf8,0x02,0xb9]
177 // VI9: s_getreg_b32 s2, hwreg(20) ; encoding: [0x14,0xf8,0x82,0xb8]
178 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) ; encoding: [0x14,0xf8,0x02,0xb9]
180 s_getreg_b32 s2, hwreg(21)
181 // SICI: s_getreg_b32 s2, hwreg(21) ; encoding: [0x15,0xf8,0x02,0xb9]
182 // VI9: s_getreg_b32 s2, hwreg(21) ; encoding: [0x15,0xf8,0x82,0xb8]
183 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) ; encoding: [0x15,0xf8,0x02,0xb9]
185 s_getreg_b32 s2, hwreg(22)
186 // SICI: s_getreg_b32 s2, hwreg(22) ; encoding: [0x16,0xf8,0x02,0xb9]
187 // VI9: s_getreg_b32 s2, hwreg(22) ; encoding: [0x16,0xf8,0x82,0xb8]
188 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x16,0xf8,0x02,0xb9]
190 s_getreg_b32 s2, hwreg(23)
191 // SICI: s_getreg_b32 s2, hwreg(23) ; encoding: [0x17,0xf8,0x02,0xb9]
192 // VI9: s_getreg_b32 s2, hwreg(23) ; encoding: [0x17,0xf8,0x82,0xb8]
193 // GFX10: s_getreg_b32 s2, hwreg(23) ; encoding: [0x17,0xf8,0x02,0xb9]
195 s_getreg_b32 s2, hwreg(24)
196 // SICI: s_getreg_b32 s2, hwreg(24) ; encoding: [0x18,0xf8,0x02,0xb9]
197 // VI9: s_getreg_b32 s2, hwreg(24) ; encoding: [0x18,0xf8,0x82,0xb8]
198 // GFX10: s_getreg_b32 s2, hwreg(24) ; encoding: [0x18,0xf8,0x02,0xb9]
200 s_getreg_b32 s2, hwreg(25)
201 // SICI: s_getreg_b32 s2, hwreg(25) ; encoding: [0x19,0xf8,0x02,0xb9]
202 // VI9: s_getreg_b32 s2, hwreg(25) ; encoding: [0x19,0xf8,0x82,0xb8]
203 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) ; encoding: [0x19,0xf8,0x02,0xb9]
205 // raw number mapped to known HW register
206 s_setreg_b32 0x6, s2
207 // SICI: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x82,0xb9]
208 // VI9: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x02,0xb9]
209 // GFX10: s_setreg_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), s2 ; encoding: [0x06,0x00,0x82,0xb9]
211 // raw number mapped to unknown HW register
212 s_setreg_b32 0x33, s2
213 // SICI: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x82,0xb9]
214 // VI9: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x02,0xb9]
215 // GFX10: s_setreg_b32 hwreg(51, 0, 1), s2 ; encoding: [0x33,0x00,0x82,0xb9]
217 // raw number mapped to known HW register, default offset/width
218 s_setreg_b32 0xf803, s2
219 // SICI: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x82,0xb9]
220 // VI9: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x02,0xb9]
221 // GFX10: s_setreg_b32 hwreg(HW_REG_TRAPSTS), s2 ; encoding: [0x03,0xf8,0x82,0xb9]
223 // HW register identifier, default offset/width implied
224 s_setreg_b32 hwreg(HW_REG_HW_ID), s2
225 // SICI: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x82,0xb9]
226 // VI9: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x02,0xb9]
227 // GFX10: s_setreg_b32 hwreg(HW_REG_HW_ID), s2 ; encoding: [0x04,0xf8,0x82,0xb9]
229 // HW register identifier, non-default offset/width
230 s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2
231 // SICI: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9]
232 // VI9: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x02,0xb9]
233 // GFX10: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9]
235 // HW register code of unknown HW register, valid symbolic name range but no name available
236 s_setreg_b32 hwreg(10), s2
237 // SICI: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x82,0xb9]
238 // VI9: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x02,0xb9]
239 // GFX10: s_setreg_b32 hwreg(10), s2 ; encoding: [0x0a,0xf8,0x82,0xb9]
241 // HW_REG_SH_MEM_BASES valid starting from GFX9
242 s_setreg_b32 hwreg(15), s2
243 // SICI: s_setreg_b32 hwreg(15), s2 ; encoding: [0x0f,0xf8,0x82,0xb9]
244 // VI: s_setreg_b32 hwreg(15), s2 ; encoding: [0x0f,0xf8,0x02,0xb9]
245 // GFX9: s_setreg_b32 hwreg(HW_REG_SH_MEM_BASES), s2 ; encoding: [0x0f,0xf8,0x02,0xb9]
246 // GFX10: s_setreg_b32 hwreg(HW_REG_SH_MEM_BASES), s2 ; encoding: [0x0f,0xf8,0x82,0xb9]
248 // GFX10+ registers
249 s_setreg_b32 hwreg(16), s2
250 // SICI: s_setreg_b32 hwreg(16), s2 ; encoding: [0x10,0xf8,0x82,0xb9]
251 // VI9: s_setreg_b32 hwreg(16), s2 ; encoding: [0x10,0xf8,0x02,0xb9]
252 // GFX10: s_setreg_b32 hwreg(HW_REG_TBA_LO), s2 ; encoding: [0x10,0xf8,0x82,0xb9]
254 s_setreg_b32 hwreg(17), s2
255 // SICI: s_setreg_b32 hwreg(17), s2 ; encoding: [0x11,0xf8,0x82,0xb9]
256 // VI9: s_setreg_b32 hwreg(17), s2 ; encoding: [0x11,0xf8,0x02,0xb9]
257 // GFX10: s_setreg_b32 hwreg(HW_REG_TBA_HI), s2 ; encoding: [0x11,0xf8,0x82,0xb9]
259 s_setreg_b32 hwreg(18), s2
260 // SICI: s_setreg_b32 hwreg(18), s2 ; encoding: [0x12,0xf8,0x82,0xb9]
261 // VI9: s_setreg_b32 hwreg(18), s2 ; encoding: [0x12,0xf8,0x02,0xb9]
262 // GFX10: s_setreg_b32 hwreg(HW_REG_TMA_LO), s2 ; encoding: [0x12,0xf8,0x82,0xb9]
264 s_setreg_b32 hwreg(19), s2
265 // SICI: s_setreg_b32 hwreg(19), s2 ; encoding: [0x13,0xf8,0x82,0xb9]
266 // VI9: s_setreg_b32 hwreg(19), s2 ; encoding: [0x13,0xf8,0x02,0xb9]
267 // GFX10: s_setreg_b32 hwreg(HW_REG_TMA_HI), s2 ; encoding: [0x13,0xf8,0x82,0xb9]
269 s_setreg_b32 hwreg(20), s2
270 // SICI: s_setreg_b32 hwreg(20), s2 ; encoding: [0x14,0xf8,0x82,0xb9]
271 // VI9: s_setreg_b32 hwreg(20), s2 ; encoding: [0x14,0xf8,0x02,0xb9]
272 // GFX10: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; encoding: [0x14,0xf8,0x82,0xb9]
274 s_setreg_b32 hwreg(21), s2
275 // SICI: s_setreg_b32 hwreg(21), s2 ; encoding: [0x15,0xf8,0x82,0xb9]
276 // VI9: s_setreg_b32 hwreg(21), s2 ; encoding: [0x15,0xf8,0x02,0xb9]
277 // GFX10: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s2 ; encoding: [0x15,0xf8,0x82,0xb9]
279 s_setreg_b32 hwreg(22), s2
280 // SICI: s_setreg_b32 hwreg(22), s2 ; encoding: [0x16,0xf8,0x82,0xb9]
281 // VI9: s_setreg_b32 hwreg(22), s2 ; encoding: [0x16,0xf8,0x02,0xb9]
282 // GFX10: s_setreg_b32 hwreg(HW_REG_XNACK_MASK), s2 ; encoding: [0x16,0xf8,0x82,0xb9]
284 s_setreg_b32 hwreg(23), s2
285 // SICI: s_setreg_b32 hwreg(23), s2 ; encoding: [0x17,0xf8,0x82,0xb9]
286 // VI9: s_setreg_b32 hwreg(23), s2 ; encoding: [0x17,0xf8,0x02,0xb9]
287 // GFX10: s_setreg_b32 hwreg(23), s2 ; encoding: [0x17,0xf8,0x82,0xb9]
289 s_setreg_b32 hwreg(24), s2
290 // SICI: s_setreg_b32 hwreg(24), s2 ; encoding: [0x18,0xf8,0x82,0xb9]
291 // VI9: s_setreg_b32 hwreg(24), s2 ; encoding: [0x18,0xf8,0x02,0xb9]
292 // GFX10: s_setreg_b32 hwreg(24), s2 ; encoding: [0x18,0xf8,0x82,0xb9]
294 s_setreg_b32 hwreg(25), s2
295 // SICI: s_setreg_b32 hwreg(25), s2 ; encoding: [0x19,0xf8,0x82,0xb9]
296 // VI9: s_setreg_b32 hwreg(25), s2 ; encoding: [0x19,0xf8,0x02,0xb9]
297 // GFX10: s_setreg_b32 hwreg(HW_REG_POPS_PACKER), s2 ; encoding: [0x19,0xf8,0x82,0xb9]
299 // HW register code, non-default offset/width
300 s_setreg_b32 hwreg(5, 1, 31), s2
301 // SICI: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9]
302 // VI9: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x02,0xb9]
303 // GFX10: s_setreg_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), s2 ; encoding: [0x45,0xf0,0x82,0xb9]
305 // raw number mapped to known HW register
306 s_setreg_imm32_b32 0x6, 0xff
307 // SICI: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00]
308 // VI9: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x00,0xba,0xff,0x00,0x00,0x00]
309 // GFX10: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC, 0, 1), 0xff ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00]
311 // HW register identifier, non-default offset/width
312 s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff
313 // SICI: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x80,0xba,0xff,0x00,0x00,0x00]
314 // VI9: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x00,0xba,0xff,0x00,0x00,0x00]
315 // GFX10: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x80,0xba,0xff,0x00,0x00,0x00]
317 //===----------------------------------------------------------------------===//
318 // expressions and hwreg macro
319 //===----------------------------------------------------------------------===//
321 hwreg=6
322 s_getreg_b32 s2, hwreg
323 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
324 // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
325 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
328 s_getreg_b32 s2, x+1
329 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
330 // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
331 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
334 s_getreg_b32 s2, 1+x
335 // SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
336 // VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
337 // GFX10: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
339 reg=50
340 offset=2
341 width=30
342 s_getreg_b32 s2, hwreg(reg + 1, offset - 1, width + 1)
343 // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
344 // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8]
345 // GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
347 s_getreg_b32 s2, hwreg(1 + reg, -1 + offset, 1 + width)
348 // SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
349 // VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8]
350 // GFX10: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
352 //===----------------------------------------------------------------------===//
353 // Instructions
354 //===----------------------------------------------------------------------===//
356 s_endpgm_ordered_ps_done
357 // GFX9: s_endpgm_ordered_ps_done ; encoding: [0x00,0x00,0x9e,0xbf]
358 // NOSICIVI: error: instruction not supported on this GPU
359 // GFX10: s_endpgm_ordered_ps_done ; encoding: [0x00,0x00,0x9e,0xbf]
361 s_call_b64 null, 12609
362 // GFX10: s_call_b64 null, 12609 ; encoding: [0x41,0x31,0x7d,0xbb]
363 // NOSICIVI: error: instruction not supported on this GPU
364 // NOGFX9: error: 'null' operand is not supported on this GPU
366 s_call_b64 s[12:13], 12609
367 // GFX9: s_call_b64 s[12:13], 12609 ; encoding: [0x41,0x31,0x8c,0xba]
368 // NOSICIVI: error: instruction not supported on this GPU
369 // GFX10: s_call_b64 s[12:13], 12609 ; encoding: [0x41,0x31,0x0c,0xbb]
371 s_call_b64 s[100:101], 12609
372 // GFX9: s_call_b64 s[100:101], 12609 ; encoding: [0x41,0x31,0xe4,0xba]
373 // NOSICIVI: error: instruction not supported on this GPU
374 // GFX10: s_call_b64 s[100:101], 12609 ; encoding: [0x41,0x31,0x64,0xbb]
376 s_call_b64 s[10:11], 49617
377 // GFX9: s_call_b64 s[10:11], 49617 ; encoding: [0xd1,0xc1,0x8a,0xba]
378 // NOSICIVI: error: instruction not supported on this GPU
379 // GFX10: s_call_b64 s[10:11], 49617 ; encoding: [0xd1,0xc1,0x0a,0xbb]
381 offset = 4
382 s_call_b64 s[0:1], offset + 4
383 // GFX9: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x80,0xba]
384 // NOSICIVI: error: instruction not supported on this GPU
385 // GFX10: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x00,0xbb]
387 offset = 4
388 s_call_b64 s[0:1], 4 + offset
389 // GFX9: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x80,0xba]
390 // NOSICIVI: error: instruction not supported on this GPU
391 // GFX10: s_call_b64 s[0:1], 8 ; encoding: [0x08,0x00,0x00,0xbb]