1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Dynamic extractelement from the constant vector <1.0 .. 8.0> where the
; index %sel is an ordinary (non-inreg) argument of a default-calling-
; convention function. The checks below show the index being read from v0
; and the element picked by a chain of v_cmp_eq_u32 / v_cndmask_b32 for
; indices 1..7, with element 0 (1.0) as the fall-through value. gfx900 and
; fiji share the GCN check prefix; gfx1010 and gfx1100 share GFX10PLUS.
7 define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
8 ; GCN-LABEL: dyn_extract_v8f32_const_s_v:
9 ; GCN: ; %bb.0: ; %entry
10 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
12 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
13 ; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
14 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
16 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
17 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
18 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
19 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
20 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
21 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
22 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
23 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
24 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
25 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
26 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
27 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
28 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
29 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
30 ; GCN-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
33 ; GFX10PLUS: ; %bb.0: ; %entry
34 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
36 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
37 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
38 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
39 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
40 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
41 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
42 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
43 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
44 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
45 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
46 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
47 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
48 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
49 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
51 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
; Same constant-vector extract, but as an amdgpu_ps shader with the index
; passed inreg (read from s2 in the checks). The gfx900 (GPRIDX) checks
; expect an s_cmp_eq_u32 / s_cselect_b32 chain; the fiji (MOVREL) and the
; gfx1010/gfx1100 (GFX10PLUS) checks expect the eight constants to be
; materialized in s4..s11 and the element fetched with s_movrels_b32 using
; m0 = s2. The scalar result is then copied to v0 before the shader returns.
55 define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
56 ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
57 ; GPRIDX: ; %bb.0: ; %entry
58 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1
59 ; GPRIDX-NEXT: s_cselect_b32 s0, 2.0, 1.0
60 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2
61 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40400000, s0
62 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3
63 ; GPRIDX-NEXT: s_cselect_b32 s0, 4.0, s0
64 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 4
65 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40a00000, s0
66 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 5
67 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40c00000, s0
68 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 6
69 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40e00000, s0
70 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 7
71 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x41000000, s0
72 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
73 ; GPRIDX-NEXT: ; return to shader part epilog
75 ; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
76 ; MOVREL: ; %bb.0: ; %entry
77 ; MOVREL-NEXT: s_mov_b32 s4, 1.0
78 ; MOVREL-NEXT: s_mov_b32 m0, s2
79 ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
80 ; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
81 ; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
82 ; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
83 ; MOVREL-NEXT: s_mov_b32 s7, 4.0
84 ; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
85 ; MOVREL-NEXT: s_mov_b32 s5, 2.0
86 ; MOVREL-NEXT: s_movrels_b32 s0, s4
87 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
88 ; MOVREL-NEXT: ; return to shader part epilog
90 ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
91 ; GFX10PLUS: ; %bb.0: ; %entry
92 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
93 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
94 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
95 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
96 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
97 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
98 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
99 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
100 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
101 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
102 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
103 ; GFX10PLUS-NEXT: ; return to shader part epilog
105 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
; Extract from an <8 x float> passed inreg to an amdgpu_ps shader (s2..s9 in
; the checks) using a non-inreg index (v0). All targets are expected to use a
; v_cmp_eq_u32 / v_cndmask_b32 chain. The checks under the GCN prefix first
; copy every element into a VGPR, while the checks under the GFX10PLUS prefix
; use the SGPRs directly as v_cndmask operands (only s3 is copied to a VGPR).
109 define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
110 ; GCN-LABEL: dyn_extract_v8f32_s_v:
111 ; GCN: ; %bb.0: ; %entry
112 ; GCN-NEXT: v_mov_b32_e32 v1, s2
113 ; GCN-NEXT: v_mov_b32_e32 v2, s3
114 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
115 ; GCN-NEXT: v_mov_b32_e32 v3, s4
116 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
117 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
118 ; GCN-NEXT: v_mov_b32_e32 v4, s5
119 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
120 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
121 ; GCN-NEXT: v_mov_b32_e32 v5, s6
122 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
123 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
124 ; GCN-NEXT: v_mov_b32_e32 v6, s7
125 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
126 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
127 ; GCN-NEXT: v_mov_b32_e32 v7, s8
128 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
129 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
130 ; GCN-NEXT: v_mov_b32_e32 v8, s9
131 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
132 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
133 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, vcc
134 ; GCN-NEXT: ; return to shader part epilog
136 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
137 ; GFX10PLUS: ; %bb.0: ; %entry
138 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
139 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
140 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
141 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
142 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
143 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
144 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
145 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
146 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
147 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
148 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
149 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
150 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
151 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
152 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s9, vcc_lo
153 ; GFX10PLUS-NEXT: ; return to shader part epilog
155 %ext = extractelement <8 x float> %vec, i32 %sel
; Extract from an <8 x float> argument (v0..v7 in the checks) with a
; non-inreg index (v8) in a default-calling-convention function. Every
; target is expected to emit the same compare/select chain for indices 1..7,
; accumulating the result in v0 (which initially holds element 0).
159 define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
160 ; GCN-LABEL: dyn_extract_v8f32_v_v:
161 ; GCN: ; %bb.0: ; %entry
162 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
164 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
165 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
166 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
167 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
168 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
169 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
170 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
171 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
172 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
173 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
174 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
175 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
176 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
177 ; GCN-NEXT: s_setpc_b64 s[30:31]
179 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
180 ; GFX10PLUS: ; %bb.0: ; %entry
181 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
183 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
184 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
185 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
186 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
187 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
188 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
189 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
190 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
191 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
192 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
193 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
194 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
195 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
196 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
198 %ext = extractelement <8 x float> %vec, i32 %sel
; amdgpu_ps shader taking a non-inreg <8 x float> (v0..v7 in the checks) and
; an inreg index (s2). The GPRIDX checks expect a v_cmp_eq_u32_e64 /
; v_cndmask_b32 chain keyed on s2; the MOVREL and GFX10PLUS checks expect
; m0 = s2 followed by a single v_movrels_b32.
202 define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
203 ; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
204 ; GPRIDX: ; %bb.0: ; %entry
205 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
206 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
207 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
208 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
209 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
210 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
211 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
212 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
213 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
214 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
215 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
216 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
217 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
218 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
219 ; GPRIDX-NEXT: ; return to shader part epilog
221 ; MOVREL-LABEL: dyn_extract_v8f32_v_s:
222 ; MOVREL: ; %bb.0: ; %entry
223 ; MOVREL-NEXT: s_mov_b32 m0, s2
224 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
225 ; MOVREL-NEXT: ; return to shader part epilog
227 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
228 ; GFX10PLUS: ; %bb.0: ; %entry
229 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
230 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
231 ; GFX10PLUS-NEXT: ; return to shader part epilog
233 %ext = extractelement <8 x float> %vec, i32 %sel
; amdgpu_ps shader with both the <8 x float> (s2..s9 in the checks) and the
; index (s10) passed inreg. The GPRIDX checks expect an s_cmp_eq_u32 /
; s_cselect_b32 chain; the MOVREL and GFX10PLUS checks expect the vector to
; be moved down to s0..s7, m0 = s10, and one s_movrels_b32. In every variant
; the scalar result is copied to v0 before the shader returns.
237 define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
238 ; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
239 ; GPRIDX: ; %bb.0: ; %entry
240 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
241 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
242 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
243 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
244 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
245 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
246 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
247 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
248 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
249 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
250 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
251 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
252 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
253 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
254 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
255 ; GPRIDX-NEXT: ; return to shader part epilog
257 ; MOVREL-LABEL: dyn_extract_v8f32_s_s:
258 ; MOVREL: ; %bb.0: ; %entry
259 ; MOVREL-NEXT: s_mov_b32 s0, s2
260 ; MOVREL-NEXT: s_mov_b32 m0, s10
261 ; MOVREL-NEXT: s_mov_b32 s1, s3
262 ; MOVREL-NEXT: s_mov_b32 s2, s4
263 ; MOVREL-NEXT: s_mov_b32 s3, s5
264 ; MOVREL-NEXT: s_mov_b32 s4, s6
265 ; MOVREL-NEXT: s_mov_b32 s5, s7
266 ; MOVREL-NEXT: s_mov_b32 s6, s8
267 ; MOVREL-NEXT: s_mov_b32 s7, s9
268 ; MOVREL-NEXT: s_movrels_b32 s0, s0
269 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
270 ; MOVREL-NEXT: ; return to shader part epilog
272 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
273 ; GFX10PLUS: ; %bb.0: ; %entry
274 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
275 ; GFX10PLUS-NEXT: s_mov_b32 m0, s10
276 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
277 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
278 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
279 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
280 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
281 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
282 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
283 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s0
284 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
285 ; GFX10PLUS-NEXT: ; return to shader part epilog
287 %ext = extractelement <8 x float> %vec, i32 %sel
; i64 variant: dynamic extract from the constant vector <1 .. 8> with a
; non-inreg index (v0 in the checks). Each 64-bit constant is set up with
; s_mov_b64, and each select step uses two v_cndmask_b32 (one per 32-bit
; half) sharing a single v_cmp_eq_u32 result. gfx1010 and gfx1100 are
; checked under separate prefixes here (GFX10 and GFX11) because the
; expected SGPR assignment and the use of v_dual_mov_b32 differ.
291 define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
292 ; GCN-LABEL: dyn_extract_v8i64_const_s_v:
293 ; GCN: ; %bb.0: ; %entry
294 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GCN-NEXT: s_mov_b64 s[4:5], 1
296 ; GCN-NEXT: s_mov_b64 s[6:7], 2
297 ; GCN-NEXT: v_mov_b32_e32 v1, s4
298 ; GCN-NEXT: v_mov_b32_e32 v2, s5
299 ; GCN-NEXT: v_mov_b32_e32 v3, s6
300 ; GCN-NEXT: v_mov_b32_e32 v4, s7
301 ; GCN-NEXT: s_mov_b64 s[8:9], 3
302 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
303 ; GCN-NEXT: v_mov_b32_e32 v5, s8
304 ; GCN-NEXT: v_mov_b32_e32 v6, s9
305 ; GCN-NEXT: s_mov_b64 s[10:11], 4
306 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
307 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
308 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
309 ; GCN-NEXT: s_mov_b64 s[12:13], 5
310 ; GCN-NEXT: v_mov_b32_e32 v7, s10
311 ; GCN-NEXT: v_mov_b32_e32 v8, s11
312 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
313 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
314 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
315 ; GCN-NEXT: s_mov_b64 s[14:15], 6
316 ; GCN-NEXT: v_mov_b32_e32 v9, s12
317 ; GCN-NEXT: v_mov_b32_e32 v10, s13
318 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
319 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
320 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
321 ; GCN-NEXT: s_mov_b64 s[16:17], 7
322 ; GCN-NEXT: v_mov_b32_e32 v11, s14
323 ; GCN-NEXT: v_mov_b32_e32 v12, s15
324 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
325 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
326 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
327 ; GCN-NEXT: s_mov_b64 s[18:19], 8
328 ; GCN-NEXT: v_mov_b32_e32 v13, s16
329 ; GCN-NEXT: v_mov_b32_e32 v14, s17
330 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
331 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
332 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
333 ; GCN-NEXT: v_mov_b32_e32 v15, s18
334 ; GCN-NEXT: v_mov_b32_e32 v16, s19
335 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
336 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
337 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
338 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
339 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
340 ; GCN-NEXT: s_setpc_b64 s[30:31]
342 ; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
343 ; GFX10: ; %bb.0: ; %entry
344 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX10-NEXT: s_mov_b64 s[6:7], 2
346 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
347 ; GFX10-NEXT: v_mov_b32_e32 v1, s6
348 ; GFX10-NEXT: v_mov_b32_e32 v2, s7
349 ; GFX10-NEXT: s_mov_b64 s[4:5], 1
350 ; GFX10-NEXT: s_mov_b64 s[8:9], 3
351 ; GFX10-NEXT: s_mov_b64 s[10:11], 4
352 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo
353 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo
354 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
355 ; GFX10-NEXT: s_mov_b64 s[12:13], 5
356 ; GFX10-NEXT: s_mov_b64 s[14:15], 6
357 ; GFX10-NEXT: s_mov_b64 s[16:17], 7
358 ; GFX10-NEXT: s_mov_b64 s[18:19], 8
359 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
360 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
361 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
362 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
363 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
364 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
365 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
366 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
367 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
368 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
369 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
370 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
371 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
372 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s17, vcc_lo
373 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
374 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s18, vcc_lo
375 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s19, vcc_lo
376 ; GFX10-NEXT: s_setpc_b64 s[30:31]
378 ; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
379 ; GFX11: ; %bb.0: ; %entry
380 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; GFX11-NEXT: s_mov_b64 s[2:3], 2
382 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
383 ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
384 ; GFX11-NEXT: s_mov_b64 s[0:1], 1
385 ; GFX11-NEXT: s_mov_b64 s[4:5], 3
386 ; GFX11-NEXT: s_mov_b64 s[6:7], 4
387 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
388 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
389 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
390 ; GFX11-NEXT: s_mov_b64 s[8:9], 5
391 ; GFX11-NEXT: s_mov_b64 s[10:11], 6
392 ; GFX11-NEXT: s_mov_b64 s[12:13], 7
393 ; GFX11-NEXT: s_mov_b64 s[14:15], 8
394 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
395 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
396 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
397 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
398 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
399 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
400 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
401 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
402 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
403 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
404 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
405 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
406 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
407 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
408 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
409 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
410 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
411 ; GFX11-NEXT: s_setpc_b64 s[30:31]
413 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
; amdgpu_ps shader: constant <8 x i64> indexed by an inreg i32 (s2 in the
; checks). All four check prefixes expect the constants in s[4:19], m0 = s2
; and a single s_movrels_b64 into s[0:1]. The extracted value is stored
; through an undef global pointer (presumably just to give it a use, since
; the function returns void). The prefixes differ only in the store
; instruction (global_store_dwordx2, flat_store_dwordx2, global_store_b64)
; and in the extra s_nop / s_sendmsg before s_endpgm on gfx1100.
417 define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
418 ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
419 ; GPRIDX: ; %bb.0: ; %entry
420 ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
421 ; GPRIDX-NEXT: s_mov_b32 m0, s2
422 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
423 ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
424 ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
425 ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
426 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
427 ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
428 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
429 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
430 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
431 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
432 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
433 ; GPRIDX-NEXT: s_endpgm
435 ; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
436 ; MOVREL: ; %bb.0: ; %entry
437 ; MOVREL-NEXT: s_mov_b64 s[4:5], 1
438 ; MOVREL-NEXT: s_mov_b32 m0, s2
439 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8
440 ; MOVREL-NEXT: s_mov_b64 s[16:17], 7
441 ; MOVREL-NEXT: s_mov_b64 s[14:15], 6
442 ; MOVREL-NEXT: s_mov_b64 s[12:13], 5
443 ; MOVREL-NEXT: s_mov_b64 s[10:11], 4
444 ; MOVREL-NEXT: s_mov_b64 s[8:9], 3
445 ; MOVREL-NEXT: s_mov_b64 s[6:7], 2
446 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
447 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
448 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
449 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
450 ; MOVREL-NEXT: s_endpgm
452 ; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
453 ; GFX10: ; %bb.0: ; %entry
454 ; GFX10-NEXT: s_mov_b64 s[4:5], 1
455 ; GFX10-NEXT: s_mov_b32 m0, s2
456 ; GFX10-NEXT: s_mov_b64 s[18:19], 8
457 ; GFX10-NEXT: s_mov_b64 s[16:17], 7
458 ; GFX10-NEXT: s_mov_b64 s[14:15], 6
459 ; GFX10-NEXT: s_mov_b64 s[12:13], 5
460 ; GFX10-NEXT: s_mov_b64 s[10:11], 4
461 ; GFX10-NEXT: s_mov_b64 s[8:9], 3
462 ; GFX10-NEXT: s_mov_b64 s[6:7], 2
463 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5]
464 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
465 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
466 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
467 ; GFX10-NEXT: s_endpgm
469 ; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
470 ; GFX11: ; %bb.0: ; %entry
471 ; GFX11-NEXT: s_mov_b64 s[4:5], 1
472 ; GFX11-NEXT: s_mov_b32 m0, s2
473 ; GFX11-NEXT: s_mov_b64 s[18:19], 8
474 ; GFX11-NEXT: s_mov_b64 s[16:17], 7
475 ; GFX11-NEXT: s_mov_b64 s[14:15], 6
476 ; GFX11-NEXT: s_mov_b64 s[12:13], 5
477 ; GFX11-NEXT: s_mov_b64 s[10:11], 4
478 ; GFX11-NEXT: s_mov_b64 s[8:9], 3
479 ; GFX11-NEXT: s_mov_b64 s[6:7], 2
480 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[4:5]
481 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
482 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
483 ; GFX11-NEXT: s_nop 0
484 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
485 ; GFX11-NEXT: s_endpgm
487 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
488 store i64 %ext, ptr addrspace(1) undef
; amdgpu_ps shader: inreg <8 x i64> (read from s2..s17 in the checks) with a
; non-inreg index (v0). Expected code on every target is a v_cmp_eq_u32
; chain for indices 1..7 with two v_cndmask_b32 per step (one per 32-bit
; half), followed by a store of the result through an undef global pointer.
; The checks also pin the SGPR-to-SGPR copies that shift the vector down
; before it is used.
492 define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
493 ; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
494 ; GPRIDX: ; %bb.0: ; %entry
495 ; GPRIDX-NEXT: s_mov_b32 s0, s2
496 ; GPRIDX-NEXT: s_mov_b32 s1, s3
497 ; GPRIDX-NEXT: s_mov_b32 s2, s4
498 ; GPRIDX-NEXT: s_mov_b32 s3, s5
499 ; GPRIDX-NEXT: s_mov_b32 s4, s6
500 ; GPRIDX-NEXT: s_mov_b32 s5, s7
501 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s0
502 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s1
503 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s2
504 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s3
505 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
506 ; GPRIDX-NEXT: s_mov_b32 s6, s8
507 ; GPRIDX-NEXT: s_mov_b32 s7, s9
508 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s4
509 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s5
510 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
511 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
512 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
513 ; GPRIDX-NEXT: s_mov_b32 s8, s10
514 ; GPRIDX-NEXT: s_mov_b32 s9, s11
515 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s6
516 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s7
517 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
518 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
519 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
520 ; GPRIDX-NEXT: s_mov_b32 s10, s12
521 ; GPRIDX-NEXT: s_mov_b32 s11, s13
522 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s8
523 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s9
524 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
525 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
526 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
527 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s10
528 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s11
529 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
530 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
531 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
532 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s14
533 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s15
534 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
535 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
536 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
537 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s16
538 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s17
539 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
540 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
541 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
542 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
543 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
544 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
545 ; GPRIDX-NEXT: s_endpgm
547 ; MOVREL-LABEL: dyn_extract_v8i64_s_v:
548 ; MOVREL: ; %bb.0: ; %entry
549 ; MOVREL-NEXT: s_mov_b32 s0, s2
550 ; MOVREL-NEXT: s_mov_b32 s1, s3
551 ; MOVREL-NEXT: s_mov_b32 s2, s4
552 ; MOVREL-NEXT: s_mov_b32 s3, s5
553 ; MOVREL-NEXT: s_mov_b32 s4, s6
554 ; MOVREL-NEXT: s_mov_b32 s5, s7
555 ; MOVREL-NEXT: v_mov_b32_e32 v1, s0
556 ; MOVREL-NEXT: v_mov_b32_e32 v2, s1
557 ; MOVREL-NEXT: v_mov_b32_e32 v3, s2
558 ; MOVREL-NEXT: v_mov_b32_e32 v4, s3
559 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
560 ; MOVREL-NEXT: s_mov_b32 s6, s8
561 ; MOVREL-NEXT: s_mov_b32 s7, s9
562 ; MOVREL-NEXT: v_mov_b32_e32 v5, s4
563 ; MOVREL-NEXT: v_mov_b32_e32 v6, s5
564 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
565 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
566 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
567 ; MOVREL-NEXT: s_mov_b32 s8, s10
568 ; MOVREL-NEXT: s_mov_b32 s9, s11
569 ; MOVREL-NEXT: v_mov_b32_e32 v7, s6
570 ; MOVREL-NEXT: v_mov_b32_e32 v8, s7
571 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
572 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
573 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
574 ; MOVREL-NEXT: s_mov_b32 s10, s12
575 ; MOVREL-NEXT: s_mov_b32 s11, s13
576 ; MOVREL-NEXT: v_mov_b32_e32 v9, s8
577 ; MOVREL-NEXT: v_mov_b32_e32 v10, s9
578 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
579 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
580 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
581 ; MOVREL-NEXT: v_mov_b32_e32 v11, s10
582 ; MOVREL-NEXT: v_mov_b32_e32 v12, s11
583 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
584 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
585 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
586 ; MOVREL-NEXT: v_mov_b32_e32 v13, s14
587 ; MOVREL-NEXT: v_mov_b32_e32 v14, s15
588 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
589 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
590 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
591 ; MOVREL-NEXT: v_mov_b32_e32 v15, s16
592 ; MOVREL-NEXT: v_mov_b32_e32 v16, s17
593 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
594 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
595 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
596 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
597 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
598 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
599 ; MOVREL-NEXT: s_endpgm
601 ; GFX10-LABEL: dyn_extract_v8i64_s_v:
602 ; GFX10: ; %bb.0: ; %entry
603 ; GFX10-NEXT: s_mov_b32 s0, s2
604 ; GFX10-NEXT: s_mov_b32 s2, s4
605 ; GFX10-NEXT: s_mov_b32 s19, s5
606 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
607 ; GFX10-NEXT: v_mov_b32_e32 v2, s19
608 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
609 ; GFX10-NEXT: s_mov_b32 s1, s3
610 ; GFX10-NEXT: s_mov_b32 s4, s6
611 ; GFX10-NEXT: s_mov_b32 s5, s7
612 ; GFX10-NEXT: s_mov_b32 s6, s8
613 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
614 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
615 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
616 ; GFX10-NEXT: s_mov_b32 s7, s9
617 ; GFX10-NEXT: s_mov_b32 s8, s10
618 ; GFX10-NEXT: s_mov_b32 s9, s11
619 ; GFX10-NEXT: s_mov_b32 s10, s12
620 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
621 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
622 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
623 ; GFX10-NEXT: s_mov_b32 s11, s13
624 ; GFX10-NEXT: s_mov_b32 s12, s14
625 ; GFX10-NEXT: s_mov_b32 s13, s15
626 ; GFX10-NEXT: s_mov_b32 s14, s16
627 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
628 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
629 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
630 ; GFX10-NEXT: s_mov_b32 s15, s17
631 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
632 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
633 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
634 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
635 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
636 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
637 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
638 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
639 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
640 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
641 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
642 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
643 ; GFX10-NEXT: s_endpgm
645 ; GFX11-LABEL: dyn_extract_v8i64_s_v:
646 ; GFX11: ; %bb.0: ; %entry
647 ; GFX11-NEXT: s_mov_b32 s0, s2
648 ; GFX11-NEXT: s_mov_b32 s2, s4
649 ; GFX11-NEXT: s_mov_b32 s19, s5
650 ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
651 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
652 ; GFX11-NEXT: s_mov_b32 s1, s3
653 ; GFX11-NEXT: s_mov_b32 s4, s6
654 ; GFX11-NEXT: s_mov_b32 s5, s7
655 ; GFX11-NEXT: s_mov_b32 s6, s8
656 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
657 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
658 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
659 ; GFX11-NEXT: s_mov_b32 s7, s9
660 ; GFX11-NEXT: s_mov_b32 s8, s10
661 ; GFX11-NEXT: s_mov_b32 s9, s11
662 ; GFX11-NEXT: s_mov_b32 s10, s12
663 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
664 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
665 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
666 ; GFX11-NEXT: s_mov_b32 s11, s13
667 ; GFX11-NEXT: s_mov_b32 s12, s14
668 ; GFX11-NEXT: s_mov_b32 s13, s15
669 ; GFX11-NEXT: s_mov_b32 s14, s16
670 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
671 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
672 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
673 ; GFX11-NEXT: s_mov_b32 s15, s17
674 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
675 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
676 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
677 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
678 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
679 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
680 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
681 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
682 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
683 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
684 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
685 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
686 ; GFX11-NEXT: s_nop 0
687 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
688 ; GFX11-NEXT: s_endpgm
690 %ext = extractelement <8 x i64> %vec, i32 %sel
691 store i64 %ext, ptr addrspace(1) undef
; <8 x i64> in v0..v15 with a non-inreg index in v16 (per the checks),
; default calling convention. Every target uses a compare/select chain for
; indices 1..7 over both 32-bit halves, accumulating into v0/v1. The GFX11
; checks pair the two selects of each step into one v_dual_cndmask_b32.
695 define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
696 ; GCN-LABEL: dyn_extract_v8i64_v_v:
697 ; GCN: ; %bb.0: ; %entry
698 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
700 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
701 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
702 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
703 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
704 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
705 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
706 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
707 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
708 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
709 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
710 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
711 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
712 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
713 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
714 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
715 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
716 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
717 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
718 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
719 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
720 ; GCN-NEXT: s_setpc_b64 s[30:31]
722 ; GFX10-LABEL: dyn_extract_v8i64_v_v:
723 ; GFX10: ; %bb.0: ; %entry
724 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
726 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
727 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
728 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
729 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
730 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
731 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
732 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
733 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
734 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
735 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
736 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
737 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
738 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
739 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
740 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
741 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
742 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
743 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
744 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
745 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
746 ; GFX10-NEXT: s_setpc_b64 s[30:31]
748 ; GFX11-LABEL: dyn_extract_v8i64_v_v:
749 ; GFX11: ; %bb.0: ; %entry
750 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
751 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
752 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
753 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
754 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
755 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
756 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
757 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
758 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
759 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
760 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
761 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
762 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
763 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
764 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
765 ; GFX11-NEXT: s_setpc_b64 s[30:31]
767 %ext = extractelement <8 x i64> %vec, i32 %sel
; Dynamic extract of an i64 from <8 x i64> %vec using the inreg index %sel;
; the extracted value is stored to an undef addrspace(1) pointer.
; Expected lowering: the index is shifted left by 1 (s_lshl_b32) and both
; 32-bit halves are read with indexed moves (s_set_gpr_idx_on under the GPRIDX
; prefix, v_movrels_b32 through m0 under the other prefixes).
771 define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
772 ; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
773 ; GPRIDX: ; %bb.0: ; %entry
774 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
775 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
776 ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
777 ; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
778 ; GPRIDX-NEXT: s_set_gpr_idx_off
779 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
780 ; GPRIDX-NEXT: s_endpgm
782 ; MOVREL-LABEL: dyn_extract_v8i64_v_s:
783 ; MOVREL: ; %bb.0: ; %entry
784 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
785 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
786 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
787 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
788 ; MOVREL-NEXT: s_endpgm
790 ; GFX10-LABEL: dyn_extract_v8i64_v_s:
791 ; GFX10: ; %bb.0: ; %entry
792 ; GFX10-NEXT: s_lshl_b32 m0, s2, 1
793 ; GFX10-NEXT: v_movrels_b32_e32 v16, v0
794 ; GFX10-NEXT: v_movrels_b32_e32 v17, v1
795 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
796 ; GFX10-NEXT: s_endpgm
798 ; GFX11-LABEL: dyn_extract_v8i64_v_s:
799 ; GFX11: ; %bb.0: ; %entry
800 ; GFX11-NEXT: s_lshl_b32 m0, s2, 1
801 ; GFX11-NEXT: v_movrels_b32_e32 v16, v0
802 ; GFX11-NEXT: v_movrels_b32_e32 v17, v1
803 ; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off
804 ; GFX11-NEXT: s_nop 0
805 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
806 ; GFX11-NEXT: s_endpgm
808 %ext = extractelement <8 x i64> %vec, i32 %sel
809 store i64 %ext, ptr addrspace(1) undef
; Dynamic extract of an i64 from an inreg <8 x i64> %vec using the inreg index
; %sel; the extracted value is stored to an undef addrspace(1) pointer.
; Every prefix expects the vector to be copied down from s[2:17] to s[0:15],
; m0 to be loaded from s18, and a single s_movrels_b64 to select the element
; before it is moved to VGPRs for the store.
813 define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
814 ; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
815 ; GPRIDX: ; %bb.0: ; %entry
816 ; GPRIDX-NEXT: s_mov_b32 s0, s2
817 ; GPRIDX-NEXT: s_mov_b32 s1, s3
818 ; GPRIDX-NEXT: s_mov_b32 m0, s18
819 ; GPRIDX-NEXT: s_mov_b32 s2, s4
820 ; GPRIDX-NEXT: s_mov_b32 s3, s5
821 ; GPRIDX-NEXT: s_mov_b32 s4, s6
822 ; GPRIDX-NEXT: s_mov_b32 s5, s7
823 ; GPRIDX-NEXT: s_mov_b32 s6, s8
824 ; GPRIDX-NEXT: s_mov_b32 s7, s9
825 ; GPRIDX-NEXT: s_mov_b32 s8, s10
826 ; GPRIDX-NEXT: s_mov_b32 s9, s11
827 ; GPRIDX-NEXT: s_mov_b32 s10, s12
828 ; GPRIDX-NEXT: s_mov_b32 s11, s13
829 ; GPRIDX-NEXT: s_mov_b32 s12, s14
830 ; GPRIDX-NEXT: s_mov_b32 s13, s15
831 ; GPRIDX-NEXT: s_mov_b32 s14, s16
832 ; GPRIDX-NEXT: s_mov_b32 s15, s17
833 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
834 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
835 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
836 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
837 ; GPRIDX-NEXT: s_endpgm
839 ; MOVREL-LABEL: dyn_extract_v8i64_s_s:
840 ; MOVREL: ; %bb.0: ; %entry
841 ; MOVREL-NEXT: s_mov_b32 s0, s2
842 ; MOVREL-NEXT: s_mov_b32 s1, s3
843 ; MOVREL-NEXT: s_mov_b32 m0, s18
844 ; MOVREL-NEXT: s_mov_b32 s2, s4
845 ; MOVREL-NEXT: s_mov_b32 s3, s5
846 ; MOVREL-NEXT: s_mov_b32 s4, s6
847 ; MOVREL-NEXT: s_mov_b32 s5, s7
848 ; MOVREL-NEXT: s_mov_b32 s6, s8
849 ; MOVREL-NEXT: s_mov_b32 s7, s9
850 ; MOVREL-NEXT: s_mov_b32 s8, s10
851 ; MOVREL-NEXT: s_mov_b32 s9, s11
852 ; MOVREL-NEXT: s_mov_b32 s10, s12
853 ; MOVREL-NEXT: s_mov_b32 s11, s13
854 ; MOVREL-NEXT: s_mov_b32 s12, s14
855 ; MOVREL-NEXT: s_mov_b32 s13, s15
856 ; MOVREL-NEXT: s_mov_b32 s14, s16
857 ; MOVREL-NEXT: s_mov_b32 s15, s17
858 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
859 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
860 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
861 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
862 ; MOVREL-NEXT: s_endpgm
864 ; GFX10-LABEL: dyn_extract_v8i64_s_s:
865 ; GFX10: ; %bb.0: ; %entry
866 ; GFX10-NEXT: s_mov_b32 s0, s2
867 ; GFX10-NEXT: s_mov_b32 s1, s3
868 ; GFX10-NEXT: s_mov_b32 m0, s18
869 ; GFX10-NEXT: s_mov_b32 s2, s4
870 ; GFX10-NEXT: s_mov_b32 s3, s5
871 ; GFX10-NEXT: s_mov_b32 s4, s6
872 ; GFX10-NEXT: s_mov_b32 s5, s7
873 ; GFX10-NEXT: s_mov_b32 s6, s8
874 ; GFX10-NEXT: s_mov_b32 s7, s9
875 ; GFX10-NEXT: s_mov_b32 s8, s10
876 ; GFX10-NEXT: s_mov_b32 s9, s11
877 ; GFX10-NEXT: s_mov_b32 s10, s12
878 ; GFX10-NEXT: s_mov_b32 s11, s13
879 ; GFX10-NEXT: s_mov_b32 s12, s14
880 ; GFX10-NEXT: s_mov_b32 s13, s15
881 ; GFX10-NEXT: s_mov_b32 s14, s16
882 ; GFX10-NEXT: s_mov_b32 s15, s17
883 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
884 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
885 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
886 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
887 ; GFX10-NEXT: s_endpgm
889 ; GFX11-LABEL: dyn_extract_v8i64_s_s:
890 ; GFX11: ; %bb.0: ; %entry
891 ; GFX11-NEXT: s_mov_b32 s0, s2
892 ; GFX11-NEXT: s_mov_b32 s1, s3
893 ; GFX11-NEXT: s_mov_b32 m0, s18
894 ; GFX11-NEXT: s_mov_b32 s2, s4
895 ; GFX11-NEXT: s_mov_b32 s3, s5
896 ; GFX11-NEXT: s_mov_b32 s4, s6
897 ; GFX11-NEXT: s_mov_b32 s5, s7
898 ; GFX11-NEXT: s_mov_b32 s6, s8
899 ; GFX11-NEXT: s_mov_b32 s7, s9
900 ; GFX11-NEXT: s_mov_b32 s8, s10
901 ; GFX11-NEXT: s_mov_b32 s9, s11
902 ; GFX11-NEXT: s_mov_b32 s10, s12
903 ; GFX11-NEXT: s_mov_b32 s11, s13
904 ; GFX11-NEXT: s_mov_b32 s12, s14
905 ; GFX11-NEXT: s_mov_b32 s13, s15
906 ; GFX11-NEXT: s_mov_b32 s14, s16
907 ; GFX11-NEXT: s_mov_b32 s15, s17
908 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
909 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
910 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
911 ; GFX11-NEXT: s_nop 0
912 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
913 ; GFX11-NEXT: s_endpgm
915 %ext = extractelement <8 x i64> %vec, i32 %sel
916 store i64 %ext, ptr addrspace(1) undef
; Dynamic extract from an inreg <8 x float> with index %sel + 3.
; Under the GPRIDX prefix the checks expect an explicit s_add_i32 followed by
; an s_cmp_eq_u32/s_cselect_b32 chain; under MOVREL and GFX10PLUS they expect
; m0 = s10 and s_movrels_b32 reading from s3, i.e. the +3 shows up as the
; source register of the indexed move rather than as an add.
920 define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
921 ; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
922 ; GPRIDX: ; %bb.0: ; %entry
923 ; GPRIDX-NEXT: s_add_i32 s10, s10, 3
924 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
925 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
926 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
927 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
928 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
929 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
930 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
931 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
932 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
933 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
934 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
935 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
936 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
937 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
938 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
939 ; GPRIDX-NEXT: ; return to shader part epilog
941 ; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
942 ; MOVREL: ; %bb.0: ; %entry
943 ; MOVREL-NEXT: s_mov_b32 s0, s2
944 ; MOVREL-NEXT: s_mov_b32 s1, s3
945 ; MOVREL-NEXT: s_mov_b32 s3, s5
946 ; MOVREL-NEXT: s_mov_b32 m0, s10
947 ; MOVREL-NEXT: s_mov_b32 s2, s4
948 ; MOVREL-NEXT: s_mov_b32 s4, s6
949 ; MOVREL-NEXT: s_mov_b32 s5, s7
950 ; MOVREL-NEXT: s_mov_b32 s6, s8
951 ; MOVREL-NEXT: s_mov_b32 s7, s9
952 ; MOVREL-NEXT: s_movrels_b32 s0, s3
953 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
954 ; MOVREL-NEXT: ; return to shader part epilog
956 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3:
957 ; GFX10PLUS: ; %bb.0: ; %entry
958 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
959 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
960 ; GFX10PLUS-NEXT: s_mov_b32 m0, s10
961 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
962 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
963 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
964 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
965 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
966 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
967 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s3
968 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
969 ; GFX10PLUS-NEXT: ; return to shader part epilog
971 %add = add i32 %sel, 3
972 %ext = extractelement <8 x float> %vec, i32 %add
; Dynamic extract from <8 x float> with index %sel + 3 in a function without
; the amdgpu_ps calling convention. All prefixes expect the add to be emitted
; on v8 (v_add_u32 / v_add_nc_u32) followed by a v_cmp_eq_u32/v_cndmask_b32
; chain over indices 1-7 that accumulates the result in v0.
976 define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
977 ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
978 ; GPRIDX: ; %bb.0: ; %entry
979 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
980 ; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8
981 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
982 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
983 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
984 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
985 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
986 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
987 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
988 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
989 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
990 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
991 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
992 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
993 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
994 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
995 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
997 ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
998 ; MOVREL: ; %bb.0: ; %entry
999 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000 ; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8
1001 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
1002 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1003 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
1004 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1005 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
1006 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1007 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
1008 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1009 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
1010 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1011 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
1012 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1013 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
1014 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1015 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
1017 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3:
1018 ; GFX10PLUS: ; %bb.0: ; %entry
1019 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020 ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8
1021 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
1022 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1023 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
1024 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1025 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
1026 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1027 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
1028 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1029 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
1030 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1031 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
1032 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1033 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
1034 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1035 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1037 %add = add i32 %sel, 3
1038 %ext = extractelement <8 x float> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 1.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[2:3]; no separate add instruction is
; expected.
1042 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
1043 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
1044 ; GCN: ; %bb.0: ; %entry
1045 ; GCN-NEXT: s_mov_b32 s0, s2
1046 ; GCN-NEXT: s_mov_b32 s1, s3
1047 ; GCN-NEXT: s_mov_b32 s2, s4
1048 ; GCN-NEXT: s_mov_b32 s3, s5
1049 ; GCN-NEXT: s_mov_b32 m0, s18
1050 ; GCN-NEXT: s_mov_b32 s4, s6
1051 ; GCN-NEXT: s_mov_b32 s5, s7
1052 ; GCN-NEXT: s_mov_b32 s6, s8
1053 ; GCN-NEXT: s_mov_b32 s7, s9
1054 ; GCN-NEXT: s_mov_b32 s8, s10
1055 ; GCN-NEXT: s_mov_b32 s9, s11
1056 ; GCN-NEXT: s_mov_b32 s10, s12
1057 ; GCN-NEXT: s_mov_b32 s11, s13
1058 ; GCN-NEXT: s_mov_b32 s12, s14
1059 ; GCN-NEXT: s_mov_b32 s13, s15
1060 ; GCN-NEXT: s_mov_b32 s14, s16
1061 ; GCN-NEXT: s_mov_b32 s15, s17
1062 ; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3]
1063 ; GCN-NEXT: ; return to shader part epilog
1065 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1:
1066 ; GFX10PLUS: ; %bb.0: ; %entry
1067 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1068 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1069 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1070 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1071 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1072 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1073 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1074 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1075 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1076 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1077 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1078 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1079 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1080 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1081 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1082 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1083 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1084 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[2:3]
1085 ; GFX10PLUS-NEXT: ; return to shader part epilog
1087 %add = add i32 %sel, 1
1088 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 2.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[4:5]; no separate add instruction is
; expected.
1092 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
1093 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
1094 ; GCN: ; %bb.0: ; %entry
1095 ; GCN-NEXT: s_mov_b32 s0, s2
1096 ; GCN-NEXT: s_mov_b32 s1, s3
1097 ; GCN-NEXT: s_mov_b32 s2, s4
1098 ; GCN-NEXT: s_mov_b32 s3, s5
1099 ; GCN-NEXT: s_mov_b32 s4, s6
1100 ; GCN-NEXT: s_mov_b32 s5, s7
1101 ; GCN-NEXT: s_mov_b32 m0, s18
1102 ; GCN-NEXT: s_mov_b32 s6, s8
1103 ; GCN-NEXT: s_mov_b32 s7, s9
1104 ; GCN-NEXT: s_mov_b32 s8, s10
1105 ; GCN-NEXT: s_mov_b32 s9, s11
1106 ; GCN-NEXT: s_mov_b32 s10, s12
1107 ; GCN-NEXT: s_mov_b32 s11, s13
1108 ; GCN-NEXT: s_mov_b32 s12, s14
1109 ; GCN-NEXT: s_mov_b32 s13, s15
1110 ; GCN-NEXT: s_mov_b32 s14, s16
1111 ; GCN-NEXT: s_mov_b32 s15, s17
1112 ; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5]
1113 ; GCN-NEXT: ; return to shader part epilog
1115 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2:
1116 ; GFX10PLUS: ; %bb.0: ; %entry
1117 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1118 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1119 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1120 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1121 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1122 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1123 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1124 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1125 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1126 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1127 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1128 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1129 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1130 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1131 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1132 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1133 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1134 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[4:5]
1135 ; GFX10PLUS-NEXT: ; return to shader part epilog
1137 %add = add i32 %sel, 2
1138 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 3.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[6:7]; no separate add instruction is
; expected.
1142 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
1143 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
1144 ; GCN: ; %bb.0: ; %entry
1145 ; GCN-NEXT: s_mov_b32 s0, s2
1146 ; GCN-NEXT: s_mov_b32 s1, s3
1147 ; GCN-NEXT: s_mov_b32 s2, s4
1148 ; GCN-NEXT: s_mov_b32 s3, s5
1149 ; GCN-NEXT: s_mov_b32 s4, s6
1150 ; GCN-NEXT: s_mov_b32 s5, s7
1151 ; GCN-NEXT: s_mov_b32 s6, s8
1152 ; GCN-NEXT: s_mov_b32 s7, s9
1153 ; GCN-NEXT: s_mov_b32 m0, s18
1154 ; GCN-NEXT: s_mov_b32 s8, s10
1155 ; GCN-NEXT: s_mov_b32 s9, s11
1156 ; GCN-NEXT: s_mov_b32 s10, s12
1157 ; GCN-NEXT: s_mov_b32 s11, s13
1158 ; GCN-NEXT: s_mov_b32 s12, s14
1159 ; GCN-NEXT: s_mov_b32 s13, s15
1160 ; GCN-NEXT: s_mov_b32 s14, s16
1161 ; GCN-NEXT: s_mov_b32 s15, s17
1162 ; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7]
1163 ; GCN-NEXT: ; return to shader part epilog
1165 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3:
1166 ; GFX10PLUS: ; %bb.0: ; %entry
1167 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1168 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1169 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1170 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1171 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1172 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1173 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1174 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1175 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1176 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1177 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1178 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1179 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1180 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1181 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1182 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1183 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1184 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[6:7]
1185 ; GFX10PLUS-NEXT: ; return to shader part epilog
1187 %add = add i32 %sel, 3
1188 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 4.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[8:9]; no separate add instruction is
; expected.
1192 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1193 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1194 ; GCN: ; %bb.0: ; %entry
1195 ; GCN-NEXT: s_mov_b32 s0, s2
1196 ; GCN-NEXT: s_mov_b32 s1, s3
1197 ; GCN-NEXT: s_mov_b32 s2, s4
1198 ; GCN-NEXT: s_mov_b32 s3, s5
1199 ; GCN-NEXT: s_mov_b32 s4, s6
1200 ; GCN-NEXT: s_mov_b32 s5, s7
1201 ; GCN-NEXT: s_mov_b32 s6, s8
1202 ; GCN-NEXT: s_mov_b32 s7, s9
1203 ; GCN-NEXT: s_mov_b32 s8, s10
1204 ; GCN-NEXT: s_mov_b32 s9, s11
1205 ; GCN-NEXT: s_mov_b32 m0, s18
1206 ; GCN-NEXT: s_mov_b32 s10, s12
1207 ; GCN-NEXT: s_mov_b32 s11, s13
1208 ; GCN-NEXT: s_mov_b32 s12, s14
1209 ; GCN-NEXT: s_mov_b32 s13, s15
1210 ; GCN-NEXT: s_mov_b32 s14, s16
1211 ; GCN-NEXT: s_mov_b32 s15, s17
1212 ; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9]
1213 ; GCN-NEXT: ; return to shader part epilog
1215 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4:
1216 ; GFX10PLUS: ; %bb.0: ; %entry
1217 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1218 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1219 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1220 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1221 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1222 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1223 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1224 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1225 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1226 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1227 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1228 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1229 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1230 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1231 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1232 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1233 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1234 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[8:9]
1235 ; GFX10PLUS-NEXT: ; return to shader part epilog
1237 %add = add i32 %sel, 4
1238 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 5.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[10:11]; no separate add instruction is
; expected.
1242 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1243 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1244 ; GCN: ; %bb.0: ; %entry
1245 ; GCN-NEXT: s_mov_b32 s0, s2
1246 ; GCN-NEXT: s_mov_b32 s1, s3
1247 ; GCN-NEXT: s_mov_b32 s2, s4
1248 ; GCN-NEXT: s_mov_b32 s3, s5
1249 ; GCN-NEXT: s_mov_b32 s4, s6
1250 ; GCN-NEXT: s_mov_b32 s5, s7
1251 ; GCN-NEXT: s_mov_b32 s6, s8
1252 ; GCN-NEXT: s_mov_b32 s7, s9
1253 ; GCN-NEXT: s_mov_b32 s8, s10
1254 ; GCN-NEXT: s_mov_b32 s9, s11
1255 ; GCN-NEXT: s_mov_b32 s10, s12
1256 ; GCN-NEXT: s_mov_b32 s11, s13
1257 ; GCN-NEXT: s_mov_b32 m0, s18
1258 ; GCN-NEXT: s_mov_b32 s12, s14
1259 ; GCN-NEXT: s_mov_b32 s13, s15
1260 ; GCN-NEXT: s_mov_b32 s14, s16
1261 ; GCN-NEXT: s_mov_b32 s15, s17
1262 ; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11]
1263 ; GCN-NEXT: ; return to shader part epilog
1265 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5:
1266 ; GFX10PLUS: ; %bb.0: ; %entry
1267 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1268 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1269 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1270 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1271 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1272 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1273 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1274 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1275 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1276 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1277 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1278 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1279 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1280 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1281 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1282 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1283 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1284 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[10:11]
1285 ; GFX10PLUS-NEXT: ; return to shader part epilog
1287 %add = add i32 %sel, 5
1288 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 6.
; Both prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18,
; and s_movrels_b64 reading from s[12:13]; no separate add instruction is
; expected.
1292 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1293 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1294 ; GCN: ; %bb.0: ; %entry
1295 ; GCN-NEXT: s_mov_b32 s0, s2
1296 ; GCN-NEXT: s_mov_b32 s1, s3
1297 ; GCN-NEXT: s_mov_b32 s2, s4
1298 ; GCN-NEXT: s_mov_b32 s3, s5
1299 ; GCN-NEXT: s_mov_b32 s4, s6
1300 ; GCN-NEXT: s_mov_b32 s5, s7
1301 ; GCN-NEXT: s_mov_b32 s6, s8
1302 ; GCN-NEXT: s_mov_b32 s7, s9
1303 ; GCN-NEXT: s_mov_b32 s8, s10
1304 ; GCN-NEXT: s_mov_b32 s9, s11
1305 ; GCN-NEXT: s_mov_b32 s10, s12
1306 ; GCN-NEXT: s_mov_b32 s11, s13
1307 ; GCN-NEXT: s_mov_b32 s12, s14
1308 ; GCN-NEXT: s_mov_b32 s13, s15
1309 ; GCN-NEXT: s_mov_b32 m0, s18
1310 ; GCN-NEXT: s_mov_b32 s14, s16
1311 ; GCN-NEXT: s_mov_b32 s15, s17
1312 ; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13]
1313 ; GCN-NEXT: ; return to shader part epilog
1315 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6:
1316 ; GFX10PLUS: ; %bb.0: ; %entry
1317 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1318 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1319 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1320 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1321 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1322 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1323 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1324 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1325 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1326 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1327 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1328 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1329 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1330 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1331 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1332 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1333 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1334 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[12:13]
1335 ; GFX10PLUS-NEXT: ; return to shader part epilog
1337 %add = add i32 %sel, 6
1338 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + 7.
; All prefixes expect the vector copied from s[2:17] to s[0:15], m0 = s18, and
; s_movrels_b64 reading from s[14:15]. Here the m0 write is the last copy
; before the indexed move, and the GPRIDX checks additionally expect an
; s_nop 0 between the two; the MOVREL and GFX10PLUS checks do not.
1342 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1343 ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1344 ; GPRIDX: ; %bb.0: ; %entry
1345 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1346 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1347 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1348 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1349 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1350 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1351 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1352 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1353 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1354 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1355 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1356 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1357 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1358 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1359 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1360 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1361 ; GPRIDX-NEXT: s_mov_b32 m0, s18
1362 ; GPRIDX-NEXT: s_nop 0
1363 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
1364 ; GPRIDX-NEXT: ; return to shader part epilog
1366 ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1367 ; MOVREL: ; %bb.0: ; %entry
1368 ; MOVREL-NEXT: s_mov_b32 s0, s2
1369 ; MOVREL-NEXT: s_mov_b32 s1, s3
1370 ; MOVREL-NEXT: s_mov_b32 s2, s4
1371 ; MOVREL-NEXT: s_mov_b32 s3, s5
1372 ; MOVREL-NEXT: s_mov_b32 s4, s6
1373 ; MOVREL-NEXT: s_mov_b32 s5, s7
1374 ; MOVREL-NEXT: s_mov_b32 s6, s8
1375 ; MOVREL-NEXT: s_mov_b32 s7, s9
1376 ; MOVREL-NEXT: s_mov_b32 s8, s10
1377 ; MOVREL-NEXT: s_mov_b32 s9, s11
1378 ; MOVREL-NEXT: s_mov_b32 s10, s12
1379 ; MOVREL-NEXT: s_mov_b32 s11, s13
1380 ; MOVREL-NEXT: s_mov_b32 s12, s14
1381 ; MOVREL-NEXT: s_mov_b32 s13, s15
1382 ; MOVREL-NEXT: s_mov_b32 s14, s16
1383 ; MOVREL-NEXT: s_mov_b32 s15, s17
1384 ; MOVREL-NEXT: s_mov_b32 m0, s18
1385 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
1386 ; MOVREL-NEXT: ; return to shader part epilog
1388 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7:
1389 ; GFX10PLUS: ; %bb.0: ; %entry
1390 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1391 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1392 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1393 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1394 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1395 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1396 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1397 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1398 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1399 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1400 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1401 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1402 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1403 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1404 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1405 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1406 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1407 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[14:15]
1408 ; GFX10PLUS-NEXT: ; return to shader part epilog
1410 %add = add i32 %sel, 7
1411 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from an inreg <8 x double> with index %sel + -1.
; Both prefixes expect the offset to be applied to the index itself
; (s_add_i32 m0, s18, -1) and s_movrels_b64 to read from the vector base
; s[0:1], after the vector is copied from s[2:17] to s[0:15].
1415 define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1416 ; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1417 ; GCN: ; %bb.0: ; %entry
1418 ; GCN-NEXT: s_mov_b32 s0, s2
1419 ; GCN-NEXT: s_mov_b32 s1, s3
1420 ; GCN-NEXT: s_add_i32 m0, s18, -1
1421 ; GCN-NEXT: s_mov_b32 s2, s4
1422 ; GCN-NEXT: s_mov_b32 s3, s5
1423 ; GCN-NEXT: s_mov_b32 s4, s6
1424 ; GCN-NEXT: s_mov_b32 s5, s7
1425 ; GCN-NEXT: s_mov_b32 s6, s8
1426 ; GCN-NEXT: s_mov_b32 s7, s9
1427 ; GCN-NEXT: s_mov_b32 s8, s10
1428 ; GCN-NEXT: s_mov_b32 s9, s11
1429 ; GCN-NEXT: s_mov_b32 s10, s12
1430 ; GCN-NEXT: s_mov_b32 s11, s13
1431 ; GCN-NEXT: s_mov_b32 s12, s14
1432 ; GCN-NEXT: s_mov_b32 s13, s15
1433 ; GCN-NEXT: s_mov_b32 s14, s16
1434 ; GCN-NEXT: s_mov_b32 s15, s17
1435 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
1436 ; GCN-NEXT: ; return to shader part epilog
1438 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1439 ; GFX10PLUS: ; %bb.0: ; %entry
1440 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1441 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1442 ; GFX10PLUS-NEXT: s_add_i32 m0, s18, -1
1443 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1444 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1445 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1446 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1447 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1448 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1449 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1450 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1451 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1452 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1453 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1454 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1455 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1456 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1457 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
1458 ; GFX10PLUS-NEXT: ; return to shader part epilog
1460 %add = add i32 %sel, -1
1461 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extract from <8 x double> with index %sel + 3 in a function without
; the amdgpu_ps calling convention. All prefixes expect the add to be emitted
; on v16 followed by a compare/select chain over indices 1-7 that updates both
; 32-bit halves of the result (v0 and v1); the GFX11 checks expect each pair of
; selects to be fused into a single v_dual_cndmask_b32.
1465 define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1466 ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1467 ; GPRIDX: ; %bb.0: ; %entry
1468 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1469 ; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16
1470 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1471 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1472 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1473 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1474 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1475 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1476 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1477 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1478 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1479 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1480 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1481 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1482 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1483 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1484 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1485 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1486 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1487 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1488 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1489 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1490 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1491 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
1493 ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1494 ; MOVREL: ; %bb.0: ; %entry
1495 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1496 ; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16
1497 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1498 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1499 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1500 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1501 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1502 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1503 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1504 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1505 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1506 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1507 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1508 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1509 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1510 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1511 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1512 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1513 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1514 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1515 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1516 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1517 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1518 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
1520 ; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1521 ; GFX10: ; %bb.0: ; %entry
1522 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16
1524 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1525 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1526 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1527 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1528 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1529 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1530 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1531 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1532 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1533 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1534 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1535 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1536 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1537 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1538 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1539 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1540 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1541 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1542 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1543 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1544 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1545 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1547 ; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3:
1548 ; GFX11: ; %bb.0: ; %entry
1549 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550 ; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16
1551 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1552 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
1553 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1554 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
1555 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1556 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
1557 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1558 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
1559 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1560 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
1561 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1562 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
1563 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1564 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
1565 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1567 %add = add i32 %sel, 3
1568 %ext = extractelement <8 x double> %vec, i32 %add
; Extract element %idx from a vector of eight addrspace(3) pointers, where both
; the vector and the index are ordinary (non-inreg) arguments. Both check blocks
; below expect a v_cmp_eq_u32 / v_cndmask_b32 chain that compares the index in v8
; against 1..7 and leaves the selected pointer in v0.
1572 define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) {
1573 ; GCN-LABEL: dyn_extract_v8p3_v_v:
1574 ; GCN: ; %bb.0: ; %entry
1575 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
1577 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1578 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
1579 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1580 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
1581 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1582 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
1583 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1584 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
1585 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1586 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
1587 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1588 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
1589 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1590 ; GCN-NEXT: s_setpc_b64 s[30:31]
1592 ; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
1593 ; GFX10PLUS: ; %bb.0: ; %entry
1594 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
1596 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1597 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
1598 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1599 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
1600 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1601 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
1602 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1603 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
1604 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1605 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
1606 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1607 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
1608 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1609 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1611 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
1612 ret ptr addrspace(3) %ext
; Extract element %idx from a vector of eight addrspace(3) pointers, where both
; the vector and the index are inreg arguments of an amdgpu_ps function. The
; extracted pointer is stored through an undef addrspace(3) address (presumably
; only to keep the extract alive).
; Expected lowering differs per target: gfx900 uses an s_cmp_eq_u32 /
; s_cselect_b32 chain, while fiji, gfx1010 and gfx1100 copy the vector down to
; s0..s7, load the index into m0 and use s_movrels_b32. fiji also writes -1 to
; m0 before the LDS store, and gfx1100 spells the store ds_store_b32.
1615 define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) {
1616 ; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1617 ; GPRIDX: ; %bb.0: ; %entry
1618 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
1619 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
1620 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
1621 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
1622 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
1623 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
1624 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
1625 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
1626 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
1627 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
1628 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
1629 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
1630 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
1631 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
1632 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
1633 ; GPRIDX-NEXT: ds_write_b32 v0, v0
1634 ; GPRIDX-NEXT: s_endpgm
1636 ; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1637 ; MOVREL: ; %bb.0: ; %entry
1638 ; MOVREL-NEXT: s_mov_b32 s0, s2
1639 ; MOVREL-NEXT: s_mov_b32 m0, s10
1640 ; MOVREL-NEXT: s_mov_b32 s1, s3
1641 ; MOVREL-NEXT: s_mov_b32 s2, s4
1642 ; MOVREL-NEXT: s_mov_b32 s3, s5
1643 ; MOVREL-NEXT: s_mov_b32 s4, s6
1644 ; MOVREL-NEXT: s_mov_b32 s5, s7
1645 ; MOVREL-NEXT: s_mov_b32 s6, s8
1646 ; MOVREL-NEXT: s_mov_b32 s7, s9
1647 ; MOVREL-NEXT: s_movrels_b32 s0, s0
1648 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
1649 ; MOVREL-NEXT: s_mov_b32 m0, -1
1650 ; MOVREL-NEXT: ds_write_b32 v0, v0
1651 ; MOVREL-NEXT: s_endpgm
1653 ; GFX10-LABEL: dyn_extract_v8p3_s_s:
1654 ; GFX10: ; %bb.0: ; %entry
1655 ; GFX10-NEXT: s_mov_b32 s0, s2
1656 ; GFX10-NEXT: s_mov_b32 m0, s10
1657 ; GFX10-NEXT: s_mov_b32 s1, s3
1658 ; GFX10-NEXT: s_mov_b32 s2, s4
1659 ; GFX10-NEXT: s_mov_b32 s3, s5
1660 ; GFX10-NEXT: s_mov_b32 s4, s6
1661 ; GFX10-NEXT: s_mov_b32 s5, s7
1662 ; GFX10-NEXT: s_mov_b32 s6, s8
1663 ; GFX10-NEXT: s_mov_b32 s7, s9
1664 ; GFX10-NEXT: s_movrels_b32 s0, s0
1665 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1666 ; GFX10-NEXT: ds_write_b32 v0, v0
1667 ; GFX10-NEXT: s_endpgm
1669 ; GFX11-LABEL: dyn_extract_v8p3_s_s:
1670 ; GFX11: ; %bb.0: ; %entry
1671 ; GFX11-NEXT: s_mov_b32 s0, s2
1672 ; GFX11-NEXT: s_mov_b32 m0, s10
1673 ; GFX11-NEXT: s_mov_b32 s1, s3
1674 ; GFX11-NEXT: s_mov_b32 s2, s4
1675 ; GFX11-NEXT: s_mov_b32 s3, s5
1676 ; GFX11-NEXT: s_mov_b32 s4, s6
1677 ; GFX11-NEXT: s_mov_b32 s5, s7
1678 ; GFX11-NEXT: s_mov_b32 s6, s8
1679 ; GFX11-NEXT: s_mov_b32 s7, s9
1680 ; GFX11-NEXT: s_movrels_b32 s0, s0
1681 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
1682 ; GFX11-NEXT: ds_store_b32 v0, v0
1683 ; GFX11-NEXT: s_endpgm
1685 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
1686 store ptr addrspace(3) %ext, ptr addrspace(3) undef
; Extract element %idx from a vector of eight addrspace(1) pointers, where both
; the vector and the index are ordinary (non-inreg) arguments. In the expected
; output each element occupies a pair of VGPRs, so every index comparison
; against 1..7 (index in v16) is followed by two selects, one for v0 and one
; for v1. gfx1100 is expected to fuse each pair into a v_dual_cndmask_b32.
1690 define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) {
1691 ; GCN-LABEL: dyn_extract_v8p1_v_v:
1692 ; GCN: ; %bb.0: ; %entry
1693 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1695 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1696 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1697 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1698 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1699 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1700 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1701 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1702 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1703 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1704 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1705 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1706 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1707 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1708 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1709 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1710 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1711 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1712 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1713 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1714 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1715 ; GCN-NEXT: s_setpc_b64 s[30:31]
1717 ; GFX10-LABEL: dyn_extract_v8p1_v_v:
1718 ; GFX10: ; %bb.0: ; %entry
1719 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1720 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1721 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1722 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1723 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1724 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1725 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1726 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1727 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1728 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1729 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1730 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1731 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1732 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1733 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1734 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1735 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1736 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1737 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1738 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1739 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1740 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1741 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1743 ; GFX11-LABEL: dyn_extract_v8p1_v_v:
1744 ; GFX11: ; %bb.0: ; %entry
1745 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1746 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1747 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
1748 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1749 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
1750 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1751 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
1752 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1753 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
1754 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1755 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
1756 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1757 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
1758 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1759 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
1760 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1762 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
1763 ret ptr addrspace(1) %ext
; Extract element %idx from a vector of eight addrspace(1) pointers, where both
; the vector and the index are inreg arguments of an amdgpu_ps function. The
; extracted pointer is stored through an undef addrspace(1) address (presumably
; only to keep the extract alive).
; All four targets are expected to copy s2..s17 down to s0..s15, load the index
; (s18) into m0 and extract with s_movrels_b64. Only the store differs:
; global_store_dwordx2 on gfx900 and gfx1010, flat_store_dwordx2 on fiji, and
; global_store_b64 followed by s_nop plus a MSG_DEALLOC_VGPRS s_sendmsg on
; gfx1100.
1766 define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) {
1767 ; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1768 ; GPRIDX: ; %bb.0: ; %entry
1769 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1770 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1771 ; GPRIDX-NEXT: s_mov_b32 m0, s18
1772 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1773 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1774 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1775 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1776 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1777 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1778 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1779 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1780 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1781 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1782 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1783 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1784 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1785 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1786 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
1787 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
1788 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
1789 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1790 ; GPRIDX-NEXT: s_endpgm
1792 ; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1793 ; MOVREL: ; %bb.0: ; %entry
1794 ; MOVREL-NEXT: s_mov_b32 s0, s2
1795 ; MOVREL-NEXT: s_mov_b32 s1, s3
1796 ; MOVREL-NEXT: s_mov_b32 m0, s18
1797 ; MOVREL-NEXT: s_mov_b32 s2, s4
1798 ; MOVREL-NEXT: s_mov_b32 s3, s5
1799 ; MOVREL-NEXT: s_mov_b32 s4, s6
1800 ; MOVREL-NEXT: s_mov_b32 s5, s7
1801 ; MOVREL-NEXT: s_mov_b32 s6, s8
1802 ; MOVREL-NEXT: s_mov_b32 s7, s9
1803 ; MOVREL-NEXT: s_mov_b32 s8, s10
1804 ; MOVREL-NEXT: s_mov_b32 s9, s11
1805 ; MOVREL-NEXT: s_mov_b32 s10, s12
1806 ; MOVREL-NEXT: s_mov_b32 s11, s13
1807 ; MOVREL-NEXT: s_mov_b32 s12, s14
1808 ; MOVREL-NEXT: s_mov_b32 s13, s15
1809 ; MOVREL-NEXT: s_mov_b32 s14, s16
1810 ; MOVREL-NEXT: s_mov_b32 s15, s17
1811 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
1812 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
1813 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
1814 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
1815 ; MOVREL-NEXT: s_endpgm
1817 ; GFX10-LABEL: dyn_extract_v8p1_s_s:
1818 ; GFX10: ; %bb.0: ; %entry
1819 ; GFX10-NEXT: s_mov_b32 s0, s2
1820 ; GFX10-NEXT: s_mov_b32 s1, s3
1821 ; GFX10-NEXT: s_mov_b32 m0, s18
1822 ; GFX10-NEXT: s_mov_b32 s2, s4
1823 ; GFX10-NEXT: s_mov_b32 s3, s5
1824 ; GFX10-NEXT: s_mov_b32 s4, s6
1825 ; GFX10-NEXT: s_mov_b32 s5, s7
1826 ; GFX10-NEXT: s_mov_b32 s6, s8
1827 ; GFX10-NEXT: s_mov_b32 s7, s9
1828 ; GFX10-NEXT: s_mov_b32 s8, s10
1829 ; GFX10-NEXT: s_mov_b32 s9, s11
1830 ; GFX10-NEXT: s_mov_b32 s10, s12
1831 ; GFX10-NEXT: s_mov_b32 s11, s13
1832 ; GFX10-NEXT: s_mov_b32 s12, s14
1833 ; GFX10-NEXT: s_mov_b32 s13, s15
1834 ; GFX10-NEXT: s_mov_b32 s14, s16
1835 ; GFX10-NEXT: s_mov_b32 s15, s17
1836 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
1837 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1838 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1839 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1840 ; GFX10-NEXT: s_endpgm
1842 ; GFX11-LABEL: dyn_extract_v8p1_s_s:
1843 ; GFX11: ; %bb.0: ; %entry
1844 ; GFX11-NEXT: s_mov_b32 s0, s2
1845 ; GFX11-NEXT: s_mov_b32 s1, s3
1846 ; GFX11-NEXT: s_mov_b32 m0, s18
1847 ; GFX11-NEXT: s_mov_b32 s2, s4
1848 ; GFX11-NEXT: s_mov_b32 s3, s5
1849 ; GFX11-NEXT: s_mov_b32 s4, s6
1850 ; GFX11-NEXT: s_mov_b32 s5, s7
1851 ; GFX11-NEXT: s_mov_b32 s6, s8
1852 ; GFX11-NEXT: s_mov_b32 s7, s9
1853 ; GFX11-NEXT: s_mov_b32 s8, s10
1854 ; GFX11-NEXT: s_mov_b32 s9, s11
1855 ; GFX11-NEXT: s_mov_b32 s10, s12
1856 ; GFX11-NEXT: s_mov_b32 s11, s13
1857 ; GFX11-NEXT: s_mov_b32 s12, s14
1858 ; GFX11-NEXT: s_mov_b32 s13, s15
1859 ; GFX11-NEXT: s_mov_b32 s14, s16
1860 ; GFX11-NEXT: s_mov_b32 s15, s17
1861 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
1862 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1863 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
1864 ; GFX11-NEXT: s_nop 0
1865 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1866 ; GFX11-NEXT: s_endpgm
1868 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
1869 store ptr addrspace(1) %ext, ptr addrspace(1) undef
; Extract element %sel from a <16 x float> vector passed as an ordinary
; argument, with the index passed inreg, in an amdgpu_ps function declared to
; return float. gfx900 is expected to use the s_set_gpr_idx_on / v_mov_b32 /
; s_set_gpr_idx_off sequence; the other targets load the index into m0 and use
; v_movrels_b32.
1873 define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1874 ; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1875 ; GPRIDX: ; %bb.0: ; %entry
1876 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
1877 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
1878 ; GPRIDX-NEXT: s_set_gpr_idx_off
1879 ; GPRIDX-NEXT: ; return to shader part epilog
1881 ; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1882 ; MOVREL: ; %bb.0: ; %entry
1883 ; MOVREL-NEXT: s_mov_b32 m0, s2
1884 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
1885 ; MOVREL-NEXT: ; return to shader part epilog
1887 ; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s:
1888 ; GFX10PLUS: ; %bb.0: ; %entry
1889 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1890 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
1891 ; GFX10PLUS-NEXT: ; return to shader part epilog
1893 %ext = extractelement <16 x float> %vec, i32 %sel
; Same scenario as the <16 x float> case but with a <32 x float> vector: the
; vector is an ordinary argument and the index is inreg. The expected output is
; the same per target: the s_set_gpr_idx_on / v_mov_b32 / s_set_gpr_idx_off
; sequence on gfx900, and m0 + v_movrels_b32 on the other targets.
1897 define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1898 ; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1899 ; GPRIDX: ; %bb.0: ; %entry
1900 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
1901 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
1902 ; GPRIDX-NEXT: s_set_gpr_idx_off
1903 ; GPRIDX-NEXT: ; return to shader part epilog
1905 ; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1906 ; MOVREL: ; %bb.0: ; %entry
1907 ; MOVREL-NEXT: s_mov_b32 m0, s2
1908 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
1909 ; MOVREL-NEXT: ; return to shader part epilog
1911 ; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
1912 ; GFX10PLUS: ; %bb.0: ; %entry
1913 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1914 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
1915 ; GFX10PLUS-NEXT: ; return to shader part epilog
1917 %ext = extractelement <32 x float> %vec, i32 %sel
; Extract element %sel from a <16 x double> vector passed as an ordinary
; argument, with the index passed inreg, in an amdgpu_ps function declared to
; return double. In the expected output the index is shifted left by one
; (s_lshl_b32 ..., 1) and two indexed 32-bit moves are issued, one based at v0
; and one based at v1; the results are then moved to s0/s1 with
; v_readfirstlane_b32. gfx900 brackets the moves with s_set_gpr_idx_on/off,
; while the other targets put the shifted index in m0 and use v_movrels_b32.
1921 define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1922 ; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1923 ; GPRIDX: ; %bb.0: ; %entry
1924 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
1925 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
1926 ; GPRIDX-NEXT: v_mov_b32_e32 v32, v0
1927 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
1928 ; GPRIDX-NEXT: s_set_gpr_idx_off
1929 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32
1930 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
1931 ; GPRIDX-NEXT: ; return to shader part epilog
1933 ; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1934 ; MOVREL: ; %bb.0: ; %entry
1935 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
1936 ; MOVREL-NEXT: v_movrels_b32_e32 v32, v0
1937 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
1938 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v32
1939 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
1940 ; MOVREL-NEXT: ; return to shader part epilog
1942 ; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
1943 ; GFX10PLUS: ; %bb.0: ; %entry
1944 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
1945 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v32, v0
1946 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
1947 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v32
1948 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
1949 ; GFX10PLUS-NEXT: ; return to shader part epilog
1951 %ext = extractelement <16 x double> %vec, i32 %sel
; Extract element %sel (inreg) from the constant vector <1.0, 2.0, ..., 16.0>
; in an amdgpu_ps function declared to return float. All targets are expected
; to materialize the sixteen constants in s4..s19, load the index into m0,
; extract with s_movrels_b32 based at s4, and copy the result into v0.
1955 define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1956 ; GCN-LABEL: dyn_extract_v16f32_s_s:
1957 ; GCN: ; %bb.0: ; %entry
1958 ; GCN-NEXT: s_mov_b32 s4, 1.0
1959 ; GCN-NEXT: s_mov_b32 m0, s2
1960 ; GCN-NEXT: s_mov_b32 s19, 0x41800000
1961 ; GCN-NEXT: s_mov_b32 s18, 0x41700000
1962 ; GCN-NEXT: s_mov_b32 s17, 0x41600000
1963 ; GCN-NEXT: s_mov_b32 s16, 0x41500000
1964 ; GCN-NEXT: s_mov_b32 s15, 0x41400000
1965 ; GCN-NEXT: s_mov_b32 s14, 0x41300000
1966 ; GCN-NEXT: s_mov_b32 s13, 0x41200000
1967 ; GCN-NEXT: s_mov_b32 s12, 0x41100000
1968 ; GCN-NEXT: s_mov_b32 s11, 0x41000000
1969 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000
1970 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000
1971 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000
1972 ; GCN-NEXT: s_mov_b32 s7, 4.0
1973 ; GCN-NEXT: s_mov_b32 s6, 0x40400000
1974 ; GCN-NEXT: s_mov_b32 s5, 2.0
1975 ; GCN-NEXT: s_movrels_b32 s0, s4
1976 ; GCN-NEXT: v_mov_b32_e32 v0, s0
1977 ; GCN-NEXT: ; return to shader part epilog
1979 ; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
1980 ; GFX10PLUS: ; %bb.0: ; %entry
1981 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
1982 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1983 ; GFX10PLUS-NEXT: s_mov_b32 s19, 0x41800000
1984 ; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000
1985 ; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000
1986 ; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000
1987 ; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000
1988 ; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000
1989 ; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000
1990 ; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000
1991 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
1992 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
1993 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
1994 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
1995 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
1996 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
1997 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
1998 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
1999 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2000 ; GFX10PLUS-NEXT: ; return to shader part epilog
2002 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
; Extract element %sel (inreg) from the constant vector <1.0, 2.0, ..., 32.0>
; in an amdgpu_ps function declared to return float. All targets are expected
; to materialize the thirty-two constants in s36..s67, load the index into m0,
; extract with s_movrels_b32 based at s36, and copy the result into v0.
2006 define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
2007 ; GCN-LABEL: dyn_extract_v32f32_s_s:
2008 ; GCN: ; %bb.0: ; %entry
2009 ; GCN-NEXT: s_mov_b32 s36, 1.0
2010 ; GCN-NEXT: s_mov_b32 m0, s2
2011 ; GCN-NEXT: s_mov_b32 s67, 0x42000000
2012 ; GCN-NEXT: s_mov_b32 s66, 0x41f80000
2013 ; GCN-NEXT: s_mov_b32 s65, 0x41f00000
2014 ; GCN-NEXT: s_mov_b32 s64, 0x41e80000
2015 ; GCN-NEXT: s_mov_b32 s63, 0x41e00000
2016 ; GCN-NEXT: s_mov_b32 s62, 0x41d80000
2017 ; GCN-NEXT: s_mov_b32 s61, 0x41d00000
2018 ; GCN-NEXT: s_mov_b32 s60, 0x41c80000
2019 ; GCN-NEXT: s_mov_b32 s59, 0x41c00000
2020 ; GCN-NEXT: s_mov_b32 s58, 0x41b80000
2021 ; GCN-NEXT: s_mov_b32 s57, 0x41b00000
2022 ; GCN-NEXT: s_mov_b32 s56, 0x41a80000
2023 ; GCN-NEXT: s_mov_b32 s55, 0x41a00000
2024 ; GCN-NEXT: s_mov_b32 s54, 0x41980000
2025 ; GCN-NEXT: s_mov_b32 s53, 0x41900000
2026 ; GCN-NEXT: s_mov_b32 s52, 0x41880000
2027 ; GCN-NEXT: s_mov_b32 s51, 0x41800000
2028 ; GCN-NEXT: s_mov_b32 s50, 0x41700000
2029 ; GCN-NEXT: s_mov_b32 s49, 0x41600000
2030 ; GCN-NEXT: s_mov_b32 s48, 0x41500000
2031 ; GCN-NEXT: s_mov_b32 s47, 0x41400000
2032 ; GCN-NEXT: s_mov_b32 s46, 0x41300000
2033 ; GCN-NEXT: s_mov_b32 s45, 0x41200000
2034 ; GCN-NEXT: s_mov_b32 s44, 0x41100000
2035 ; GCN-NEXT: s_mov_b32 s43, 0x41000000
2036 ; GCN-NEXT: s_mov_b32 s42, 0x40e00000
2037 ; GCN-NEXT: s_mov_b32 s41, 0x40c00000
2038 ; GCN-NEXT: s_mov_b32 s40, 0x40a00000
2039 ; GCN-NEXT: s_mov_b32 s39, 4.0
2040 ; GCN-NEXT: s_mov_b32 s38, 0x40400000
2041 ; GCN-NEXT: s_mov_b32 s37, 2.0
2042 ; GCN-NEXT: s_movrels_b32 s0, s36
2043 ; GCN-NEXT: v_mov_b32_e32 v0, s0
2044 ; GCN-NEXT: ; return to shader part epilog
2046 ; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
2047 ; GFX10PLUS: ; %bb.0: ; %entry
2048 ; GFX10PLUS-NEXT: s_mov_b32 s36, 1.0
2049 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2050 ; GFX10PLUS-NEXT: s_mov_b32 s67, 0x42000000
2051 ; GFX10PLUS-NEXT: s_mov_b32 s66, 0x41f80000
2052 ; GFX10PLUS-NEXT: s_mov_b32 s65, 0x41f00000
2053 ; GFX10PLUS-NEXT: s_mov_b32 s64, 0x41e80000
2054 ; GFX10PLUS-NEXT: s_mov_b32 s63, 0x41e00000
2055 ; GFX10PLUS-NEXT: s_mov_b32 s62, 0x41d80000
2056 ; GFX10PLUS-NEXT: s_mov_b32 s61, 0x41d00000
2057 ; GFX10PLUS-NEXT: s_mov_b32 s60, 0x41c80000
2058 ; GFX10PLUS-NEXT: s_mov_b32 s59, 0x41c00000
2059 ; GFX10PLUS-NEXT: s_mov_b32 s58, 0x41b80000
2060 ; GFX10PLUS-NEXT: s_mov_b32 s57, 0x41b00000
2061 ; GFX10PLUS-NEXT: s_mov_b32 s56, 0x41a80000
2062 ; GFX10PLUS-NEXT: s_mov_b32 s55, 0x41a00000
2063 ; GFX10PLUS-NEXT: s_mov_b32 s54, 0x41980000
2064 ; GFX10PLUS-NEXT: s_mov_b32 s53, 0x41900000
2065 ; GFX10PLUS-NEXT: s_mov_b32 s52, 0x41880000
2066 ; GFX10PLUS-NEXT: s_mov_b32 s51, 0x41800000
2067 ; GFX10PLUS-NEXT: s_mov_b32 s50, 0x41700000
2068 ; GFX10PLUS-NEXT: s_mov_b32 s49, 0x41600000
2069 ; GFX10PLUS-NEXT: s_mov_b32 s48, 0x41500000
2070 ; GFX10PLUS-NEXT: s_mov_b32 s47, 0x41400000
2071 ; GFX10PLUS-NEXT: s_mov_b32 s46, 0x41300000
2072 ; GFX10PLUS-NEXT: s_mov_b32 s45, 0x41200000
2073 ; GFX10PLUS-NEXT: s_mov_b32 s44, 0x41100000
2074 ; GFX10PLUS-NEXT: s_mov_b32 s43, 0x41000000
2075 ; GFX10PLUS-NEXT: s_mov_b32 s42, 0x40e00000
2076 ; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40c00000
2077 ; GFX10PLUS-NEXT: s_mov_b32 s40, 0x40a00000
2078 ; GFX10PLUS-NEXT: s_mov_b32 s39, 4.0
2079 ; GFX10PLUS-NEXT: s_mov_b32 s38, 0x40400000
2080 ; GFX10PLUS-NEXT: s_mov_b32 s37, 2.0
2081 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s36
2082 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2083 ; GFX10PLUS-NEXT: ; return to shader part epilog
2085 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
; Extract element %sel (inreg) from the constant vector of doubles
; <1.0, 2.0, ..., 16.0> in an amdgpu_ps function declared to return double.
; All targets are expected to materialize the constants in s36..s67 (a mix of
; 64-bit inline moves and separate low/high 32-bit moves), load the index into
; m0 and extract with s_movrels_b64 based at s[36:37] into s[0:1]. The two
; check blocks list the same instructions; only their order differs.
2089 define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
2090 ; GCN-LABEL: dyn_extract_v16f64_s_s:
2091 ; GCN: ; %bb.0: ; %entry
2092 ; GCN-NEXT: s_mov_b32 s66, 0
2093 ; GCN-NEXT: s_mov_b32 s64, 0
2094 ; GCN-NEXT: s_mov_b32 s62, 0
2095 ; GCN-NEXT: s_mov_b32 s60, 0
2096 ; GCN-NEXT: s_mov_b32 s58, 0
2097 ; GCN-NEXT: s_mov_b32 s56, 0
2098 ; GCN-NEXT: s_mov_b32 s54, 0
2099 ; GCN-NEXT: s_mov_b32 s52, 0
2100 ; GCN-NEXT: s_mov_b32 s50, 0
2101 ; GCN-NEXT: s_mov_b32 s48, 0
2102 ; GCN-NEXT: s_mov_b32 s46, 0
2103 ; GCN-NEXT: s_mov_b32 s44, 0
2104 ; GCN-NEXT: s_mov_b32 s40, 0
2105 ; GCN-NEXT: s_mov_b64 s[36:37], 1.0
2106 ; GCN-NEXT: s_mov_b32 m0, s2
2107 ; GCN-NEXT: s_mov_b32 s67, 0x40300000
2108 ; GCN-NEXT: s_mov_b32 s65, 0x402e0000
2109 ; GCN-NEXT: s_mov_b32 s63, 0x402c0000
2110 ; GCN-NEXT: s_mov_b32 s61, 0x402a0000
2111 ; GCN-NEXT: s_mov_b32 s59, 0x40280000
2112 ; GCN-NEXT: s_mov_b32 s57, 0x40260000
2113 ; GCN-NEXT: s_mov_b32 s55, 0x40240000
2114 ; GCN-NEXT: s_mov_b32 s53, 0x40220000
2115 ; GCN-NEXT: s_mov_b32 s51, 0x40200000
2116 ; GCN-NEXT: s_mov_b32 s49, 0x401c0000
2117 ; GCN-NEXT: s_mov_b32 s47, 0x40180000
2118 ; GCN-NEXT: s_mov_b32 s45, 0x40140000
2119 ; GCN-NEXT: s_mov_b64 s[42:43], 4.0
2120 ; GCN-NEXT: s_mov_b32 s41, 0x40080000
2121 ; GCN-NEXT: s_mov_b64 s[38:39], 2.0
2122 ; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37]
2123 ; GCN-NEXT: ; return to shader part epilog
2125 ; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
2126 ; GFX10PLUS: ; %bb.0: ; %entry
2127 ; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0
2128 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2129 ; GFX10PLUS-NEXT: s_mov_b32 s66, 0
2130 ; GFX10PLUS-NEXT: s_mov_b32 s64, 0
2131 ; GFX10PLUS-NEXT: s_mov_b32 s62, 0
2132 ; GFX10PLUS-NEXT: s_mov_b32 s60, 0
2133 ; GFX10PLUS-NEXT: s_mov_b32 s58, 0
2134 ; GFX10PLUS-NEXT: s_mov_b32 s56, 0
2135 ; GFX10PLUS-NEXT: s_mov_b32 s54, 0
2136 ; GFX10PLUS-NEXT: s_mov_b32 s52, 0
2137 ; GFX10PLUS-NEXT: s_mov_b32 s50, 0
2138 ; GFX10PLUS-NEXT: s_mov_b32 s48, 0
2139 ; GFX10PLUS-NEXT: s_mov_b32 s46, 0
2140 ; GFX10PLUS-NEXT: s_mov_b32 s44, 0
2141 ; GFX10PLUS-NEXT: s_mov_b32 s40, 0
2142 ; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000
2143 ; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000
2144 ; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000
2145 ; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000
2146 ; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000
2147 ; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000
2148 ; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000
2149 ; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000
2150 ; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000
2151 ; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000
2152 ; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000
2153 ; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000
2154 ; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0
2155 ; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000
2156 ; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0
2157 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37]
2158 ; GFX10PLUS-NEXT: ; return to shader part epilog
2160 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
; Extract element %sel from a <6 x float> vector passed inreg, with the index
; passed as an ordinary argument, in an amdgpu_ps function declared to return
; float. The expected output is a compare/select chain over indices 1..5 with
; the index in v0. gfx900 and fiji first copy every SGPR element into a VGPR,
; whereas gfx1010 and gfx1100 copy only s3 and use the remaining SGPRs directly
; as select operands.
2164 define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
2165 ; GCN-LABEL: dyn_extract_v6f32_s_v:
2166 ; GCN: ; %bb.0: ; %entry
2167 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2168 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2169 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2170 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2171 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2172 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2173 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2174 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2175 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2176 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2177 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
2178 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2179 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2180 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2181 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2182 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc
2183 ; GCN-NEXT: ; return to shader part epilog
2185 ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v:
2186 ; GFX10PLUS: ; %bb.0: ; %entry
2187 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
2188 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2189 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2190 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2191 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2192 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2193 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2194 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2195 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2196 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2197 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo
2198 ; GFX10PLUS-NEXT: ; return to shader part epilog
2200 %ext = extractelement <6 x float> %vec, i32 %sel
; Extract element %sel from a <6 x float> vector, where both the vector and the
; index are ordinary (non-inreg) arguments of a function declared to return
; float. Both check blocks expect a v_cmp_eq_u32 / v_cndmask_b32 chain that
; compares the index in v6 against 1..5 and accumulates the result in v0.
2204 define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
2205 ; GCN-LABEL: dyn_extract_v6f32_v_v:
2206 ; GCN: ; %bb.0: ; %entry
2207 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
2209 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2210 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6
2211 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2212 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
2213 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2214 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6
2215 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2216 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6
2217 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2218 ; GCN-NEXT: s_setpc_b64 s[30:31]
2220 ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
2221 ; GFX10PLUS: ; %bb.0: ; %entry
2222 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
2224 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2225 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6
2226 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2227 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6
2228 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2229 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6
2230 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2231 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6
2232 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2233 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
2235 %ext = extractelement <6 x float> %vec, i32 %sel
; Extract element %sel from a <6 x float> vector passed as an ordinary
; argument, with the index passed inreg, in an amdgpu_ps function declared to
; return float. Both check blocks expect the index in s2 to be compared against
; 1..5 with v_cmp_eq_u32_e64, each followed by a v_cndmask_b32 that accumulates
; the result in v0.
2239 define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2240 ; GCN-LABEL: dyn_extract_v6f32_v_s:
2241 ; GCN: ; %bb.0: ; %entry
2242 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
2243 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2244 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
2245 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2246 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
2247 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2248 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
2249 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2250 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
2251 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2252 ; GCN-NEXT: ; return to shader part epilog
2254 ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s:
2255 ; GFX10PLUS: ; %bb.0: ; %entry
2256 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
2257 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2258 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
2259 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2260 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
2261 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2262 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
2263 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2264 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
2265 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2266 ; GFX10PLUS-NEXT: ; return to shader part epilog
2268 %ext = extractelement <6 x float> %vec, i32 %sel
; Extract element %sel from a <6 x float> vector, where both the vector and the
; index are inreg arguments of an amdgpu_ps function declared to return float.
; Both check blocks expect an s_cmp_eq_u32 / s_cselect_b32 chain that compares
; the index in s8 against 1..5, followed by a copy of the selected value from
; s0 into v0.
2272 define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2273 ; GCN-LABEL: dyn_extract_v6f32_s_s:
2274 ; GCN: ; %bb.0: ; %entry
2275 ; GCN-NEXT: s_cmp_eq_u32 s8, 1
2276 ; GCN-NEXT: s_cselect_b32 s0, s3, s2
2277 ; GCN-NEXT: s_cmp_eq_u32 s8, 2
2278 ; GCN-NEXT: s_cselect_b32 s0, s4, s0
2279 ; GCN-NEXT: s_cmp_eq_u32 s8, 3
2280 ; GCN-NEXT: s_cselect_b32 s0, s5, s0
2281 ; GCN-NEXT: s_cmp_eq_u32 s8, 4
2282 ; GCN-NEXT: s_cselect_b32 s0, s6, s0
2283 ; GCN-NEXT: s_cmp_eq_u32 s8, 5
2284 ; GCN-NEXT: s_cselect_b32 s0, s7, s0
2285 ; GCN-NEXT: v_mov_b32_e32 v0, s0
2286 ; GCN-NEXT: ; return to shader part epilog
2288 ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s:
2289 ; GFX10PLUS: ; %bb.0: ; %entry
2290 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1
2291 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
2292 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2
2293 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
2294 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3
2295 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0
2296 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4
2297 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0
2298 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5
2299 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0
2300 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2301 ; GFX10PLUS-NEXT: ; return to shader part epilog
2303 %ext = extractelement <6 x float> %vec, i32 %sel
; Extract element %sel from a <7 x float> vector passed inreg, with the index
; passed as an ordinary argument, in an amdgpu_ps function declared to return
; float. The expected output is a compare/select chain over indices 1..6 with
; the index in v0. gfx900 and fiji first copy every SGPR element into a VGPR,
; whereas gfx1010 and gfx1100 copy only s3 and use the remaining SGPRs directly
; as select operands.
2307 define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2308 ; GCN-LABEL: dyn_extract_v7f32_s_v:
2309 ; GCN: ; %bb.0: ; %entry
2310 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2311 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2312 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2313 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2314 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2315 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2316 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2317 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2318 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2319 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2320 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
2321 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2322 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2323 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2324 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2325 ; GCN-NEXT: v_mov_b32_e32 v7, s8
2326 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
2327 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2328 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc
2329 ; GCN-NEXT: ; return to shader part epilog
2331 ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v:
2332 ; GFX10PLUS: ; %bb.0: ; %entry
2333 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
2334 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2335 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2336 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2337 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2338 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2339 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2340 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2341 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2342 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2343 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
2344 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2345 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo
2346 ; GFX10PLUS-NEXT: ; return to shader part epilog
2348 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <7 x float> with neither argument marked inreg
; and no amdgpu_ps calling convention, so the checks begin with s_waitcnt and
; end with s_setpc_b64 s[30:31]. The elements are checked in v0..v6 and the
; index in v7, selected by a v_cmp_eq_u32 + v_cndmask_b32 chain over indices
; 1 through 6 that accumulates into v0.
2352 define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2353 ; GCN-LABEL: dyn_extract_v7f32_v_v:
2354 ; GCN: ; %bb.0: ; %entry
2355 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
2357 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2358 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7
2359 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2360 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7
2361 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2362 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7
2363 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2364 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7
2365 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2366 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7
2367 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2368 ; GCN-NEXT: s_setpc_b64 s[30:31]
2370 ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
2371 ; GFX10PLUS: ; %bb.0: ; %entry
2372 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
2374 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2375 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7
2376 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2377 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7
2378 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2379 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7
2380 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2381 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7
2382 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2383 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7
2384 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2385 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
2387 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <7 x float> where only the index is inreg. The
; checks compare the index in s2 against the constants 1 through 6 with the
; e64 form of v_cmp_eq_u32 and fold the matching element v1..v6 into v0 with
; v_cndmask_b32.
2391 define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2392 ; GCN-LABEL: dyn_extract_v7f32_v_s:
2393 ; GCN: ; %bb.0: ; %entry
2394 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
2395 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2396 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
2397 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2398 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
2399 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2400 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
2401 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2402 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
2403 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2404 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
2405 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2406 ; GCN-NEXT: ; return to shader part epilog
2408 ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s:
2409 ; GFX10PLUS: ; %bb.0: ; %entry
2410 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
2411 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2412 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
2413 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2414 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
2415 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2416 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
2417 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2418 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
2419 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2420 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
2421 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2422 ; GFX10PLUS-NEXT: ; return to shader part epilog
2424 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <7 x float> where both the vector and the index
; are inreg. The GCN and GFX10PLUS checks expect the same scalar sequence, an
; s_cmp_eq_u32 on s9 followed by s_cselect_b32 for each index 1 through 6,
; then a copy of the selected value from s0 into v0.
2428 define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2429 ; GCN-LABEL: dyn_extract_v7f32_s_s:
2430 ; GCN: ; %bb.0: ; %entry
2431 ; GCN-NEXT: s_cmp_eq_u32 s9, 1
2432 ; GCN-NEXT: s_cselect_b32 s0, s3, s2
2433 ; GCN-NEXT: s_cmp_eq_u32 s9, 2
2434 ; GCN-NEXT: s_cselect_b32 s0, s4, s0
2435 ; GCN-NEXT: s_cmp_eq_u32 s9, 3
2436 ; GCN-NEXT: s_cselect_b32 s0, s5, s0
2437 ; GCN-NEXT: s_cmp_eq_u32 s9, 4
2438 ; GCN-NEXT: s_cselect_b32 s0, s6, s0
2439 ; GCN-NEXT: s_cmp_eq_u32 s9, 5
2440 ; GCN-NEXT: s_cselect_b32 s0, s7, s0
2441 ; GCN-NEXT: s_cmp_eq_u32 s9, 6
2442 ; GCN-NEXT: s_cselect_b32 s0, s8, s0
2443 ; GCN-NEXT: v_mov_b32_e32 v0, s0
2444 ; GCN-NEXT: ; return to shader part epilog
2446 ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s:
2447 ; GFX10PLUS: ; %bb.0: ; %entry
2448 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1
2449 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
2450 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2
2451 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
2452 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3
2453 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0
2454 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4
2455 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0
2456 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5
2457 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0
2458 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6
2459 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0
2460 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2461 ; GFX10PLUS-NEXT: ; return to shader part epilog
2463 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <6 x double> where the vector is inreg and the
; index is a plain i32 argument (compared in v0). Each 64-bit element is
; selected as two 32-bit halves, so every v_cmp_eq_u32 for indices 1 through 5
; is followed by two v_cndmask_b32. The double result is returned through
; v_readfirstlane_b32 into s0 and s1. The GFX10 and GFX11 checks are kept
; separate because GFX11 pairs the two initial moves into one v_dual_mov_b32.
2467 define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2468 ; GCN-LABEL: dyn_extract_v6f64_s_v:
2469 ; GCN: ; %bb.0: ; %entry
2470 ; GCN-NEXT: s_mov_b32 s0, s2
2471 ; GCN-NEXT: s_mov_b32 s1, s3
2472 ; GCN-NEXT: s_mov_b32 s2, s4
2473 ; GCN-NEXT: s_mov_b32 s3, s5
2474 ; GCN-NEXT: s_mov_b32 s4, s6
2475 ; GCN-NEXT: s_mov_b32 s5, s7
2476 ; GCN-NEXT: v_mov_b32_e32 v1, s0
2477 ; GCN-NEXT: v_mov_b32_e32 v2, s1
2478 ; GCN-NEXT: v_mov_b32_e32 v3, s2
2479 ; GCN-NEXT: v_mov_b32_e32 v4, s3
2480 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2481 ; GCN-NEXT: s_mov_b32 s6, s8
2482 ; GCN-NEXT: s_mov_b32 s7, s9
2483 ; GCN-NEXT: v_mov_b32_e32 v5, s4
2484 ; GCN-NEXT: v_mov_b32_e32 v6, s5
2485 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2486 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2487 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2488 ; GCN-NEXT: v_mov_b32_e32 v7, s6
2489 ; GCN-NEXT: v_mov_b32_e32 v8, s7
2490 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2491 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2492 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2493 ; GCN-NEXT: v_mov_b32_e32 v9, s10
2494 ; GCN-NEXT: v_mov_b32_e32 v10, s11
2495 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2496 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2497 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2498 ; GCN-NEXT: v_mov_b32_e32 v11, s12
2499 ; GCN-NEXT: v_mov_b32_e32 v12, s13
2500 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2501 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2502 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2503 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc
2504 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc
2505 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2506 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2507 ; GCN-NEXT: ; return to shader part epilog
2509 ; GFX10-LABEL: dyn_extract_v6f64_s_v:
2510 ; GFX10: ; %bb.0: ; %entry
2511 ; GFX10-NEXT: s_mov_b32 s0, s2
2512 ; GFX10-NEXT: s_mov_b32 s2, s4
2513 ; GFX10-NEXT: s_mov_b32 s15, s5
2514 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
2515 ; GFX10-NEXT: v_mov_b32_e32 v2, s15
2516 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2517 ; GFX10-NEXT: s_mov_b32 s1, s3
2518 ; GFX10-NEXT: s_mov_b32 s4, s6
2519 ; GFX10-NEXT: s_mov_b32 s5, s7
2520 ; GFX10-NEXT: s_mov_b32 s6, s8
2521 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2522 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2523 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2524 ; GFX10-NEXT: s_mov_b32 s7, s9
2525 ; GFX10-NEXT: s_mov_b32 s8, s10
2526 ; GFX10-NEXT: s_mov_b32 s9, s11
2527 ; GFX10-NEXT: s_mov_b32 s10, s12
2528 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2529 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2530 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2531 ; GFX10-NEXT: s_mov_b32 s11, s13
2532 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2533 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2534 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2535 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2536 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2537 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2538 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2539 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2540 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2541 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2542 ; GFX10-NEXT: ; return to shader part epilog
2544 ; GFX11-LABEL: dyn_extract_v6f64_s_v:
2545 ; GFX11: ; %bb.0: ; %entry
2546 ; GFX11-NEXT: s_mov_b32 s0, s2
2547 ; GFX11-NEXT: s_mov_b32 s2, s4
2548 ; GFX11-NEXT: s_mov_b32 s15, s5
2549 ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s15
2550 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2551 ; GFX11-NEXT: s_mov_b32 s1, s3
2552 ; GFX11-NEXT: s_mov_b32 s4, s6
2553 ; GFX11-NEXT: s_mov_b32 s5, s7
2554 ; GFX11-NEXT: s_mov_b32 s6, s8
2555 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2556 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2557 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2558 ; GFX11-NEXT: s_mov_b32 s7, s9
2559 ; GFX11-NEXT: s_mov_b32 s8, s10
2560 ; GFX11-NEXT: s_mov_b32 s9, s11
2561 ; GFX11-NEXT: s_mov_b32 s10, s12
2562 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2563 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2564 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2565 ; GFX11-NEXT: s_mov_b32 s11, s13
2566 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2567 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2568 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2569 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2570 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2571 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2572 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2573 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2574 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2575 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2576 ; GFX11-NEXT: ; return to shader part epilog
2578 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> with neither argument marked inreg
; and no amdgpu_ps calling convention, so the checks begin with s_waitcnt and
; end with s_setpc_b64 s[30:31]. The index is checked in v12 and each compare
; for indices 1 through 5 drives two selects, one per 32-bit half, that
; accumulate into v0 and v1. The GFX11 checks differ from GFX10 by fusing each
; pair of selects into a single v_dual_cndmask_b32.
2582 define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2583 ; GCN-LABEL: dyn_extract_v6f64_v_v:
2584 ; GCN: ; %bb.0: ; %entry
2585 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2586 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
2587 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2588 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2589 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
2590 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2591 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2592 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
2593 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2594 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2595 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
2596 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2597 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2598 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12
2599 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
2600 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2601 ; GCN-NEXT: s_setpc_b64 s[30:31]
2603 ; GFX10-LABEL: dyn_extract_v6f64_v_v:
2604 ; GFX10: ; %bb.0: ; %entry
2605 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2606 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
2607 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2608 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2609 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
2610 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2611 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2612 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
2613 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2614 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2615 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
2616 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2617 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2618 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
2619 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2620 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2621 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2623 ; GFX11-LABEL: dyn_extract_v6f64_v_v:
2624 ; GFX11: ; %bb.0: ; %entry
2625 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2626 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
2627 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2628 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
2629 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2630 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
2631 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2632 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
2633 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2634 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
2635 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2636 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2638 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> where only the index is inreg.
; Instead of a compare and select chain, the checks expect indexed register
; reads. The index in s2 is shifted left by 1 and the two 32-bit halves are
; read with base registers v0 and v1. The GPRIDX checks wrap two v_mov_b32 in
; s_set_gpr_idx_on and s_set_gpr_idx_off, while the MOVREL and GFX10PLUS
; checks put the shifted index in m0 and use v_movrels_b32. The result is
; returned through v_readfirstlane_b32 into s0 and s1.
2642 define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2643 ; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2644 ; GPRIDX: ; %bb.0: ; %entry
2645 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
2646 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
2647 ; GPRIDX-NEXT: v_mov_b32_e32 v12, v0
2648 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
2649 ; GPRIDX-NEXT: s_set_gpr_idx_off
2650 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12
2651 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
2652 ; GPRIDX-NEXT: ; return to shader part epilog
2654 ; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2655 ; MOVREL: ; %bb.0: ; %entry
2656 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
2657 ; MOVREL-NEXT: v_movrels_b32_e32 v12, v0
2658 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
2659 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v12
2660 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
2661 ; MOVREL-NEXT: ; return to shader part epilog
2663 ; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s:
2664 ; GFX10PLUS: ; %bb.0: ; %entry
2665 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
2666 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v12, v0
2667 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
2668 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v12
2669 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
2670 ; GFX10PLUS-NEXT: ; return to shader part epilog
2672 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> where both the vector and the index
; are inreg. The GCN and GFX10PLUS checks expect the same sequence. The twelve
; input SGPRs s2..s13 are copied down to s0..s11, m0 is loaded straight from
; s14 with no shift, and a single s_movrels_b64 with base s[0:1] writes the
; selected element to s[0:1].
2676 define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2677 ; GCN-LABEL: dyn_extract_v6f64_s_s:
2678 ; GCN: ; %bb.0: ; %entry
2679 ; GCN-NEXT: s_mov_b32 s0, s2
2680 ; GCN-NEXT: s_mov_b32 s1, s3
2681 ; GCN-NEXT: s_mov_b32 m0, s14
2682 ; GCN-NEXT: s_mov_b32 s2, s4
2683 ; GCN-NEXT: s_mov_b32 s3, s5
2684 ; GCN-NEXT: s_mov_b32 s4, s6
2685 ; GCN-NEXT: s_mov_b32 s5, s7
2686 ; GCN-NEXT: s_mov_b32 s6, s8
2687 ; GCN-NEXT: s_mov_b32 s7, s9
2688 ; GCN-NEXT: s_mov_b32 s8, s10
2689 ; GCN-NEXT: s_mov_b32 s9, s11
2690 ; GCN-NEXT: s_mov_b32 s10, s12
2691 ; GCN-NEXT: s_mov_b32 s11, s13
2692 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
2693 ; GCN-NEXT: ; return to shader part epilog
2695 ; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s:
2696 ; GFX10PLUS: ; %bb.0: ; %entry
2697 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2698 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2699 ; GFX10PLUS-NEXT: s_mov_b32 m0, s14
2700 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2701 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2702 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2703 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2704 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2705 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2706 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
2707 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
2708 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
2709 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
2710 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
2711 ; GFX10PLUS-NEXT: ; return to shader part epilog
2713 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> where the vector is inreg and the
; index is a plain i32 argument (compared in v0). Each compare drives two
; v_cndmask_b32, one per 32-bit half, and the result is returned through
; v_readfirstlane_b32 into s0 and s1.
; The recorded output compares against indices 1 through 7 even though the
; vector has only elements 0 through 6. For index 7 the GCN checks select v15
; and v16, which appear only in the two kill comments, and the GFX10 and GFX11
; checks select s14 and s15.
; NOTE(review) presumably the extra index 7 step comes from the vector being
; widened to 8 elements during legalization, TODO confirm.
2717 define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2718 ; GCN-LABEL: dyn_extract_v7f64_s_v:
2719 ; GCN: ; %bb.0: ; %entry
2720 ; GCN-NEXT: s_mov_b32 s0, s2
2721 ; GCN-NEXT: s_mov_b32 s1, s3
2722 ; GCN-NEXT: s_mov_b32 s2, s4
2723 ; GCN-NEXT: s_mov_b32 s3, s5
2724 ; GCN-NEXT: s_mov_b32 s4, s6
2725 ; GCN-NEXT: s_mov_b32 s5, s7
2726 ; GCN-NEXT: v_mov_b32_e32 v1, s0
2727 ; GCN-NEXT: v_mov_b32_e32 v2, s1
2728 ; GCN-NEXT: v_mov_b32_e32 v3, s2
2729 ; GCN-NEXT: v_mov_b32_e32 v4, s3
2730 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2731 ; GCN-NEXT: s_mov_b32 s6, s8
2732 ; GCN-NEXT: s_mov_b32 s7, s9
2733 ; GCN-NEXT: v_mov_b32_e32 v5, s4
2734 ; GCN-NEXT: v_mov_b32_e32 v6, s5
2735 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2736 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2737 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2738 ; GCN-NEXT: s_mov_b32 s8, s10
2739 ; GCN-NEXT: s_mov_b32 s9, s11
2740 ; GCN-NEXT: v_mov_b32_e32 v7, s6
2741 ; GCN-NEXT: v_mov_b32_e32 v8, s7
2742 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2743 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2744 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2745 ; GCN-NEXT: v_mov_b32_e32 v9, s8
2746 ; GCN-NEXT: v_mov_b32_e32 v10, s9
2747 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2748 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2749 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2750 ; GCN-NEXT: v_mov_b32_e32 v11, s12
2751 ; GCN-NEXT: v_mov_b32_e32 v12, s13
2752 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2753 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2754 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2755 ; GCN-NEXT: v_mov_b32_e32 v13, s14
2756 ; GCN-NEXT: v_mov_b32_e32 v14, s15
2757 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2758 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
2759 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2760 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2761 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
2762 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
2763 ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr14 killed $exec
2764 ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr15 killed $exec
2765 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
2766 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
2767 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2768 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2769 ; GCN-NEXT: ; return to shader part epilog
2771 ; GFX10-LABEL: dyn_extract_v7f64_s_v:
2772 ; GFX10: ; %bb.0: ; %entry
2773 ; GFX10-NEXT: s_mov_b32 s0, s2
2774 ; GFX10-NEXT: s_mov_b32 s2, s4
2775 ; GFX10-NEXT: s_mov_b32 s19, s5
2776 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
2777 ; GFX10-NEXT: v_mov_b32_e32 v2, s19
2778 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2779 ; GFX10-NEXT: s_mov_b32 s1, s3
2780 ; GFX10-NEXT: s_mov_b32 s4, s6
2781 ; GFX10-NEXT: s_mov_b32 s5, s7
2782 ; GFX10-NEXT: s_mov_b32 s6, s8
2783 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2784 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2785 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2786 ; GFX10-NEXT: s_mov_b32 s7, s9
2787 ; GFX10-NEXT: s_mov_b32 s8, s10
2788 ; GFX10-NEXT: s_mov_b32 s9, s11
2789 ; GFX10-NEXT: s_mov_b32 s10, s12
2790 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2791 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2792 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2793 ; GFX10-NEXT: s_mov_b32 s11, s13
2794 ; GFX10-NEXT: s_mov_b32 s12, s14
2795 ; GFX10-NEXT: s_mov_b32 s13, s15
2796 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2797 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2798 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2799 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2800 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2801 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2802 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2803 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2804 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2805 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2806 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2807 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2808 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
2809 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
2810 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2811 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2812 ; GFX10-NEXT: ; return to shader part epilog
2814 ; GFX11-LABEL: dyn_extract_v7f64_s_v:
2815 ; GFX11: ; %bb.0: ; %entry
2816 ; GFX11-NEXT: s_mov_b32 s0, s2
2817 ; GFX11-NEXT: s_mov_b32 s2, s4
2818 ; GFX11-NEXT: s_mov_b32 s19, s5
2819 ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
2820 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2821 ; GFX11-NEXT: s_mov_b32 s1, s3
2822 ; GFX11-NEXT: s_mov_b32 s4, s6
2823 ; GFX11-NEXT: s_mov_b32 s5, s7
2824 ; GFX11-NEXT: s_mov_b32 s6, s8
2825 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2826 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2827 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2828 ; GFX11-NEXT: s_mov_b32 s7, s9
2829 ; GFX11-NEXT: s_mov_b32 s8, s10
2830 ; GFX11-NEXT: s_mov_b32 s9, s11
2831 ; GFX11-NEXT: s_mov_b32 s10, s12
2832 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2833 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2834 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2835 ; GFX11-NEXT: s_mov_b32 s11, s13
2836 ; GFX11-NEXT: s_mov_b32 s12, s14
2837 ; GFX11-NEXT: s_mov_b32 s13, s15
2838 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2839 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2840 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2841 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2842 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2843 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2844 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2845 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2846 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2847 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2848 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2849 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2850 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
2851 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
2852 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2853 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2854 ; GFX11-NEXT: ; return to shader part epilog
2856 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> with neither argument marked inreg
; and no amdgpu_ps calling convention, so the checks begin with s_waitcnt and
; end with s_setpc_b64 s[30:31]. The index is checked in v14 and each compare
; drives two selects, one per 32-bit half, accumulating into v0 and v1. The
; GFX11 checks fuse each pair into a single v_dual_cndmask_b32.
; The recorded output compares against indices 1 through 7 even though the
; vector has only elements 0 through 6, and the index 7 step selects from v14
; and v15, where v14 is also the index register.
; NOTE(review) presumably the index 7 operands stand in for an undefined
; padding element, TODO confirm.
2860 define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2861 ; GCN-LABEL: dyn_extract_v7f64_v_v:
2862 ; GCN: ; %bb.0: ; %entry
2863 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2864 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14
2865 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2866 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2867 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14
2868 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2869 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2870 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14
2871 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2872 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2873 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14
2874 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2875 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2876 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14
2877 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
2878 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2879 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14
2880 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
2881 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2882 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14
2883 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
2884 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
2885 ; GCN-NEXT: s_setpc_b64 s[30:31]
2887 ; GFX10-LABEL: dyn_extract_v7f64_v_v:
2888 ; GFX10: ; %bb.0: ; %entry
2889 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2890 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
2891 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2892 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2893 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14
2894 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2895 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2896 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14
2897 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2898 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2899 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14
2900 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2901 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2902 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14
2903 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2904 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2905 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
2906 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2907 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2908 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
2909 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
2910 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
2911 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2913 ; GFX11-LABEL: dyn_extract_v7f64_v_v:
2914 ; GFX11: ; %bb.0: ; %entry
2915 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2916 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
2917 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2918 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14
2919 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2920 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14
2921 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2922 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14
2923 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2924 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14
2925 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2926 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
2927 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
2928 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
2929 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
2930 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2932 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> where only the index is inreg.
; The checks expect indexed register reads rather than a compare and select
; chain. The index in s2 is shifted left by 1 and the two 32-bit halves are
; read with base registers v0 and v1, using v14 as the temporary for the low
; half. The GPRIDX checks wrap two v_mov_b32 in s_set_gpr_idx_on and
; s_set_gpr_idx_off, while the MOVREL and GFX10PLUS checks put the shifted
; index in m0 and use v_movrels_b32. The result is returned through
; v_readfirstlane_b32 into s0 and s1.
2936 define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2937 ; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2938 ; GPRIDX: ; %bb.0: ; %entry
2939 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
2940 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
2941 ; GPRIDX-NEXT: v_mov_b32_e32 v14, v0
2942 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
2943 ; GPRIDX-NEXT: s_set_gpr_idx_off
2944 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14
2945 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
2946 ; GPRIDX-NEXT: ; return to shader part epilog
2948 ; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2949 ; MOVREL: ; %bb.0: ; %entry
2950 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
2951 ; MOVREL-NEXT: v_movrels_b32_e32 v14, v0
2952 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
2953 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v14
2954 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
2955 ; MOVREL-NEXT: ; return to shader part epilog
2957 ; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s:
2958 ; GFX10PLUS: ; %bb.0: ; %entry
2959 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
2960 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v14, v0
2961 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
2962 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v14
2963 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
2964 ; GFX10PLUS-NEXT: ; return to shader part epilog
2966 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> where both the vector and the index
; are inreg. The GCN and GFX10PLUS checks expect the same sequence. The
; fourteen input SGPRs s2..s15 are copied down to s0..s13, m0 is loaded
; straight from s16 with no shift, and a single s_movrels_b64 with base s[0:1]
; writes the selected element to s[0:1].
2970 define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2971 ; GCN-LABEL: dyn_extract_v7f64_s_s:
2972 ; GCN: ; %bb.0: ; %entry
2973 ; GCN-NEXT: s_mov_b32 s0, s2
2974 ; GCN-NEXT: s_mov_b32 s1, s3
2975 ; GCN-NEXT: s_mov_b32 m0, s16
2976 ; GCN-NEXT: s_mov_b32 s2, s4
2977 ; GCN-NEXT: s_mov_b32 s3, s5
2978 ; GCN-NEXT: s_mov_b32 s4, s6
2979 ; GCN-NEXT: s_mov_b32 s5, s7
2980 ; GCN-NEXT: s_mov_b32 s6, s8
2981 ; GCN-NEXT: s_mov_b32 s7, s9
2982 ; GCN-NEXT: s_mov_b32 s8, s10
2983 ; GCN-NEXT: s_mov_b32 s9, s11
2984 ; GCN-NEXT: s_mov_b32 s10, s12
2985 ; GCN-NEXT: s_mov_b32 s11, s13
2986 ; GCN-NEXT: s_mov_b32 s12, s14
2987 ; GCN-NEXT: s_mov_b32 s13, s15
2988 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
2989 ; GCN-NEXT: ; return to shader part epilog
2991 ; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s:
2992 ; GFX10PLUS: ; %bb.0: ; %entry
2993 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2994 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2995 ; GFX10PLUS-NEXT: s_mov_b32 m0, s16
2996 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2997 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2998 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2999 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3000 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3001 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3002 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3003 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3004 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3005 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3006 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3007 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3008 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
3009 ; GFX10PLUS-NEXT: ; return to shader part epilog
3011 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement of a double from the constant vector
; <1.0, 2.0, 3.0, 4.0, 5.0> in an amdgpu_kernel. The index %sel is a kernel
; argument and the extracted value is stored through %out.
; The expected output includes the .amd_kernel_code_t header, whose fields
; differ between targets (for example amd_machine_version_major is 9, 8, 10
; and 11), so GPRIDX, MOVREL, GFX10 and GFX11 each have their own check block.
; After the header all four expect the same shape, a chain of s_cmp_eq_u32
; against 1..4 feeding s_cselect_b64. 1.0, 2.0 and 4.0 appear as immediate
; operands, while 3.0 and 5.0 are built in an SGPR pair by two s_mov_b32
; (low half 0, high half 0x40080000 and 0x40140000 respectively).
; The assertions are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
3015 define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) {
3016 ; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
3017 ; GPRIDX: .amd_kernel_code_t
3018 ; GPRIDX-NEXT: amd_code_version_major = 1
3019 ; GPRIDX-NEXT: amd_code_version_minor = 2
3020 ; GPRIDX-NEXT: amd_machine_kind = 1
3021 ; GPRIDX-NEXT: amd_machine_version_major = 9
3022 ; GPRIDX-NEXT: amd_machine_version_minor = 0
3023 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
3024 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
3025 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
3026 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
3027 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
3028 ; GPRIDX-NEXT: priority = 0
3029 ; GPRIDX-NEXT: float_mode = 240
3030 ; GPRIDX-NEXT: priv = 0
3031 ; GPRIDX-NEXT: enable_dx10_clamp = 1
3032 ; GPRIDX-NEXT: debug_mode = 0
3033 ; GPRIDX-NEXT: enable_ieee_mode = 1
3034 ; GPRIDX-NEXT: enable_wgp_mode = 0
3035 ; GPRIDX-NEXT: enable_mem_ordered = 0
3036 ; GPRIDX-NEXT: enable_fwd_progress = 0
3037 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3038 ; GPRIDX-NEXT: user_sgpr_count = 6
3039 ; GPRIDX-NEXT: enable_trap_handler = 0
3040 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
3041 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0
3042 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0
3043 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
3044 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0
3045 ; GPRIDX-NEXT: enable_exception_msb = 0
3046 ; GPRIDX-NEXT: granulated_lds_size = 0
3047 ; GPRIDX-NEXT: enable_exception = 0
3048 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
3049 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0
3050 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
3051 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3052 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0
3053 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
3054 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
3055 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3056 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3057 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3058 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
3059 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
3060 ; GPRIDX-NEXT: private_element_size = 1
3061 ; GPRIDX-NEXT: is_ptr64 = 1
3062 ; GPRIDX-NEXT: is_dynamic_callstack = 0
3063 ; GPRIDX-NEXT: is_debug_enabled = 0
3064 ; GPRIDX-NEXT: is_xnack_enabled = 1
3065 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
3066 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
3067 ; GPRIDX-NEXT: gds_segment_byte_size = 0
3068 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12
3069 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
3070 ; GPRIDX-NEXT: wavefront_sgpr_count = 13
3071 ; GPRIDX-NEXT: workitem_vgpr_count = 3
3072 ; GPRIDX-NEXT: reserved_vgpr_first = 0
3073 ; GPRIDX-NEXT: reserved_vgpr_count = 0
3074 ; GPRIDX-NEXT: reserved_sgpr_first = 0
3075 ; GPRIDX-NEXT: reserved_sgpr_count = 0
3076 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3077 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
3078 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
3079 ; GPRIDX-NEXT: group_segment_alignment = 4
3080 ; GPRIDX-NEXT: private_segment_alignment = 4
3081 ; GPRIDX-NEXT: wavefront_size = 6
3082 ; GPRIDX-NEXT: call_convention = -1
3083 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
3084 ; GPRIDX-NEXT: .end_amd_kernel_code_t
3085 ; GPRIDX-NEXT: ; %bb.0: ; %entry
3086 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3087 ; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8
3088 ; GPRIDX-NEXT: s_mov_b32 s4, 0
3089 ; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000
3090 ; GPRIDX-NEXT: s_mov_b32 s2, 0
3091 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000
3092 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
3093 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
3094 ; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3095 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2
3096 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3097 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3
3098 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3099 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4
3100 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3101 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
3102 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
3103 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0
3104 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
3105 ; GPRIDX-NEXT: s_endpgm
3107 ; MOVREL-LABEL: dyn_extract_v5f64_s_s:
3108 ; MOVREL: .amd_kernel_code_t
3109 ; MOVREL-NEXT: amd_code_version_major = 1
3110 ; MOVREL-NEXT: amd_code_version_minor = 2
3111 ; MOVREL-NEXT: amd_machine_kind = 1
3112 ; MOVREL-NEXT: amd_machine_version_major = 8
3113 ; MOVREL-NEXT: amd_machine_version_minor = 0
3114 ; MOVREL-NEXT: amd_machine_version_stepping = 3
3115 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
3116 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
3117 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
3118 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1
3119 ; MOVREL-NEXT: priority = 0
3120 ; MOVREL-NEXT: float_mode = 240
3121 ; MOVREL-NEXT: priv = 0
3122 ; MOVREL-NEXT: enable_dx10_clamp = 1
3123 ; MOVREL-NEXT: debug_mode = 0
3124 ; MOVREL-NEXT: enable_ieee_mode = 1
3125 ; MOVREL-NEXT: enable_wgp_mode = 0
3126 ; MOVREL-NEXT: enable_mem_ordered = 0
3127 ; MOVREL-NEXT: enable_fwd_progress = 0
3128 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3129 ; MOVREL-NEXT: user_sgpr_count = 6
3130 ; MOVREL-NEXT: enable_trap_handler = 0
3131 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
3132 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0
3133 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0
3134 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
3135 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0
3136 ; MOVREL-NEXT: enable_exception_msb = 0
3137 ; MOVREL-NEXT: granulated_lds_size = 0
3138 ; MOVREL-NEXT: enable_exception = 0
3139 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
3140 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0
3141 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
3142 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3143 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0
3144 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
3145 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
3146 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3147 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3148 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3149 ; MOVREL-NEXT: enable_wavefront_size32 = 0
3150 ; MOVREL-NEXT: enable_ordered_append_gds = 0
3151 ; MOVREL-NEXT: private_element_size = 1
3152 ; MOVREL-NEXT: is_ptr64 = 1
3153 ; MOVREL-NEXT: is_dynamic_callstack = 0
3154 ; MOVREL-NEXT: is_debug_enabled = 0
3155 ; MOVREL-NEXT: is_xnack_enabled = 0
3156 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
3157 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
3158 ; MOVREL-NEXT: gds_segment_byte_size = 0
3159 ; MOVREL-NEXT: kernarg_segment_byte_size = 12
3160 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
3161 ; MOVREL-NEXT: wavefront_sgpr_count = 9
3162 ; MOVREL-NEXT: workitem_vgpr_count = 4
3163 ; MOVREL-NEXT: reserved_vgpr_first = 0
3164 ; MOVREL-NEXT: reserved_vgpr_count = 0
3165 ; MOVREL-NEXT: reserved_sgpr_first = 0
3166 ; MOVREL-NEXT: reserved_sgpr_count = 0
3167 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3168 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
3169 ; MOVREL-NEXT: kernarg_segment_alignment = 4
3170 ; MOVREL-NEXT: group_segment_alignment = 4
3171 ; MOVREL-NEXT: private_segment_alignment = 4
3172 ; MOVREL-NEXT: wavefront_size = 6
3173 ; MOVREL-NEXT: call_convention = -1
3174 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
3175 ; MOVREL-NEXT: .end_amd_kernel_code_t
3176 ; MOVREL-NEXT: ; %bb.0: ; %entry
3177 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3178 ; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8
3179 ; MOVREL-NEXT: s_mov_b32 s4, 0
3180 ; MOVREL-NEXT: s_mov_b32 s5, 0x40080000
3181 ; MOVREL-NEXT: s_mov_b32 s2, 0
3182 ; MOVREL-NEXT: s_mov_b32 s3, 0x40140000
3183 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
3184 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1
3185 ; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3186 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 2
3187 ; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3188 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 3
3189 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3190 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 4
3191 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3192 ; MOVREL-NEXT: v_mov_b32_e32 v0, s2
3193 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1
3194 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3
3195 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0
3196 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
3197 ; MOVREL-NEXT: s_endpgm
3199 ; GFX10-LABEL: dyn_extract_v5f64_s_s:
3200 ; GFX10: .amd_kernel_code_t
3201 ; GFX10-NEXT: amd_code_version_major = 1
3202 ; GFX10-NEXT: amd_code_version_minor = 2
3203 ; GFX10-NEXT: amd_machine_kind = 1
3204 ; GFX10-NEXT: amd_machine_version_major = 10
3205 ; GFX10-NEXT: amd_machine_version_minor = 1
3206 ; GFX10-NEXT: amd_machine_version_stepping = 0
3207 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
3208 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
3209 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
3210 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
3211 ; GFX10-NEXT: priority = 0
3212 ; GFX10-NEXT: float_mode = 240
3213 ; GFX10-NEXT: priv = 0
3214 ; GFX10-NEXT: enable_dx10_clamp = 1
3215 ; GFX10-NEXT: debug_mode = 0
3216 ; GFX10-NEXT: enable_ieee_mode = 1
3217 ; GFX10-NEXT: enable_wgp_mode = 1
3218 ; GFX10-NEXT: enable_mem_ordered = 1
3219 ; GFX10-NEXT: enable_fwd_progress = 0
3220 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3221 ; GFX10-NEXT: user_sgpr_count = 6
3222 ; GFX10-NEXT: enable_trap_handler = 0
3223 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
3224 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0
3225 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0
3226 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
3227 ; GFX10-NEXT: enable_vgpr_workitem_id = 0
3228 ; GFX10-NEXT: enable_exception_msb = 0
3229 ; GFX10-NEXT: granulated_lds_size = 0
3230 ; GFX10-NEXT: enable_exception = 0
3231 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
3232 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0
3233 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
3234 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3235 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0
3236 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
3237 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
3238 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3239 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3240 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3241 ; GFX10-NEXT: enable_wavefront_size32 = 1
3242 ; GFX10-NEXT: enable_ordered_append_gds = 0
3243 ; GFX10-NEXT: private_element_size = 1
3244 ; GFX10-NEXT: is_ptr64 = 1
3245 ; GFX10-NEXT: is_dynamic_callstack = 0
3246 ; GFX10-NEXT: is_debug_enabled = 0
3247 ; GFX10-NEXT: is_xnack_enabled = 1
3248 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
3249 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
3250 ; GFX10-NEXT: gds_segment_byte_size = 0
3251 ; GFX10-NEXT: kernarg_segment_byte_size = 12
3252 ; GFX10-NEXT: workgroup_fbarrier_count = 0
3253 ; GFX10-NEXT: wavefront_sgpr_count = 7
3254 ; GFX10-NEXT: workitem_vgpr_count = 3
3255 ; GFX10-NEXT: reserved_vgpr_first = 0
3256 ; GFX10-NEXT: reserved_vgpr_count = 0
3257 ; GFX10-NEXT: reserved_sgpr_first = 0
3258 ; GFX10-NEXT: reserved_sgpr_count = 0
3259 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3260 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
3261 ; GFX10-NEXT: kernarg_segment_alignment = 4
3262 ; GFX10-NEXT: group_segment_alignment = 4
3263 ; GFX10-NEXT: private_segment_alignment = 4
3264 ; GFX10-NEXT: wavefront_size = 5
3265 ; GFX10-NEXT: call_convention = -1
3266 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
3267 ; GFX10-NEXT: .end_amd_kernel_code_t
3268 ; GFX10-NEXT: ; %bb.0: ; %entry
3269 ; GFX10-NEXT: s_clause 0x1
3270 ; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8
3271 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3272 ; GFX10-NEXT: s_mov_b32 s2, 0
3273 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000
3274 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
3275 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3276 ; GFX10-NEXT: s_cmp_eq_u32 s6, 1
3277 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3278 ; GFX10-NEXT: s_cmp_eq_u32 s6, 2
3279 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3280 ; GFX10-NEXT: s_cmp_eq_u32 s6, 3
3281 ; GFX10-NEXT: s_mov_b32 s4, 0
3282 ; GFX10-NEXT: s_mov_b32 s5, 0x40140000
3283 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3284 ; GFX10-NEXT: s_cmp_eq_u32 s6, 4
3285 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3286 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
3287 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
3288 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
3289 ; GFX10-NEXT: s_endpgm
3291 ; GFX11-LABEL: dyn_extract_v5f64_s_s:
3292 ; GFX11: .amd_kernel_code_t
3293 ; GFX11-NEXT: amd_code_version_major = 1
3294 ; GFX11-NEXT: amd_code_version_minor = 2
3295 ; GFX11-NEXT: amd_machine_kind = 1
3296 ; GFX11-NEXT: amd_machine_version_major = 11
3297 ; GFX11-NEXT: amd_machine_version_minor = 0
3298 ; GFX11-NEXT: amd_machine_version_stepping = 0
3299 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
3300 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
3301 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
3302 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
3303 ; GFX11-NEXT: priority = 0
3304 ; GFX11-NEXT: float_mode = 240
3305 ; GFX11-NEXT: priv = 0
3306 ; GFX11-NEXT: enable_dx10_clamp = 1
3307 ; GFX11-NEXT: debug_mode = 0
3308 ; GFX11-NEXT: enable_ieee_mode = 1
3309 ; GFX11-NEXT: enable_wgp_mode = 1
3310 ; GFX11-NEXT: enable_mem_ordered = 1
3311 ; GFX11-NEXT: enable_fwd_progress = 0
3312 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3313 ; GFX11-NEXT: user_sgpr_count = 15
3314 ; GFX11-NEXT: enable_trap_handler = 0
3315 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
3316 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0
3317 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0
3318 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
3319 ; GFX11-NEXT: enable_vgpr_workitem_id = 0
3320 ; GFX11-NEXT: enable_exception_msb = 0
3321 ; GFX11-NEXT: granulated_lds_size = 0
3322 ; GFX11-NEXT: enable_exception = 0
3323 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
3324 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0
3325 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
3326 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3327 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0
3328 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
3329 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
3330 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3331 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3332 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3333 ; GFX11-NEXT: enable_wavefront_size32 = 1
3334 ; GFX11-NEXT: enable_ordered_append_gds = 0
3335 ; GFX11-NEXT: private_element_size = 1
3336 ; GFX11-NEXT: is_ptr64 = 1
3337 ; GFX11-NEXT: is_dynamic_callstack = 0
3338 ; GFX11-NEXT: is_debug_enabled = 0
3339 ; GFX11-NEXT: is_xnack_enabled = 0
3340 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
3341 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
3342 ; GFX11-NEXT: gds_segment_byte_size = 0
3343 ; GFX11-NEXT: kernarg_segment_byte_size = 12
3344 ; GFX11-NEXT: workgroup_fbarrier_count = 0
3345 ; GFX11-NEXT: wavefront_sgpr_count = 7
3346 ; GFX11-NEXT: workitem_vgpr_count = 3
3347 ; GFX11-NEXT: reserved_vgpr_first = 0
3348 ; GFX11-NEXT: reserved_vgpr_count = 0
3349 ; GFX11-NEXT: reserved_sgpr_first = 0
3350 ; GFX11-NEXT: reserved_sgpr_count = 0
3351 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3352 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
3353 ; GFX11-NEXT: kernarg_segment_alignment = 4
3354 ; GFX11-NEXT: group_segment_alignment = 4
3355 ; GFX11-NEXT: private_segment_alignment = 4
3356 ; GFX11-NEXT: wavefront_size = 5
3357 ; GFX11-NEXT: call_convention = -1
3358 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
3359 ; GFX11-NEXT: .end_amd_kernel_code_t
3360 ; GFX11-NEXT: ; %bb.0: ; %entry
3361 ; GFX11-NEXT: s_clause 0x1
3362 ; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8
3363 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
3364 ; GFX11-NEXT: s_mov_b32 s2, 0
3365 ; GFX11-NEXT: s_mov_b32 s3, 0x40080000
3366 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
3367 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3368 ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
3369 ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3370 ; GFX11-NEXT: s_cmp_eq_u32 s6, 2
3371 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3372 ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
3373 ; GFX11-NEXT: s_mov_b32 s4, 0
3374 ; GFX11-NEXT: s_mov_b32 s5, 0x40140000
3375 ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3376 ; GFX11-NEXT: s_cmp_eq_u32 s6, 4
3377 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3378 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3379 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
3380 ; GFX11-NEXT: s_nop 0
3381 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3382 ; GFX11-NEXT: s_endpgm
3384 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
3385 store double %ext, ptr addrspace(1) %out
; Dynamic extractelement of a float from the constant vector
; <1.0, 2.0, ..., 15.0> with the index %sel as an ordinary function argument
; (the checks compare against v0).
; All targets expect a linear v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1..15. The GCN checks (shared by the gfx900 and fiji runs) first move each
; non-inline constant into its own VGPR with v_mov_b32, while GFX10 and GFX11
; pass the literals directly to v_cndmask_b32_e64.
; The last compare is against index 15, which is past the end of the
; 15-element vector. The value it selects (v0 on GCN, s4 on GFX10, s0 on
; GFX11) is not written anywhere in the checked sequence.
; NOTE(review) presumably this is the undefined padding lane of a widened
; 16-element vector; confirm before relying on it.
; The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3389 define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
3390 ; GCN-LABEL: dyn_extract_v15f32_const_s_v:
3391 ; GCN: ; %bb.0: ; %entry
3392 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3393 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3394 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
3395 ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
3396 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
3397 ; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
3398 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
3399 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
3400 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
3401 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
3402 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
3403 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
3404 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
3405 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
3406 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
3407 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
3408 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
3409 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
3410 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
3411 ; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000
3412 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
3413 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0
3414 ; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000
3415 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
3416 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
3417 ; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000
3418 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
3419 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
3420 ; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000
3421 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
3422 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
3423 ; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000
3424 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
3425 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
3426 ; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000
3427 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
3428 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
3429 ; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000
3430 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
3431 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
3432 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
3433 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
3434 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
3435 ; GCN-NEXT: s_setpc_b64 s[30:31]
3437 ; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
3438 ; GFX10: ; %bb.0: ; %entry
3439 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3440 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3441 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3442 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3443 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3444 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3445 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3446 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3447 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3448 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3449 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3450 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3451 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3452 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3453 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3454 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3455 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3456 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3457 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3458 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3459 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3460 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3461 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3462 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3463 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3464 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3465 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3466 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3467 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3468 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3469 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo
3470 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3472 ; GFX11-LABEL: dyn_extract_v15f32_const_s_v:
3473 ; GFX11: ; %bb.0: ; %entry
3474 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3475 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3476 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3477 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3478 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3479 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3480 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3481 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3482 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3483 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3484 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3485 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3486 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3487 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3488 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3489 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3490 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3491 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3492 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3493 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3494 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3495 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3496 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3497 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3498 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3499 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3500 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3501 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3502 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3503 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3504 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3505 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3507 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
; Dynamic extractelement of a float from the constant vector
; <1.0, 2.0, ..., 15.0> in an amdgpu_ps shader, with the index %sel passed
; inreg (the checks read it from s2).
; Unlike the variant with a non-inreg index, no compare/select chain is
; expected. Both check blocks (GCN and GFX10PLUS, identical apart from the
; prefix) materialize the 15 constants into s4..s18, copy the index into m0,
; read the element with s_movrels_b32 s0, s4 and move it into v0.
; The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3511 define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3512 ; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3513 ; GCN: ; %bb.0: ; %entry
3514 ; GCN-NEXT: s_mov_b32 s4, 1.0
3515 ; GCN-NEXT: s_mov_b32 m0, s2
3516 ; GCN-NEXT: s_mov_b32 s18, 0x41700000
3517 ; GCN-NEXT: s_mov_b32 s17, 0x41600000
3518 ; GCN-NEXT: s_mov_b32 s16, 0x41500000
3519 ; GCN-NEXT: s_mov_b32 s15, 0x41400000
3520 ; GCN-NEXT: s_mov_b32 s14, 0x41300000
3521 ; GCN-NEXT: s_mov_b32 s13, 0x41200000
3522 ; GCN-NEXT: s_mov_b32 s12, 0x41100000
3523 ; GCN-NEXT: s_mov_b32 s11, 0x41000000
3524 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000
3525 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000
3526 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000
3527 ; GCN-NEXT: s_mov_b32 s7, 4.0
3528 ; GCN-NEXT: s_mov_b32 s6, 0x40400000
3529 ; GCN-NEXT: s_mov_b32 s5, 2.0
3530 ; GCN-NEXT: s_movrels_b32 s0, s4
3531 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3532 ; GCN-NEXT: ; return to shader part epilog
3534 ; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s:
3535 ; GFX10PLUS: ; %bb.0: ; %entry
3536 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
3537 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3538 ; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000
3539 ; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000
3540 ; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000
3541 ; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000
3542 ; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000
3543 ; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000
3544 ; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000
3545 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
3546 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
3547 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
3548 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
3549 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
3550 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
3551 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
3552 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
3553 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3554 ; GFX10PLUS-NEXT: ; return to shader part epilog
3556 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
; Dynamic extractelement of a float from a <15 x float> argument passed inreg
; (the checks read it from s2..s16) in an amdgpu_ps shader, with a non-inreg
; index %sel (the checks compare against v0).
; Both check blocks expect a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1..15. GCN copies every SGPR element into a VGPR with v_mov_b32 before
; selecting, while GFX10PLUS uses the SGPRs directly as v_cndmask_b32_e64
; operands (only s3 is copied to a VGPR).
; The last compare is against index 15, which is past the end of the
; 15-element vector. It selects v0 on GCN and s0 on GFX10PLUS, neither of
; which holds a vector element in the checked sequence.
; NOTE(review) presumably this is the undefined padding lane of a widened
; 16-element vector; confirm before relying on it.
; The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3560 define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3561 ; GCN-LABEL: dyn_extract_v15f32_s_v:
3562 ; GCN: ; %bb.0: ; %entry
3563 ; GCN-NEXT: v_mov_b32_e32 v1, s2
3564 ; GCN-NEXT: v_mov_b32_e32 v2, s3
3565 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3566 ; GCN-NEXT: v_mov_b32_e32 v3, s4
3567 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
3568 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
3569 ; GCN-NEXT: v_mov_b32_e32 v4, s5
3570 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
3571 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
3572 ; GCN-NEXT: v_mov_b32_e32 v5, s6
3573 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
3574 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
3575 ; GCN-NEXT: v_mov_b32_e32 v6, s7
3576 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
3577 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
3578 ; GCN-NEXT: v_mov_b32_e32 v7, s8
3579 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
3580 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
3581 ; GCN-NEXT: v_mov_b32_e32 v8, s9
3582 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
3583 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
3584 ; GCN-NEXT: v_mov_b32_e32 v9, s10
3585 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
3586 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0
3587 ; GCN-NEXT: v_mov_b32_e32 v10, s11
3588 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
3589 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
3590 ; GCN-NEXT: v_mov_b32_e32 v11, s12
3591 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
3592 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
3593 ; GCN-NEXT: v_mov_b32_e32 v12, s13
3594 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
3595 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
3596 ; GCN-NEXT: v_mov_b32_e32 v13, s14
3597 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
3598 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
3599 ; GCN-NEXT: v_mov_b32_e32 v14, s15
3600 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
3601 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
3602 ; GCN-NEXT: v_mov_b32_e32 v15, s16
3603 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
3604 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
3605 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
3606 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
3607 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
3608 ; GCN-NEXT: ; return to shader part epilog
3610 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v:
3611 ; GFX10PLUS: ; %bb.0: ; %entry
3612 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
3613 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3614 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
3615 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3616 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3617 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3618 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3619 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3620 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3621 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3622 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3623 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3624 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3625 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3626 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3627 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3628 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3629 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3630 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3631 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3632 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3633 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3634 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3635 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3636 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
3637 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3638 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo
3639 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3640 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
3641 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3642 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3643 ; GFX10PLUS-NEXT: ; return to shader part epilog
3645 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement of a float from a <15 x float> function argument
; (the checks read it from v0..v14) with the index %sel in the next argument
; register (the checks compare against v15).
; All targets expect a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..15
; that accumulates the result in v0.
; The last compare is against index 15, which is past the end of the
; 15-element vector. It selects v0 itself on GCN, s4 on GFX10 and s0 on GFX11,
; none of which is a distinct vector element in the checked sequence.
; NOTE(review) presumably this is the undefined padding lane of a widened
; 16-element vector; confirm before relying on it.
; The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3649 define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3650 ; GCN-LABEL: dyn_extract_v15f32_v_v:
3651 ; GCN: ; %bb.0: ; %entry
3652 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3653 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3654 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3655 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3656 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3657 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3658 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3659 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3660 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3661 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3662 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3663 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3664 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3665 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3666 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3667 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3668 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3669 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3670 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3671 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3672 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3673 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3674 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3675 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3676 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3677 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3678 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3679 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3680 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3681 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3682 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3683 ; GCN-NEXT: s_setpc_b64 s[30:31]
3685 ; GFX10-LABEL: dyn_extract_v15f32_v_v:
3686 ; GFX10: ; %bb.0: ; %entry
3687 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3688 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3689 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3690 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3691 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3692 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3693 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3694 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3695 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3696 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3697 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3698 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3699 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3700 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3701 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3702 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3703 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3704 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3705 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3706 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3707 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3708 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3709 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3710 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3711 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3712 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3713 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3714 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3715 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3716 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3717 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3718 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3720 ; GFX11-LABEL: dyn_extract_v15f32_v_v:
3721 ; GFX11: ; %bb.0: ; %entry
3722 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3724 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3725 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3726 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3727 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3728 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3729 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3730 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3731 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3732 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3733 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3734 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3735 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3736 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3737 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3738 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3739 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3740 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3741 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3742 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3743 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3744 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3745 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3746 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3747 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3748 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3749 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3750 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3751 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3752 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
3753 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3755 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement from a <15 x float> argument using an `inreg` i32
; index, in an amdgpu_ps function. The expected lowering is a single indexed
; register read with the index taken from s2: the gfx900 checks wrap a
; v_mov_b32 in s_set_gpr_idx_on/off, while the fiji and gfx1010/gfx1100
; checks load m0 from s2 and use v_movrels_b32.
3759 define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3760 ; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3761 ; GPRIDX: ; %bb.0: ; %entry
3762 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
3763 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
3764 ; GPRIDX-NEXT: s_set_gpr_idx_off
3765 ; GPRIDX-NEXT: ; return to shader part epilog
3767 ; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3768 ; MOVREL: ; %bb.0: ; %entry
3769 ; MOVREL-NEXT: s_mov_b32 m0, s2
3770 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
3771 ; MOVREL-NEXT: ; return to shader part epilog
3773 ; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s:
3774 ; GFX10PLUS: ; %bb.0: ; %entry
3775 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3776 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
3777 ; GFX10PLUS-NEXT: ; return to shader part epilog
3779 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement where both the <15 x float> vector and the i32 index
; are `inreg` arguments of an amdgpu_ps function. All four targets expect the
; same shape: the 15 elements are copied down from s2..s16 into s0..s14, m0 is
; loaded from s17, the element is read with s_movrels_b32 and the result is
; then moved into v0.
3783 define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3784 ; GCN-LABEL: dyn_extract_v15f32_s_s:
3785 ; GCN: ; %bb.0: ; %entry
3786 ; GCN-NEXT: s_mov_b32 s0, s2
3787 ; GCN-NEXT: s_mov_b32 m0, s17
3788 ; GCN-NEXT: s_mov_b32 s1, s3
3789 ; GCN-NEXT: s_mov_b32 s2, s4
3790 ; GCN-NEXT: s_mov_b32 s3, s5
3791 ; GCN-NEXT: s_mov_b32 s4, s6
3792 ; GCN-NEXT: s_mov_b32 s5, s7
3793 ; GCN-NEXT: s_mov_b32 s6, s8
3794 ; GCN-NEXT: s_mov_b32 s7, s9
3795 ; GCN-NEXT: s_mov_b32 s8, s10
3796 ; GCN-NEXT: s_mov_b32 s9, s11
3797 ; GCN-NEXT: s_mov_b32 s10, s12
3798 ; GCN-NEXT: s_mov_b32 s11, s13
3799 ; GCN-NEXT: s_mov_b32 s12, s14
3800 ; GCN-NEXT: s_mov_b32 s13, s15
3801 ; GCN-NEXT: s_mov_b32 s14, s16
3802 ; GCN-NEXT: s_movrels_b32 s0, s0
3803 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3804 ; GCN-NEXT: ; return to shader part epilog
3806 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s:
3807 ; GFX10PLUS: ; %bb.0: ; %entry
3808 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3809 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17
3810 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3811 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3812 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3813 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3814 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3815 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3816 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3817 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3818 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3819 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3820 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3821 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3822 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3823 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3824 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s0
3825 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3826 ; GFX10PLUS-NEXT: ; return to shader part epilog
3828 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement from an `inreg` <15 x float> with an `inreg` index of
; %sel + 3, in an amdgpu_ps function. The expected output contains no separate
; add instruction: m0 is still loaded straight from s17, and the constant
; offset shows up as the s_movrels_b32 source operand being s3 instead of s0.
; The two check groups differ only in the order of the leading s_mov_b32
; copies.
3832 define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3833 ; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3834 ; GCN: ; %bb.0: ; %entry
3835 ; GCN-NEXT: s_mov_b32 s0, s2
3836 ; GCN-NEXT: s_mov_b32 s1, s3
3837 ; GCN-NEXT: s_mov_b32 s3, s5
3838 ; GCN-NEXT: s_mov_b32 m0, s17
3839 ; GCN-NEXT: s_mov_b32 s2, s4
3840 ; GCN-NEXT: s_mov_b32 s4, s6
3841 ; GCN-NEXT: s_mov_b32 s5, s7
3842 ; GCN-NEXT: s_mov_b32 s6, s8
3843 ; GCN-NEXT: s_mov_b32 s7, s9
3844 ; GCN-NEXT: s_mov_b32 s8, s10
3845 ; GCN-NEXT: s_mov_b32 s9, s11
3846 ; GCN-NEXT: s_mov_b32 s10, s12
3847 ; GCN-NEXT: s_mov_b32 s11, s13
3848 ; GCN-NEXT: s_mov_b32 s12, s14
3849 ; GCN-NEXT: s_mov_b32 s13, s15
3850 ; GCN-NEXT: s_mov_b32 s14, s16
3851 ; GCN-NEXT: s_movrels_b32 s0, s3
3852 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3853 ; GCN-NEXT: ; return to shader part epilog
3855 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3:
3856 ; GFX10PLUS: ; %bb.0: ; %entry
3857 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3858 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3859 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17
3860 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3861 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3862 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3863 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3864 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3865 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3866 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3867 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3868 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3869 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3870 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3871 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3872 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3873 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s3
3874 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3875 ; GFX10PLUS-NEXT: ; return to shader part epilog
3877 %add = add i32 %sel, 3
3878 %ext = extractelement <15 x float> %vec, i32 %add
; Dynamic extractelement from a <15 x float> with index %sel + 3, where neither
; argument is marked `inreg`. The checks expect an explicit add of 3 into v15
; followed by a compare/select chain (v_cmp_eq_u32 + v_cndmask_b32) over
; indices 1 through 15, rather than an indexed register move. The add mnemonic
; differs per target: v_add_u32 (gfx900), v_add_u32 with an explicit vcc
; operand (fiji), and v_add_nc_u32 (gfx1010/gfx1100).
; NOTE(review): the final select, for index 15, takes v0 itself on gfx900/fiji
; and an SGPR (s4 or s0) that no visible instruction defines on
; gfx1010/gfx1100 -- presumably an undefined padding lane for the out-of-range
; index; confirm before relying on it.
3882 define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3883 ; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3884 ; GPRIDX: ; %bb.0: ; %entry
3885 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3886 ; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15
3887 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3888 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3889 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3890 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3891 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3892 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3893 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3894 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3895 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3896 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3897 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3898 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3899 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3900 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3901 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3902 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3903 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3904 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3905 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3906 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3907 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3908 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3909 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3910 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3911 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3912 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3913 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3914 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3915 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3916 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3917 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
3919 ; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3920 ; MOVREL: ; %bb.0: ; %entry
3921 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3922 ; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15
3923 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3924 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3925 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3926 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3927 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3928 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3929 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3930 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3931 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3932 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3933 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3934 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3935 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3936 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3937 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3938 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3939 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3940 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3941 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3942 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3943 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3944 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3945 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3946 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3947 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3948 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3949 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3950 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3951 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3952 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3953 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
3955 ; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
3956 ; GFX10: ; %bb.0: ; %entry
3957 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3958 ; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15
3959 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3960 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3961 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3962 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3963 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3964 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3965 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3966 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3967 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3968 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3969 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3970 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3971 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3972 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3973 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3974 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3975 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3976 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3977 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3978 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3979 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3980 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3981 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3982 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3983 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3984 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3985 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3986 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3987 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3988 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3989 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3991 ; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3:
3992 ; GFX11: ; %bb.0: ; %entry
3993 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3994 ; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
3995 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3996 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3997 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3998 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3999 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
4000 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
4001 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
4002 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
4003 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
4004 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
4005 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
4006 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
4007 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
4008 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
4009 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
4010 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
4011 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
4012 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
4013 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
4014 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
4015 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
4016 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
4017 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
4018 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
4019 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
4020 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
4021 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
4022 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
4023 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
4024 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
4025 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4027 %add = add i32 %sel, 3
4028 %ext = extractelement <15 x float> %vec, i32 %add
; Kernel that extracts a dynamically indexed element from the constant vector
; <1.0, 2.0, 3.0, 4.0> and stores it to %out. Because this is an amdgpu_kernel,
; each target's checks begin with the full .amd_kernel_code_t header before the
; instruction stream. The extract itself is expected to lower to a chain of
; s_cmp_eq_u32 / s_cselect_b32 on the loaded index rather than an indexed move;
; 0x40400000 is the bit pattern of 3.0, the one element with no inline
; constant form. The final store differs per target: global_store_dword
; (gfx900, gfx1010), flat_store_dword through a VGPR address pair (fiji), and
; global_store_b32 followed by a VGPR-dealloc message (gfx1100).
4032 define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4033 ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
4034 ; GPRIDX: .amd_kernel_code_t
4035 ; GPRIDX-NEXT: amd_code_version_major = 1
4036 ; GPRIDX-NEXT: amd_code_version_minor = 2
4037 ; GPRIDX-NEXT: amd_machine_kind = 1
4038 ; GPRIDX-NEXT: amd_machine_version_major = 9
4039 ; GPRIDX-NEXT: amd_machine_version_minor = 0
4040 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
4041 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
4042 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
4043 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
4044 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
4045 ; GPRIDX-NEXT: priority = 0
4046 ; GPRIDX-NEXT: float_mode = 240
4047 ; GPRIDX-NEXT: priv = 0
4048 ; GPRIDX-NEXT: enable_dx10_clamp = 1
4049 ; GPRIDX-NEXT: debug_mode = 0
4050 ; GPRIDX-NEXT: enable_ieee_mode = 1
4051 ; GPRIDX-NEXT: enable_wgp_mode = 0
4052 ; GPRIDX-NEXT: enable_mem_ordered = 0
4053 ; GPRIDX-NEXT: enable_fwd_progress = 0
4054 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4055 ; GPRIDX-NEXT: user_sgpr_count = 6
4056 ; GPRIDX-NEXT: enable_trap_handler = 0
4057 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
4058 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0
4059 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0
4060 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
4061 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0
4062 ; GPRIDX-NEXT: enable_exception_msb = 0
4063 ; GPRIDX-NEXT: granulated_lds_size = 0
4064 ; GPRIDX-NEXT: enable_exception = 0
4065 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
4066 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0
4067 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
4068 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4069 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0
4070 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
4071 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
4072 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4073 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4074 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4075 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
4076 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
4077 ; GPRIDX-NEXT: private_element_size = 1
4078 ; GPRIDX-NEXT: is_ptr64 = 1
4079 ; GPRIDX-NEXT: is_dynamic_callstack = 0
4080 ; GPRIDX-NEXT: is_debug_enabled = 0
4081 ; GPRIDX-NEXT: is_xnack_enabled = 1
4082 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
4083 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
4084 ; GPRIDX-NEXT: gds_segment_byte_size = 0
4085 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12
4086 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
4087 ; GPRIDX-NEXT: wavefront_sgpr_count = 10
4088 ; GPRIDX-NEXT: workitem_vgpr_count = 2
4089 ; GPRIDX-NEXT: reserved_vgpr_first = 0
4090 ; GPRIDX-NEXT: reserved_vgpr_count = 0
4091 ; GPRIDX-NEXT: reserved_sgpr_first = 0
4092 ; GPRIDX-NEXT: reserved_sgpr_count = 0
4093 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4094 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
4095 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
4096 ; GPRIDX-NEXT: group_segment_alignment = 4
4097 ; GPRIDX-NEXT: private_segment_alignment = 4
4098 ; GPRIDX-NEXT: wavefront_size = 6
4099 ; GPRIDX-NEXT: call_convention = -1
4100 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
4101 ; GPRIDX-NEXT: .end_amd_kernel_code_t
4102 ; GPRIDX-NEXT: ; %bb.0: ; %entry
4103 ; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8
4104 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4105 ; GPRIDX-NEXT: v_mov_b32_e32 v1, 0
4106 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
4107 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1
4108 ; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0
4109 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2
4110 ; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3
4111 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3
4112 ; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3
4113 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
4114 ; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1]
4115 ; GPRIDX-NEXT: s_endpgm
4117 ; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
4118 ; MOVREL: .amd_kernel_code_t
4119 ; MOVREL-NEXT: amd_code_version_major = 1
4120 ; MOVREL-NEXT: amd_code_version_minor = 2
4121 ; MOVREL-NEXT: amd_machine_kind = 1
4122 ; MOVREL-NEXT: amd_machine_version_major = 8
4123 ; MOVREL-NEXT: amd_machine_version_minor = 0
4124 ; MOVREL-NEXT: amd_machine_version_stepping = 3
4125 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
4126 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
4127 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
4128 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0
4129 ; MOVREL-NEXT: priority = 0
4130 ; MOVREL-NEXT: float_mode = 240
4131 ; MOVREL-NEXT: priv = 0
4132 ; MOVREL-NEXT: enable_dx10_clamp = 1
4133 ; MOVREL-NEXT: debug_mode = 0
4134 ; MOVREL-NEXT: enable_ieee_mode = 1
4135 ; MOVREL-NEXT: enable_wgp_mode = 0
4136 ; MOVREL-NEXT: enable_mem_ordered = 0
4137 ; MOVREL-NEXT: enable_fwd_progress = 0
4138 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4139 ; MOVREL-NEXT: user_sgpr_count = 6
4140 ; MOVREL-NEXT: enable_trap_handler = 0
4141 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
4142 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0
4143 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0
4144 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
4145 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0
4146 ; MOVREL-NEXT: enable_exception_msb = 0
4147 ; MOVREL-NEXT: granulated_lds_size = 0
4148 ; MOVREL-NEXT: enable_exception = 0
4149 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
4150 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0
4151 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
4152 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4153 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0
4154 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
4155 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
4156 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4157 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4158 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4159 ; MOVREL-NEXT: enable_wavefront_size32 = 0
4160 ; MOVREL-NEXT: enable_ordered_append_gds = 0
4161 ; MOVREL-NEXT: private_element_size = 1
4162 ; MOVREL-NEXT: is_ptr64 = 1
4163 ; MOVREL-NEXT: is_dynamic_callstack = 0
4164 ; MOVREL-NEXT: is_debug_enabled = 0
4165 ; MOVREL-NEXT: is_xnack_enabled = 0
4166 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
4167 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
4168 ; MOVREL-NEXT: gds_segment_byte_size = 0
4169 ; MOVREL-NEXT: kernarg_segment_byte_size = 12
4170 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
4171 ; MOVREL-NEXT: wavefront_sgpr_count = 6
4172 ; MOVREL-NEXT: workitem_vgpr_count = 3
4173 ; MOVREL-NEXT: reserved_vgpr_first = 0
4174 ; MOVREL-NEXT: reserved_vgpr_count = 0
4175 ; MOVREL-NEXT: reserved_sgpr_first = 0
4176 ; MOVREL-NEXT: reserved_sgpr_count = 0
4177 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4178 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
4179 ; MOVREL-NEXT: kernarg_segment_alignment = 4
4180 ; MOVREL-NEXT: group_segment_alignment = 4
4181 ; MOVREL-NEXT: private_segment_alignment = 4
4182 ; MOVREL-NEXT: wavefront_size = 6
4183 ; MOVREL-NEXT: call_convention = -1
4184 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
4185 ; MOVREL-NEXT: .end_amd_kernel_code_t
4186 ; MOVREL-NEXT: ; %bb.0: ; %entry
4187 ; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8
4188 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4189 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
4190 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 1
4191 ; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0
4192 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 2
4193 ; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3
4194 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 3
4195 ; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3
4196 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
4197 ; MOVREL-NEXT: v_mov_b32_e32 v2, s2
4198 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
4199 ; MOVREL-NEXT: flat_store_dword v[0:1], v2
4200 ; MOVREL-NEXT: s_endpgm
4202 ; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
4203 ; GFX10: .amd_kernel_code_t
4204 ; GFX10-NEXT: amd_code_version_major = 1
4205 ; GFX10-NEXT: amd_code_version_minor = 2
4206 ; GFX10-NEXT: amd_machine_kind = 1
4207 ; GFX10-NEXT: amd_machine_version_major = 10
4208 ; GFX10-NEXT: amd_machine_version_minor = 1
4209 ; GFX10-NEXT: amd_machine_version_stepping = 0
4210 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
4211 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
4212 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
4213 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
4214 ; GFX10-NEXT: priority = 0
4215 ; GFX10-NEXT: float_mode = 240
4216 ; GFX10-NEXT: priv = 0
4217 ; GFX10-NEXT: enable_dx10_clamp = 1
4218 ; GFX10-NEXT: debug_mode = 0
4219 ; GFX10-NEXT: enable_ieee_mode = 1
4220 ; GFX10-NEXT: enable_wgp_mode = 1
4221 ; GFX10-NEXT: enable_mem_ordered = 1
4222 ; GFX10-NEXT: enable_fwd_progress = 0
4223 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4224 ; GFX10-NEXT: user_sgpr_count = 6
4225 ; GFX10-NEXT: enable_trap_handler = 0
4226 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
4227 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0
4228 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0
4229 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
4230 ; GFX10-NEXT: enable_vgpr_workitem_id = 0
4231 ; GFX10-NEXT: enable_exception_msb = 0
4232 ; GFX10-NEXT: granulated_lds_size = 0
4233 ; GFX10-NEXT: enable_exception = 0
4234 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
4235 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0
4236 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
4237 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4238 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0
4239 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
4240 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
4241 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4242 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4243 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4244 ; GFX10-NEXT: enable_wavefront_size32 = 1
4245 ; GFX10-NEXT: enable_ordered_append_gds = 0
4246 ; GFX10-NEXT: private_element_size = 1
4247 ; GFX10-NEXT: is_ptr64 = 1
4248 ; GFX10-NEXT: is_dynamic_callstack = 0
4249 ; GFX10-NEXT: is_debug_enabled = 0
4250 ; GFX10-NEXT: is_xnack_enabled = 1
4251 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
4252 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
4253 ; GFX10-NEXT: gds_segment_byte_size = 0
4254 ; GFX10-NEXT: kernarg_segment_byte_size = 12
4255 ; GFX10-NEXT: workgroup_fbarrier_count = 0
4256 ; GFX10-NEXT: wavefront_sgpr_count = 6
4257 ; GFX10-NEXT: workitem_vgpr_count = 2
4258 ; GFX10-NEXT: reserved_vgpr_first = 0
4259 ; GFX10-NEXT: reserved_vgpr_count = 0
4260 ; GFX10-NEXT: reserved_sgpr_first = 0
4261 ; GFX10-NEXT: reserved_sgpr_count = 0
4262 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4263 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
4264 ; GFX10-NEXT: kernarg_segment_alignment = 4
4265 ; GFX10-NEXT: group_segment_alignment = 4
4266 ; GFX10-NEXT: private_segment_alignment = 4
4267 ; GFX10-NEXT: wavefront_size = 5
4268 ; GFX10-NEXT: call_convention = -1
4269 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
4270 ; GFX10-NEXT: .end_amd_kernel_code_t
4271 ; GFX10-NEXT: ; %bb.0: ; %entry
4272 ; GFX10-NEXT: s_clause 0x1
4273 ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
4274 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4275 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
4276 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4277 ; GFX10-NEXT: s_cmp_eq_u32 s2, 1
4278 ; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0
4279 ; GFX10-NEXT: s_cmp_eq_u32 s2, 2
4280 ; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3
4281 ; GFX10-NEXT: s_cmp_eq_u32 s2, 3
4282 ; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3
4283 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
4284 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
4285 ; GFX10-NEXT: s_endpgm
4287 ; GFX11-LABEL: dyn_extract_v4f32_s_s_s:
4288 ; GFX11: .amd_kernel_code_t
4289 ; GFX11-NEXT: amd_code_version_major = 1
4290 ; GFX11-NEXT: amd_code_version_minor = 2
4291 ; GFX11-NEXT: amd_machine_kind = 1
4292 ; GFX11-NEXT: amd_machine_version_major = 11
4293 ; GFX11-NEXT: amd_machine_version_minor = 0
4294 ; GFX11-NEXT: amd_machine_version_stepping = 0
4295 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
4296 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
4297 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
4298 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
4299 ; GFX11-NEXT: priority = 0
4300 ; GFX11-NEXT: float_mode = 240
4301 ; GFX11-NEXT: priv = 0
4302 ; GFX11-NEXT: enable_dx10_clamp = 1
4303 ; GFX11-NEXT: debug_mode = 0
4304 ; GFX11-NEXT: enable_ieee_mode = 1
4305 ; GFX11-NEXT: enable_wgp_mode = 1
4306 ; GFX11-NEXT: enable_mem_ordered = 1
4307 ; GFX11-NEXT: enable_fwd_progress = 0
4308 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4309 ; GFX11-NEXT: user_sgpr_count = 15
4310 ; GFX11-NEXT: enable_trap_handler = 0
4311 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
4312 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0
4313 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0
4314 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
4315 ; GFX11-NEXT: enable_vgpr_workitem_id = 0
4316 ; GFX11-NEXT: enable_exception_msb = 0
4317 ; GFX11-NEXT: granulated_lds_size = 0
4318 ; GFX11-NEXT: enable_exception = 0
4319 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
4320 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0
4321 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
4322 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4323 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0
4324 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
4325 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
4326 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4327 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4328 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4329 ; GFX11-NEXT: enable_wavefront_size32 = 1
4330 ; GFX11-NEXT: enable_ordered_append_gds = 0
4331 ; GFX11-NEXT: private_element_size = 1
4332 ; GFX11-NEXT: is_ptr64 = 1
4333 ; GFX11-NEXT: is_dynamic_callstack = 0
4334 ; GFX11-NEXT: is_debug_enabled = 0
4335 ; GFX11-NEXT: is_xnack_enabled = 0
4336 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
4337 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
4338 ; GFX11-NEXT: gds_segment_byte_size = 0
4339 ; GFX11-NEXT: kernarg_segment_byte_size = 12
4340 ; GFX11-NEXT: workgroup_fbarrier_count = 0
4341 ; GFX11-NEXT: wavefront_sgpr_count = 4
4342 ; GFX11-NEXT: workitem_vgpr_count = 2
4343 ; GFX11-NEXT: reserved_vgpr_first = 0
4344 ; GFX11-NEXT: reserved_vgpr_count = 0
4345 ; GFX11-NEXT: reserved_sgpr_first = 0
4346 ; GFX11-NEXT: reserved_sgpr_count = 0
4347 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4348 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
4349 ; GFX11-NEXT: kernarg_segment_alignment = 4
4350 ; GFX11-NEXT: group_segment_alignment = 4
4351 ; GFX11-NEXT: private_segment_alignment = 4
4352 ; GFX11-NEXT: wavefront_size = 5
4353 ; GFX11-NEXT: call_convention = -1
4354 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
4355 ; GFX11-NEXT: .end_amd_kernel_code_t
4356 ; GFX11-NEXT: ; %bb.0: ; %entry
4357 ; GFX11-NEXT: s_clause 0x1
4358 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
4359 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
4360 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4361 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4362 ; GFX11-NEXT: s_cmp_eq_u32 s2, 1
4363 ; GFX11-NEXT: s_cselect_b32 s3, 2.0, 1.0
4364 ; GFX11-NEXT: s_cmp_eq_u32 s2, 2
4365 ; GFX11-NEXT: s_cselect_b32 s3, 0x40400000, s3
4366 ; GFX11-NEXT: s_cmp_eq_u32 s2, 3
4367 ; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3
4368 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
4369 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
4370 ; GFX11-NEXT: s_nop 0
4371 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4372 ; GFX11-NEXT: s_endpgm
4374 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
4375 store float %ext, ptr addrspace(1) %out
4379 define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4380 ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
4381 ; GPRIDX: .amd_kernel_code_t
4382 ; GPRIDX-NEXT: amd_code_version_major = 1
4383 ; GPRIDX-NEXT: amd_code_version_minor = 2
4384 ; GPRIDX-NEXT: amd_machine_kind = 1
4385 ; GPRIDX-NEXT: amd_machine_version_major = 9
4386 ; GPRIDX-NEXT: amd_machine_version_minor = 0
4387 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
4388 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
4389 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
4390 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
4391 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
4392 ; GPRIDX-NEXT: priority = 0
4393 ; GPRIDX-NEXT: float_mode = 240
4394 ; GPRIDX-NEXT: priv = 0
4395 ; GPRIDX-NEXT: enable_dx10_clamp = 1
4396 ; GPRIDX-NEXT: debug_mode = 0
4397 ; GPRIDX-NEXT: enable_ieee_mode = 1
4398 ; GPRIDX-NEXT: enable_wgp_mode = 0
4399 ; GPRIDX-NEXT: enable_mem_ordered = 0
4400 ; GPRIDX-NEXT: enable_fwd_progress = 0
4401 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4402 ; GPRIDX-NEXT: user_sgpr_count = 6
4403 ; GPRIDX-NEXT: enable_trap_handler = 0
4404 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
4405 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0
4406 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0
4407 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
4408 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0
4409 ; GPRIDX-NEXT: enable_exception_msb = 0
4410 ; GPRIDX-NEXT: granulated_lds_size = 0
4411 ; GPRIDX-NEXT: enable_exception = 0
4412 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
4413 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0
4414 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
4415 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4416 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0
4417 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
4418 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
4419 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4420 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4421 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4422 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
4423 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
4424 ; GPRIDX-NEXT: private_element_size = 1
4425 ; GPRIDX-NEXT: is_ptr64 = 1
4426 ; GPRIDX-NEXT: is_dynamic_callstack = 0
4427 ; GPRIDX-NEXT: is_debug_enabled = 0
4428 ; GPRIDX-NEXT: is_xnack_enabled = 1
4429 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
4430 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
4431 ; GPRIDX-NEXT: gds_segment_byte_size = 0
4432 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12
4433 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
4434 ; GPRIDX-NEXT: wavefront_sgpr_count = 11
4435 ; GPRIDX-NEXT: workitem_vgpr_count = 3
4436 ; GPRIDX-NEXT: reserved_vgpr_first = 0
4437 ; GPRIDX-NEXT: reserved_vgpr_count = 0
4438 ; GPRIDX-NEXT: reserved_sgpr_first = 0
4439 ; GPRIDX-NEXT: reserved_sgpr_count = 0
4440 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4441 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
4442 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
4443 ; GPRIDX-NEXT: group_segment_alignment = 4
4444 ; GPRIDX-NEXT: private_segment_alignment = 4
4445 ; GPRIDX-NEXT: wavefront_size = 6
4446 ; GPRIDX-NEXT: call_convention = -1
4447 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
4448 ; GPRIDX-NEXT: .end_amd_kernel_code_t
4449 ; GPRIDX-NEXT: ; %bb.0: ; %entry
4450 ; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8
4451 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4452 ; GPRIDX-NEXT: s_mov_b32 s2, 0
4453 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000
4454 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0
4455 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
4456 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1
4457 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4458 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2
4459 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4460 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3
4461 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4462 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
4463 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
4464 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
4465 ; GPRIDX-NEXT: s_endpgm
4467 ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
4468 ; MOVREL: .amd_kernel_code_t
4469 ; MOVREL-NEXT: amd_code_version_major = 1
4470 ; MOVREL-NEXT: amd_code_version_minor = 2
4471 ; MOVREL-NEXT: amd_machine_kind = 1
4472 ; MOVREL-NEXT: amd_machine_version_major = 8
4473 ; MOVREL-NEXT: amd_machine_version_minor = 0
4474 ; MOVREL-NEXT: amd_machine_version_stepping = 3
4475 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
4476 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
4477 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
4478 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0
4479 ; MOVREL-NEXT: priority = 0
4480 ; MOVREL-NEXT: float_mode = 240
4481 ; MOVREL-NEXT: priv = 0
4482 ; MOVREL-NEXT: enable_dx10_clamp = 1
4483 ; MOVREL-NEXT: debug_mode = 0
4484 ; MOVREL-NEXT: enable_ieee_mode = 1
4485 ; MOVREL-NEXT: enable_wgp_mode = 0
4486 ; MOVREL-NEXT: enable_mem_ordered = 0
4487 ; MOVREL-NEXT: enable_fwd_progress = 0
4488 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4489 ; MOVREL-NEXT: user_sgpr_count = 6
4490 ; MOVREL-NEXT: enable_trap_handler = 0
4491 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
4492 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0
4493 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0
4494 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
4495 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0
4496 ; MOVREL-NEXT: enable_exception_msb = 0
4497 ; MOVREL-NEXT: granulated_lds_size = 0
4498 ; MOVREL-NEXT: enable_exception = 0
4499 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
4500 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0
4501 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
4502 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4503 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0
4504 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
4505 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
4506 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4507 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4508 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4509 ; MOVREL-NEXT: enable_wavefront_size32 = 0
4510 ; MOVREL-NEXT: enable_ordered_append_gds = 0
4511 ; MOVREL-NEXT: private_element_size = 1
4512 ; MOVREL-NEXT: is_ptr64 = 1
4513 ; MOVREL-NEXT: is_dynamic_callstack = 0
4514 ; MOVREL-NEXT: is_debug_enabled = 0
4515 ; MOVREL-NEXT: is_xnack_enabled = 0
4516 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
4517 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
4518 ; MOVREL-NEXT: gds_segment_byte_size = 0
4519 ; MOVREL-NEXT: kernarg_segment_byte_size = 12
4520 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
4521 ; MOVREL-NEXT: wavefront_sgpr_count = 7
4522 ; MOVREL-NEXT: workitem_vgpr_count = 4
4523 ; MOVREL-NEXT: reserved_vgpr_first = 0
4524 ; MOVREL-NEXT: reserved_vgpr_count = 0
4525 ; MOVREL-NEXT: reserved_sgpr_first = 0
4526 ; MOVREL-NEXT: reserved_sgpr_count = 0
4527 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4528 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
4529 ; MOVREL-NEXT: kernarg_segment_alignment = 4
4530 ; MOVREL-NEXT: group_segment_alignment = 4
4531 ; MOVREL-NEXT: private_segment_alignment = 4
4532 ; MOVREL-NEXT: wavefront_size = 6
4533 ; MOVREL-NEXT: call_convention = -1
4534 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
4535 ; MOVREL-NEXT: .end_amd_kernel_code_t
4536 ; MOVREL-NEXT: ; %bb.0: ; %entry
4537 ; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8
4538 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4539 ; MOVREL-NEXT: s_mov_b32 s2, 0
4540 ; MOVREL-NEXT: s_mov_b32 s3, 0x40080000
4541 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
4542 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 1
4543 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4544 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 2
4545 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4546 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 3
4547 ; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4548 ; MOVREL-NEXT: v_mov_b32_e32 v0, s2
4549 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1
4550 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3
4551 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0
4552 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
4553 ; MOVREL-NEXT: s_endpgm
4555 ; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
4556 ; GFX10: .amd_kernel_code_t
4557 ; GFX10-NEXT: amd_code_version_major = 1
4558 ; GFX10-NEXT: amd_code_version_minor = 2
4559 ; GFX10-NEXT: amd_machine_kind = 1
4560 ; GFX10-NEXT: amd_machine_version_major = 10
4561 ; GFX10-NEXT: amd_machine_version_minor = 1
4562 ; GFX10-NEXT: amd_machine_version_stepping = 0
4563 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
4564 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
4565 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
4566 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
4567 ; GFX10-NEXT: priority = 0
4568 ; GFX10-NEXT: float_mode = 240
4569 ; GFX10-NEXT: priv = 0
4570 ; GFX10-NEXT: enable_dx10_clamp = 1
4571 ; GFX10-NEXT: debug_mode = 0
4572 ; GFX10-NEXT: enable_ieee_mode = 1
4573 ; GFX10-NEXT: enable_wgp_mode = 1
4574 ; GFX10-NEXT: enable_mem_ordered = 1
4575 ; GFX10-NEXT: enable_fwd_progress = 0
4576 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4577 ; GFX10-NEXT: user_sgpr_count = 6
4578 ; GFX10-NEXT: enable_trap_handler = 0
4579 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
4580 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0
4581 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0
4582 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
4583 ; GFX10-NEXT: enable_vgpr_workitem_id = 0
4584 ; GFX10-NEXT: enable_exception_msb = 0
4585 ; GFX10-NEXT: granulated_lds_size = 0
4586 ; GFX10-NEXT: enable_exception = 0
4587 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
4588 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0
4589 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
4590 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4591 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0
4592 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
4593 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
4594 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4595 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4596 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4597 ; GFX10-NEXT: enable_wavefront_size32 = 1
4598 ; GFX10-NEXT: enable_ordered_append_gds = 0
4599 ; GFX10-NEXT: private_element_size = 1
4600 ; GFX10-NEXT: is_ptr64 = 1
4601 ; GFX10-NEXT: is_dynamic_callstack = 0
4602 ; GFX10-NEXT: is_debug_enabled = 0
4603 ; GFX10-NEXT: is_xnack_enabled = 1
4604 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
4605 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
4606 ; GFX10-NEXT: gds_segment_byte_size = 0
4607 ; GFX10-NEXT: kernarg_segment_byte_size = 12
4608 ; GFX10-NEXT: workgroup_fbarrier_count = 0
4609 ; GFX10-NEXT: wavefront_sgpr_count = 7
4610 ; GFX10-NEXT: workitem_vgpr_count = 3
4611 ; GFX10-NEXT: reserved_vgpr_first = 0
4612 ; GFX10-NEXT: reserved_vgpr_count = 0
4613 ; GFX10-NEXT: reserved_sgpr_first = 0
4614 ; GFX10-NEXT: reserved_sgpr_count = 0
4615 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4616 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
4617 ; GFX10-NEXT: kernarg_segment_alignment = 4
4618 ; GFX10-NEXT: group_segment_alignment = 4
4619 ; GFX10-NEXT: private_segment_alignment = 4
4620 ; GFX10-NEXT: wavefront_size = 5
4621 ; GFX10-NEXT: call_convention = -1
4622 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
4623 ; GFX10-NEXT: .end_amd_kernel_code_t
4624 ; GFX10-NEXT: ; %bb.0: ; %entry
4625 ; GFX10-NEXT: s_clause 0x1
4626 ; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8
4627 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
4628 ; GFX10-NEXT: s_mov_b32 s2, 0
4629 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000
4630 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
4631 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4632 ; GFX10-NEXT: s_cmp_eq_u32 s6, 1
4633 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4634 ; GFX10-NEXT: s_cmp_eq_u32 s6, 2
4635 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4636 ; GFX10-NEXT: s_cmp_eq_u32 s6, 3
4637 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4638 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
4639 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
4640 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
4641 ; GFX10-NEXT: s_endpgm
4643 ; GFX11-LABEL: dyn_extract_v4f64_s_s_s:
4644 ; GFX11: .amd_kernel_code_t
4645 ; GFX11-NEXT: amd_code_version_major = 1
4646 ; GFX11-NEXT: amd_code_version_minor = 2
4647 ; GFX11-NEXT: amd_machine_kind = 1
4648 ; GFX11-NEXT: amd_machine_version_major = 11
4649 ; GFX11-NEXT: amd_machine_version_minor = 0
4650 ; GFX11-NEXT: amd_machine_version_stepping = 0
4651 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
4652 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
4653 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
4654 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
4655 ; GFX11-NEXT: priority = 0
4656 ; GFX11-NEXT: float_mode = 240
4657 ; GFX11-NEXT: priv = 0
4658 ; GFX11-NEXT: enable_dx10_clamp = 1
4659 ; GFX11-NEXT: debug_mode = 0
4660 ; GFX11-NEXT: enable_ieee_mode = 1
4661 ; GFX11-NEXT: enable_wgp_mode = 1
4662 ; GFX11-NEXT: enable_mem_ordered = 1
4663 ; GFX11-NEXT: enable_fwd_progress = 0
4664 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4665 ; GFX11-NEXT: user_sgpr_count = 15
4666 ; GFX11-NEXT: enable_trap_handler = 0
4667 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
4668 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0
4669 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0
4670 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
4671 ; GFX11-NEXT: enable_vgpr_workitem_id = 0
4672 ; GFX11-NEXT: enable_exception_msb = 0
4673 ; GFX11-NEXT: granulated_lds_size = 0
4674 ; GFX11-NEXT: enable_exception = 0
4675 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
4676 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0
4677 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
4678 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4679 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0
4680 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
4681 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
4682 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4683 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4684 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4685 ; GFX11-NEXT: enable_wavefront_size32 = 1
4686 ; GFX11-NEXT: enable_ordered_append_gds = 0
4687 ; GFX11-NEXT: private_element_size = 1
4688 ; GFX11-NEXT: is_ptr64 = 1
4689 ; GFX11-NEXT: is_dynamic_callstack = 0
4690 ; GFX11-NEXT: is_debug_enabled = 0
4691 ; GFX11-NEXT: is_xnack_enabled = 0
4692 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
4693 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
4694 ; GFX11-NEXT: gds_segment_byte_size = 0
4695 ; GFX11-NEXT: kernarg_segment_byte_size = 12
4696 ; GFX11-NEXT: workgroup_fbarrier_count = 0
4697 ; GFX11-NEXT: wavefront_sgpr_count = 7
4698 ; GFX11-NEXT: workitem_vgpr_count = 3
4699 ; GFX11-NEXT: reserved_vgpr_first = 0
4700 ; GFX11-NEXT: reserved_vgpr_count = 0
4701 ; GFX11-NEXT: reserved_sgpr_first = 0
4702 ; GFX11-NEXT: reserved_sgpr_count = 0
4703 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4704 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
4705 ; GFX11-NEXT: kernarg_segment_alignment = 4
4706 ; GFX11-NEXT: group_segment_alignment = 4
4707 ; GFX11-NEXT: private_segment_alignment = 4
4708 ; GFX11-NEXT: wavefront_size = 5
4709 ; GFX11-NEXT: call_convention = -1
4710 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
4711 ; GFX11-NEXT: .end_amd_kernel_code_t
4712 ; GFX11-NEXT: ; %bb.0: ; %entry
4713 ; GFX11-NEXT: s_clause 0x1
4714 ; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8
4715 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
4716 ; GFX11-NEXT: s_mov_b32 s2, 0
4717 ; GFX11-NEXT: s_mov_b32 s3, 0x40080000
4718 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
4719 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4720 ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
4721 ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4722 ; GFX11-NEXT: s_cmp_eq_u32 s6, 2
4723 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4724 ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
4725 ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4726 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
4727 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
4728 ; GFX11-NEXT: s_nop 0
4729 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4730 ; GFX11-NEXT: s_endpgm
4732 %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
4733 store double %ext, ptr addrspace(1) %out
; Constant-index extract (index 7) from a <64 x i32> loaded through a global
; (addrspace(1)) pointer.
; Element 7 lives at byte offset 28, so the expected code loads only the
; 16-byte quad at offset 16 (elements 4..7) into v[4:7] and copies v7 to v0.
; On the fiji run the offset is added to the pointer with
; v_add_u32/v_addc_u32 and a flat load is used instead of a global load with
; an immediate offset.
4737 define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
4738 ; GPRIDX-LABEL: v_extract_v64i32_7:
4740 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4741 ; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
4742 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4743 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v7
4744 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4746 ; MOVREL-LABEL: v_extract_v64i32_7:
4748 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4749 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
4750 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4751 ; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
4752 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4753 ; MOVREL-NEXT: v_mov_b32_e32 v0, v7
4754 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4756 ; GFX10-LABEL: v_extract_v64i32_7:
4758 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4759 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
4760 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4761 ; GFX10-NEXT: v_mov_b32_e32 v0, v7
4762 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4764 ; GFX11-LABEL: v_extract_v64i32_7:
4766 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4767 ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
4768 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4769 ; GFX11-NEXT: v_mov_b32_e32 v0, v7
4770 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; The IR loads the whole 64-element vector; the checks above expect a single
; 128-bit load to remain in the generated code.
4771 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4772 %elt = extractelement <64 x i32> %vec, i32 7
; Constant-index extract (index 32) from a <64 x i32> loaded through a global
; (addrspace(1)) pointer.
; Element 32 lives at byte offset 128, which is the start of a 16-byte quad.
; The expected code loads that quad straight into v[0:3], so the requested
; dword is already in v0 and no register copy follows the load.
; On the fiji run the 0x80 offset is added to the pointer with
; v_add_u32/v_addc_u32 ahead of a flat load.
4776 define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
4777 ; GPRIDX-LABEL: v_extract_v64i32_32:
4779 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4780 ; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
4781 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4782 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4784 ; MOVREL-LABEL: v_extract_v64i32_32:
4786 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
4788 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4789 ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
4790 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4791 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4793 ; GFX10-LABEL: v_extract_v64i32_32:
4795 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4796 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
4797 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4798 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4800 ; GFX11-LABEL: v_extract_v64i32_32:
4802 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4803 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
4804 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4805 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; The IR loads the whole 64-element vector; the checks above expect a single
; 128-bit load to remain in the generated code.
4806 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4807 %elt = extractelement <64 x i32> %vec, i32 32
; Constant-index extract (index 33) from a <64 x i32> loaded through a global
; (addrspace(1)) pointer.
; Element 33 lives at byte offset 132, i.e. the second dword of the 16-byte
; quad at offset 128. The expected code loads that quad into v[0:3] and then
; copies v1 to v0.
; On the fiji run the 0x80 offset is added to the pointer with
; v_add_u32/v_addc_u32 ahead of a flat load.
4811 define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
4812 ; GPRIDX-LABEL: v_extract_v64i32_33:
4814 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4815 ; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
4816 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4817 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
4818 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4820 ; MOVREL-LABEL: v_extract_v64i32_33:
4822 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4823 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
4824 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4825 ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
4826 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4827 ; MOVREL-NEXT: v_mov_b32_e32 v0, v1
4828 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4830 ; GFX10-LABEL: v_extract_v64i32_33:
4832 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4833 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
4834 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4835 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
4836 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4838 ; GFX11-LABEL: v_extract_v64i32_33:
4840 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4841 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
4842 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4843 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
4844 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; The IR loads the whole 64-element vector; the checks above expect a single
; 128-bit load to remain in the generated code.
4845 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4846 %elt = extractelement <64 x i32> %vec, i32 33
; Constant-index extract (index 37) from a <64 x i32> loaded through a global
; (addrspace(1)) pointer.
; Element 37 lives at byte offset 148, i.e. the second dword of the 16-byte
; quad at offset 144 (elements 36..39). The expected code loads that quad into
; v[4:7] and then copies v5 to v0.
; On the fiji run the 0x90 offset is added to the pointer with
; v_add_u32/v_addc_u32 ahead of a flat load.
4850 define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
4851 ; GPRIDX-LABEL: v_extract_v64i32_37:
4853 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4854 ; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
4855 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4856 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v5
4857 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4859 ; MOVREL-LABEL: v_extract_v64i32_37:
4861 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4862 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x90, v0
4863 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4864 ; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
4865 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4866 ; MOVREL-NEXT: v_mov_b32_e32 v0, v5
4867 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4869 ; GFX10-LABEL: v_extract_v64i32_37:
4871 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4872 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
4873 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4874 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
4875 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4877 ; GFX11-LABEL: v_extract_v64i32_37:
4879 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4880 ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:144
4881 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4882 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
4883 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; The IR loads the whole 64-element vector; the checks above expect a single
; 128-bit load to remain in the generated code.
4884 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4885 %elt = extractelement <64 x i32> %vec, i32 37