1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Dynamic extractelement from the constant <8 x float> vector (1.0 through 8.0),
; indexed by the plain (non-inreg) i32 argument %sel.
; Both checked sequences compare the index in v0 against 1..7 and fold in the
; matching constant with v_cndmask_b32; the final select writes v0.
7 define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
8 ; GCN-LABEL: dyn_extract_v8f32_const_s_v:
9 ; GCN: ; %bb.0: ; %entry
10 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
12 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
13 ; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
14 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
16 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
17 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
18 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
19 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
20 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
21 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
22 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
23 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
24 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
25 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
26 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
27 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
28 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
29 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
30 ; GCN-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
33 ; GFX10PLUS: ; %bb.0: ; %entry
34 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
36 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
37 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
38 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
39 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
40 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
41 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
42 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
43 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
44 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
45 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
46 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
47 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
48 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
49 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
51 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
; Dynamic extractelement from the constant <8 x float> vector (1.0 through 8.0)
; in an amdgpu_ps function whose index %sel is passed inreg (the checks read it
; from s2).
; The GPRIDX checks expect an s_cmp_eq_u32 / s_cselect_b32 chain. The MOVREL and
; GFX10PLUS checks expect the constants materialized in s4-s11, m0 loaded from
; s2, and a single s_movrels_b32. Every variant ends by copying s0 into v0.
55 define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
56 ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
57 ; GPRIDX: ; %bb.0: ; %entry
58 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1
59 ; GPRIDX-NEXT: s_cselect_b32 s0, 2.0, 1.0
60 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2
61 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40400000, s0
62 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3
63 ; GPRIDX-NEXT: s_cselect_b32 s0, 4.0, s0
64 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 4
65 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40a00000, s0
66 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 5
67 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40c00000, s0
68 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 6
69 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x40e00000, s0
70 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 7
71 ; GPRIDX-NEXT: s_cselect_b32 s0, 0x41000000, s0
72 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
73 ; GPRIDX-NEXT: ; return to shader part epilog
75 ; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
76 ; MOVREL: ; %bb.0: ; %entry
77 ; MOVREL-NEXT: s_mov_b32 s4, 1.0
78 ; MOVREL-NEXT: s_mov_b32 m0, s2
79 ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
80 ; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
81 ; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
82 ; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
83 ; MOVREL-NEXT: s_mov_b32 s7, 4.0
84 ; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
85 ; MOVREL-NEXT: s_mov_b32 s5, 2.0
86 ; MOVREL-NEXT: s_movrels_b32 s0, s4
87 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
88 ; MOVREL-NEXT: ; return to shader part epilog
90 ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
91 ; GFX10PLUS: ; %bb.0: ; %entry
92 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
93 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
94 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
95 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
96 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
97 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
98 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
99 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
100 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
101 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
102 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
103 ; GFX10PLUS-NEXT: ; return to shader part epilog
105 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
; Dynamic extractelement from an inreg <8 x float> argument (the checks read its
; elements from s2-s9) using a non-inreg index %sel (compared from v0).
; The GCN checks copy s2-s9 into v1-v8 and then run a v_cmp_eq_u32 /
; v_cndmask_b32 chain. The GFX10PLUS checks copy only s3 into a VGPR and use the
; remaining SGPRs directly as v_cndmask_b32 operands. The final select writes v0.
109 define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
110 ; GCN-LABEL: dyn_extract_v8f32_s_v:
111 ; GCN: ; %bb.0: ; %entry
112 ; GCN-NEXT: v_mov_b32_e32 v1, s2
113 ; GCN-NEXT: v_mov_b32_e32 v2, s3
114 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
115 ; GCN-NEXT: v_mov_b32_e32 v3, s4
116 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
117 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
118 ; GCN-NEXT: v_mov_b32_e32 v4, s5
119 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
120 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
121 ; GCN-NEXT: v_mov_b32_e32 v5, s6
122 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
123 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
124 ; GCN-NEXT: v_mov_b32_e32 v6, s7
125 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
126 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
127 ; GCN-NEXT: v_mov_b32_e32 v7, s8
128 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
129 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
130 ; GCN-NEXT: v_mov_b32_e32 v8, s9
131 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
132 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
133 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, vcc
134 ; GCN-NEXT: ; return to shader part epilog
136 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
137 ; GFX10PLUS: ; %bb.0: ; %entry
138 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
139 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
140 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
141 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
142 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
143 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
144 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
145 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
146 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
147 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
148 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
149 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
150 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
151 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
152 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s9, vcc_lo
153 ; GFX10PLUS-NEXT: ; return to shader part epilog
155 %ext = extractelement <8 x float> %vec, i32 %sel
; Dynamic extractelement from a non-inreg <8 x float> argument with a non-inreg
; index %sel. The checks show the vector in v0-v7 and the index in v8.
; Both checked sequences are a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1..7 that accumulates the selected element in v0.
159 define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
160 ; GCN-LABEL: dyn_extract_v8f32_v_v:
161 ; GCN: ; %bb.0: ; %entry
162 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
164 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
165 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
166 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
167 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
168 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
169 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
170 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
171 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
172 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
173 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
174 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
175 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
176 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
177 ; GCN-NEXT: s_setpc_b64 s[30:31]
179 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
180 ; GFX10PLUS: ; %bb.0: ; %entry
181 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
183 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
184 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
185 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
186 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
187 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
188 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
189 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
190 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
191 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
192 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
193 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
194 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
195 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
196 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
198 %ext = extractelement <8 x float> %vec, i32 %sel
; Dynamic extractelement from a non-inreg <8 x float> argument (v0-v7 in the
; checks) with an inreg index %sel (read from s2).
; The GPRIDX checks expect a v_cmp_eq_u32_e64 / v_cndmask_b32 chain comparing s2
; against 1..7. The MOVREL and GFX10PLUS checks expect m0 loaded from s2
; followed by a single v_movrels_b32.
202 define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
203 ; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
204 ; GPRIDX: ; %bb.0: ; %entry
205 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
206 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
207 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
208 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
209 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
210 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
211 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
212 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
213 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
214 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
215 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
216 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
217 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
218 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
219 ; GPRIDX-NEXT: ; return to shader part epilog
221 ; MOVREL-LABEL: dyn_extract_v8f32_v_s:
222 ; MOVREL: ; %bb.0: ; %entry
223 ; MOVREL-NEXT: s_mov_b32 m0, s2
224 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
225 ; MOVREL-NEXT: ; return to shader part epilog
227 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
228 ; GFX10PLUS: ; %bb.0: ; %entry
229 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
230 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
231 ; GFX10PLUS-NEXT: ; return to shader part epilog
233 %ext = extractelement <8 x float> %vec, i32 %sel
; Dynamic extractelement where both the <8 x float> vector and the index %sel
; are inreg arguments. The checks show the vector in s2-s9 and the index in s10.
; The GPRIDX checks expect an s_cmp_eq_u32 / s_cselect_b32 chain. The MOVREL and
; GFX10PLUS checks expect s2-s9 moved down to s0-s7, m0 loaded from s10, and a
; single s_movrels_b32. Every variant ends by copying s0 into v0.
237 define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
238 ; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
239 ; GPRIDX: ; %bb.0: ; %entry
240 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
241 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
242 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
243 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
244 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
245 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
246 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
247 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
248 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
249 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
250 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
251 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
252 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
253 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
254 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
255 ; GPRIDX-NEXT: ; return to shader part epilog
257 ; MOVREL-LABEL: dyn_extract_v8f32_s_s:
258 ; MOVREL: ; %bb.0: ; %entry
259 ; MOVREL-NEXT: s_mov_b32 s0, s2
260 ; MOVREL-NEXT: s_mov_b32 m0, s10
261 ; MOVREL-NEXT: s_mov_b32 s1, s3
262 ; MOVREL-NEXT: s_mov_b32 s2, s4
263 ; MOVREL-NEXT: s_mov_b32 s3, s5
264 ; MOVREL-NEXT: s_mov_b32 s4, s6
265 ; MOVREL-NEXT: s_mov_b32 s5, s7
266 ; MOVREL-NEXT: s_mov_b32 s6, s8
267 ; MOVREL-NEXT: s_mov_b32 s7, s9
268 ; MOVREL-NEXT: s_movrels_b32 s0, s0
269 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
270 ; MOVREL-NEXT: ; return to shader part epilog
272 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
273 ; GFX10PLUS: ; %bb.0: ; %entry
274 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
275 ; GFX10PLUS-NEXT: s_mov_b32 m0, s10
276 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
277 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
278 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
279 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
280 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
281 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
282 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
283 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s0
284 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
285 ; GFX10PLUS-NEXT: ; return to shader part epilog
287 %ext = extractelement <8 x float> %vec, i32 %sel
; Dynamic extractelement from the constant <8 x i64> vector (1 through 8),
; indexed by the plain (non-inreg) i32 argument %sel (compared from v0).
; Each step in the checks is one v_cmp_eq_u32 followed by two v_cndmask_b32, one
; per 32-bit register of the value; the final pair writes v0 and v1.
; The GCN checks materialize every constant with s_mov_b64 and copy it into
; VGPRs. The GFX10 and GFX11 checks alternate between two SGPR pairs and use
; them directly as select operands. GFX11 has its own check block because the
; initial copy is a v_dual_mov_b32.
291 define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
292 ; GCN-LABEL: dyn_extract_v8i64_const_s_v:
293 ; GCN: ; %bb.0: ; %entry
294 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GCN-NEXT: s_mov_b64 s[16:17], 2
296 ; GCN-NEXT: s_mov_b64 s[18:19], 1
297 ; GCN-NEXT: s_mov_b64 s[14:15], 3
298 ; GCN-NEXT: v_mov_b32_e32 v1, s18
299 ; GCN-NEXT: v_mov_b32_e32 v2, s19
300 ; GCN-NEXT: v_mov_b32_e32 v3, s16
301 ; GCN-NEXT: v_mov_b32_e32 v4, s17
302 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
303 ; GCN-NEXT: s_mov_b64 s[12:13], 4
304 ; GCN-NEXT: v_mov_b32_e32 v5, s14
305 ; GCN-NEXT: v_mov_b32_e32 v6, s15
306 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
307 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
308 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
309 ; GCN-NEXT: s_mov_b64 s[10:11], 5
310 ; GCN-NEXT: v_mov_b32_e32 v7, s12
311 ; GCN-NEXT: v_mov_b32_e32 v8, s13
312 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
313 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
314 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
315 ; GCN-NEXT: s_mov_b64 s[8:9], 6
316 ; GCN-NEXT: v_mov_b32_e32 v9, s10
317 ; GCN-NEXT: v_mov_b32_e32 v10, s11
318 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
319 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
320 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
321 ; GCN-NEXT: s_mov_b64 s[6:7], 7
322 ; GCN-NEXT: v_mov_b32_e32 v11, s8
323 ; GCN-NEXT: v_mov_b32_e32 v12, s9
324 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
325 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
326 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
327 ; GCN-NEXT: s_mov_b64 s[4:5], 8
328 ; GCN-NEXT: v_mov_b32_e32 v13, s6
329 ; GCN-NEXT: v_mov_b32_e32 v14, s7
330 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
331 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
332 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
333 ; GCN-NEXT: v_mov_b32_e32 v15, s4
334 ; GCN-NEXT: v_mov_b32_e32 v16, s5
335 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
336 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
337 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
338 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
339 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
340 ; GCN-NEXT: s_setpc_b64 s[30:31]
342 ; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
343 ; GFX10: ; %bb.0: ; %entry
344 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX10-NEXT: s_mov_b64 s[4:5], 2
346 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
347 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
348 ; GFX10-NEXT: v_mov_b32_e32 v2, s5
349 ; GFX10-NEXT: s_mov_b64 s[6:7], 1
350 ; GFX10-NEXT: s_mov_b64 s[4:5], 3
351 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo
352 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo
353 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
354 ; GFX10-NEXT: s_mov_b64 s[6:7], 4
355 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
356 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
357 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
358 ; GFX10-NEXT: s_mov_b64 s[4:5], 5
359 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
360 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
361 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
362 ; GFX10-NEXT: s_mov_b64 s[6:7], 6
363 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
364 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
365 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
366 ; GFX10-NEXT: s_mov_b64 s[4:5], 7
367 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
368 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
369 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
370 ; GFX10-NEXT: s_mov_b64 s[6:7], 8
371 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
372 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
373 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
374 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo
375 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s7, vcc_lo
376 ; GFX10-NEXT: s_setpc_b64 s[30:31]
378 ; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
379 ; GFX11: ; %bb.0: ; %entry
380 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; GFX11-NEXT: s_mov_b64 s[0:1], 2
382 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
383 ; GFX11-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
384 ; GFX11-NEXT: s_mov_b64 s[2:3], 1
385 ; GFX11-NEXT: s_mov_b64 s[0:1], 3
386 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
387 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
388 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
389 ; GFX11-NEXT: s_mov_b64 s[2:3], 4
390 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
391 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s1, vcc_lo
392 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
393 ; GFX11-NEXT: s_mov_b64 s[0:1], 5
394 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
395 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo
396 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
397 ; GFX11-NEXT: s_mov_b64 s[2:3], 6
398 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
399 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s1, vcc_lo
400 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
401 ; GFX11-NEXT: s_mov_b64 s[0:1], 7
402 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
403 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo
404 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
405 ; GFX11-NEXT: s_mov_b64 s[2:3], 8
406 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
407 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s1, vcc_lo
408 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
409 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
410 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
411 ; GFX11-NEXT: s_setpc_b64 s[30:31]
413 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
; Dynamic extractelement from the constant <8 x i64> vector (1 through 8) with
; an inreg index %sel (read from s2); the extracted value is stored through an
; undef addrspace(1) pointer.
; All four check blocks expect the constants in s[4:5]..s[18:19], m0 loaded from
; s2, one s_movrels_b64 into s[0:1], and a copy into v0/v1 before the store.
; They differ only in the tail. GPRIDX and GFX10 use global_store_dwordx2,
; MOVREL uses flat_store_dwordx2, and GFX11 uses v_dual_mov_b32 plus
; global_store_b64 followed by s_nop 0 and s_sendmsg MSG_DEALLOC_VGPRS.
417 define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
418 ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
419 ; GPRIDX: ; %bb.0: ; %entry
420 ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
421 ; GPRIDX-NEXT: s_mov_b32 m0, s2
422 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
423 ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
424 ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
425 ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
426 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
427 ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
428 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
429 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
430 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
431 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
432 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
433 ; GPRIDX-NEXT: s_endpgm
435 ; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
436 ; MOVREL: ; %bb.0: ; %entry
437 ; MOVREL-NEXT: s_mov_b64 s[4:5], 1
438 ; MOVREL-NEXT: s_mov_b32 m0, s2
439 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8
440 ; MOVREL-NEXT: s_mov_b64 s[16:17], 7
441 ; MOVREL-NEXT: s_mov_b64 s[14:15], 6
442 ; MOVREL-NEXT: s_mov_b64 s[12:13], 5
443 ; MOVREL-NEXT: s_mov_b64 s[10:11], 4
444 ; MOVREL-NEXT: s_mov_b64 s[8:9], 3
445 ; MOVREL-NEXT: s_mov_b64 s[6:7], 2
446 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
447 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
448 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
449 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
450 ; MOVREL-NEXT: s_endpgm
452 ; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
453 ; GFX10: ; %bb.0: ; %entry
454 ; GFX10-NEXT: s_mov_b64 s[4:5], 1
455 ; GFX10-NEXT: s_mov_b32 m0, s2
456 ; GFX10-NEXT: s_mov_b64 s[18:19], 8
457 ; GFX10-NEXT: s_mov_b64 s[16:17], 7
458 ; GFX10-NEXT: s_mov_b64 s[14:15], 6
459 ; GFX10-NEXT: s_mov_b64 s[12:13], 5
460 ; GFX10-NEXT: s_mov_b64 s[10:11], 4
461 ; GFX10-NEXT: s_mov_b64 s[8:9], 3
462 ; GFX10-NEXT: s_mov_b64 s[6:7], 2
463 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5]
464 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
465 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
466 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
467 ; GFX10-NEXT: s_endpgm
469 ; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
470 ; GFX11: ; %bb.0: ; %entry
471 ; GFX11-NEXT: s_mov_b64 s[4:5], 1
472 ; GFX11-NEXT: s_mov_b32 m0, s2
473 ; GFX11-NEXT: s_mov_b64 s[18:19], 8
474 ; GFX11-NEXT: s_mov_b64 s[16:17], 7
475 ; GFX11-NEXT: s_mov_b64 s[14:15], 6
476 ; GFX11-NEXT: s_mov_b64 s[12:13], 5
477 ; GFX11-NEXT: s_mov_b64 s[10:11], 4
478 ; GFX11-NEXT: s_mov_b64 s[8:9], 3
479 ; GFX11-NEXT: s_mov_b64 s[6:7], 2
480 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[4:5]
481 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
482 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
483 ; GFX11-NEXT: s_nop 0
484 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
485 ; GFX11-NEXT: s_endpgm
487 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
488 store i64 %ext, ptr addrspace(1) undef
; Dynamic extractelement from an inreg <8 x i64> argument (s2-s17 in the checks)
; with a non-inreg index %sel (compared from v0); the extracted value is stored
; through an undef addrspace(1) pointer.
; Each step in the checks is one v_cmp_eq_u32 followed by two v_cndmask_b32, one
; per 32-bit register of the value. The GPRIDX and MOVREL checks first copy
; every SGPR into a VGPR. The GFX10 and GFX11 checks copy only s4/s5 and use the
; other SGPRs directly as select operands.
; The store is global_store_dwordx2 (GPRIDX, GFX10), flat_store_dwordx2 (MOVREL)
; or global_store_b64 (GFX11).
492 define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
493 ; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
494 ; GPRIDX: ; %bb.0: ; %entry
495 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
496 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
497 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
498 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
499 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
500 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
501 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
502 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
503 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
504 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
505 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s8
506 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s9
507 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
508 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
509 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
510 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s10
511 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s11
512 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
513 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
514 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
515 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s12
516 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s13
517 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
518 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
519 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
520 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s14
521 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s15
522 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
523 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
524 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
525 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s16
526 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s17
527 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
528 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
529 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
530 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
531 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
532 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
533 ; GPRIDX-NEXT: s_endpgm
535 ; MOVREL-LABEL: dyn_extract_v8i64_s_v:
536 ; MOVREL: ; %bb.0: ; %entry
537 ; MOVREL-NEXT: v_mov_b32_e32 v1, s2
538 ; MOVREL-NEXT: v_mov_b32_e32 v2, s3
539 ; MOVREL-NEXT: v_mov_b32_e32 v3, s4
540 ; MOVREL-NEXT: v_mov_b32_e32 v4, s5
541 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
542 ; MOVREL-NEXT: v_mov_b32_e32 v5, s6
543 ; MOVREL-NEXT: v_mov_b32_e32 v6, s7
544 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
545 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
546 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
547 ; MOVREL-NEXT: v_mov_b32_e32 v7, s8
548 ; MOVREL-NEXT: v_mov_b32_e32 v8, s9
549 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
550 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
551 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
552 ; MOVREL-NEXT: v_mov_b32_e32 v9, s10
553 ; MOVREL-NEXT: v_mov_b32_e32 v10, s11
554 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
555 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
556 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
557 ; MOVREL-NEXT: v_mov_b32_e32 v11, s12
558 ; MOVREL-NEXT: v_mov_b32_e32 v12, s13
559 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
560 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
561 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
562 ; MOVREL-NEXT: v_mov_b32_e32 v13, s14
563 ; MOVREL-NEXT: v_mov_b32_e32 v14, s15
564 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
565 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
566 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
567 ; MOVREL-NEXT: v_mov_b32_e32 v15, s16
568 ; MOVREL-NEXT: v_mov_b32_e32 v16, s17
569 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
570 ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
571 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
572 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
573 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
574 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
575 ; MOVREL-NEXT: s_endpgm
577 ; GFX10-LABEL: dyn_extract_v8i64_s_v:
578 ; GFX10: ; %bb.0: ; %entry
579 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
580 ; GFX10-NEXT: v_mov_b32_e32 v2, s5
581 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
582 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
583 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
584 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
585 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
586 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
587 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
588 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
589 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
590 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
591 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
592 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
593 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
594 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
595 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
596 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
597 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
598 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
599 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
600 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo
601 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s17, vcc_lo
602 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
603 ; GFX10-NEXT: s_endpgm
605 ; GFX11-LABEL: dyn_extract_v8i64_s_v:
606 ; GFX11: ; %bb.0: ; %entry
607 ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
608 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
609 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
610 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
611 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
612 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
613 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
614 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
615 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
616 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
617 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
618 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
619 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
620 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
621 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
622 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
623 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
624 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
625 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
626 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
627 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo
628 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s17, vcc_lo
629 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
630 ; GFX11-NEXT: s_nop 0
631 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
632 ; GFX11-NEXT: s_endpgm
634 %ext = extractelement <8 x i64> %vec, i32 %sel
635 store i64 %ext, ptr addrspace(1) undef
; Dynamic extractelement from a non-inreg <8 x i64> argument with a non-inreg
; index %sel. The checks show the vector in v0-v15 and the index in v16.
; Each step compares v16 against 1..7 and conditionally replaces the v0/v1 pair
; with the next register pair. The GCN and GFX10 checks use two v_cndmask_b32
; per step. GFX11 has its own check block because each pair of selects is a
; single v_dual_cndmask_b32.
639 define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
640 ; GCN-LABEL: dyn_extract_v8i64_v_v:
641 ; GCN: ; %bb.0: ; %entry
642 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
644 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
645 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
646 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
647 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
648 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
649 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
650 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
651 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
652 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
653 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
654 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
655 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
656 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
657 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
658 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
659 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
660 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
661 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
662 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
663 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
664 ; GCN-NEXT: s_setpc_b64 s[30:31]
666 ; GFX10-LABEL: dyn_extract_v8i64_v_v:
667 ; GFX10: ; %bb.0: ; %entry
668 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
670 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
671 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
672 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
673 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
674 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
675 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
676 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
677 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
678 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
679 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
680 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
681 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
682 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
683 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
684 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
685 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
686 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
687 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
688 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
689 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
690 ; GFX10-NEXT: s_setpc_b64 s[30:31]
692 ; GFX11-LABEL: dyn_extract_v8i64_v_v:
693 ; GFX11: ; %bb.0: ; %entry
694 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
696 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
697 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
698 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
699 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
700 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
701 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
702 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
703 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
704 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
705 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
706 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
707 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
708 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
709 ; GFX11-NEXT: s_setpc_b64 s[30:31]
711 %ext = extractelement <8 x i64> %vec, i32 %sel
; Dynamic extractelement from a non-inreg <8 x i64> argument with an inreg index
; %sel (read from s2); the extracted value is stored through an undef
; addrspace(1) pointer.
; Every check block first shifts the index left by 1 (presumably because each
; i64 element occupies two 32-bit registers -- confirm against the backend).
; The GPRIDX checks bracket two v_mov_b32 with s_set_gpr_idx_on / _off. The
; MOVREL, GFX10 and GFX11 checks write the shifted index to m0 and use two
; v_movrels_b32. The result lands in v16/v17 and is then stored.
715 define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
716 ; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
717 ; GPRIDX: ; %bb.0: ; %entry
718 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
719 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
720 ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
721 ; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
722 ; GPRIDX-NEXT: s_set_gpr_idx_off
723 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
724 ; GPRIDX-NEXT: s_endpgm
726 ; MOVREL-LABEL: dyn_extract_v8i64_v_s:
727 ; MOVREL: ; %bb.0: ; %entry
728 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
729 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
730 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
731 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
732 ; MOVREL-NEXT: s_endpgm
734 ; GFX10-LABEL: dyn_extract_v8i64_v_s:
735 ; GFX10: ; %bb.0: ; %entry
736 ; GFX10-NEXT: s_lshl_b32 m0, s2, 1
737 ; GFX10-NEXT: v_movrels_b32_e32 v16, v0
738 ; GFX10-NEXT: v_movrels_b32_e32 v17, v1
739 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
740 ; GFX10-NEXT: s_endpgm
742 ; GFX11-LABEL: dyn_extract_v8i64_v_s:
743 ; GFX11: ; %bb.0: ; %entry
744 ; GFX11-NEXT: s_lshl_b32 m0, s2, 1
745 ; GFX11-NEXT: v_movrels_b32_e32 v16, v0
746 ; GFX11-NEXT: v_movrels_b32_e32 v17, v1
747 ; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off
748 ; GFX11-NEXT: s_nop 0
749 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
750 ; GFX11-NEXT: s_endpgm
752 %ext = extractelement <8 x i64> %vec, i32 %sel
753 store i64 %ext, ptr addrspace(1) undef
; Dynamic extractelement from <8 x i64> where both %vec and %sel are marked
; inreg; the extracted i64 is stored to an undef addrspace(1) pointer.
; All four expected outputs copy s[2:17] down to s[0:15], load m0 from s18,
; read the element with a single s_movrels_b64, and move the result into
; v[0:1] for the store.
757 define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
758 ; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
759 ; GPRIDX: ; %bb.0: ; %entry
760 ; GPRIDX-NEXT: s_mov_b32 s0, s2
761 ; GPRIDX-NEXT: s_mov_b32 s1, s3
762 ; GPRIDX-NEXT: s_mov_b32 m0, s18
763 ; GPRIDX-NEXT: s_mov_b32 s2, s4
764 ; GPRIDX-NEXT: s_mov_b32 s3, s5
765 ; GPRIDX-NEXT: s_mov_b32 s4, s6
766 ; GPRIDX-NEXT: s_mov_b32 s5, s7
767 ; GPRIDX-NEXT: s_mov_b32 s6, s8
768 ; GPRIDX-NEXT: s_mov_b32 s7, s9
769 ; GPRIDX-NEXT: s_mov_b32 s8, s10
770 ; GPRIDX-NEXT: s_mov_b32 s9, s11
771 ; GPRIDX-NEXT: s_mov_b32 s10, s12
772 ; GPRIDX-NEXT: s_mov_b32 s11, s13
773 ; GPRIDX-NEXT: s_mov_b32 s12, s14
774 ; GPRIDX-NEXT: s_mov_b32 s13, s15
775 ; GPRIDX-NEXT: s_mov_b32 s14, s16
776 ; GPRIDX-NEXT: s_mov_b32 s15, s17
777 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
778 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
779 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
780 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
781 ; GPRIDX-NEXT: s_endpgm
783 ; MOVREL-LABEL: dyn_extract_v8i64_s_s:
784 ; MOVREL: ; %bb.0: ; %entry
785 ; MOVREL-NEXT: s_mov_b32 s0, s2
786 ; MOVREL-NEXT: s_mov_b32 s1, s3
787 ; MOVREL-NEXT: s_mov_b32 m0, s18
788 ; MOVREL-NEXT: s_mov_b32 s2, s4
789 ; MOVREL-NEXT: s_mov_b32 s3, s5
790 ; MOVREL-NEXT: s_mov_b32 s4, s6
791 ; MOVREL-NEXT: s_mov_b32 s5, s7
792 ; MOVREL-NEXT: s_mov_b32 s6, s8
793 ; MOVREL-NEXT: s_mov_b32 s7, s9
794 ; MOVREL-NEXT: s_mov_b32 s8, s10
795 ; MOVREL-NEXT: s_mov_b32 s9, s11
796 ; MOVREL-NEXT: s_mov_b32 s10, s12
797 ; MOVREL-NEXT: s_mov_b32 s11, s13
798 ; MOVREL-NEXT: s_mov_b32 s12, s14
799 ; MOVREL-NEXT: s_mov_b32 s13, s15
800 ; MOVREL-NEXT: s_mov_b32 s14, s16
801 ; MOVREL-NEXT: s_mov_b32 s15, s17
802 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
803 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
804 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
805 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
806 ; MOVREL-NEXT: s_endpgm
808 ; GFX10-LABEL: dyn_extract_v8i64_s_s:
809 ; GFX10: ; %bb.0: ; %entry
810 ; GFX10-NEXT: s_mov_b32 s0, s2
811 ; GFX10-NEXT: s_mov_b32 s1, s3
812 ; GFX10-NEXT: s_mov_b32 m0, s18
813 ; GFX10-NEXT: s_mov_b32 s2, s4
814 ; GFX10-NEXT: s_mov_b32 s3, s5
815 ; GFX10-NEXT: s_mov_b32 s4, s6
816 ; GFX10-NEXT: s_mov_b32 s5, s7
817 ; GFX10-NEXT: s_mov_b32 s6, s8
818 ; GFX10-NEXT: s_mov_b32 s7, s9
819 ; GFX10-NEXT: s_mov_b32 s8, s10
820 ; GFX10-NEXT: s_mov_b32 s9, s11
821 ; GFX10-NEXT: s_mov_b32 s10, s12
822 ; GFX10-NEXT: s_mov_b32 s11, s13
823 ; GFX10-NEXT: s_mov_b32 s12, s14
824 ; GFX10-NEXT: s_mov_b32 s13, s15
825 ; GFX10-NEXT: s_mov_b32 s14, s16
826 ; GFX10-NEXT: s_mov_b32 s15, s17
827 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
828 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
829 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
830 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
831 ; GFX10-NEXT: s_endpgm
833 ; GFX11-LABEL: dyn_extract_v8i64_s_s:
834 ; GFX11: ; %bb.0: ; %entry
835 ; GFX11-NEXT: s_mov_b32 s0, s2
836 ; GFX11-NEXT: s_mov_b32 s1, s3
837 ; GFX11-NEXT: s_mov_b32 m0, s18
838 ; GFX11-NEXT: s_mov_b32 s2, s4
839 ; GFX11-NEXT: s_mov_b32 s3, s5
840 ; GFX11-NEXT: s_mov_b32 s4, s6
841 ; GFX11-NEXT: s_mov_b32 s5, s7
842 ; GFX11-NEXT: s_mov_b32 s6, s8
843 ; GFX11-NEXT: s_mov_b32 s7, s9
844 ; GFX11-NEXT: s_mov_b32 s8, s10
845 ; GFX11-NEXT: s_mov_b32 s9, s11
846 ; GFX11-NEXT: s_mov_b32 s10, s12
847 ; GFX11-NEXT: s_mov_b32 s11, s13
848 ; GFX11-NEXT: s_mov_b32 s12, s14
849 ; GFX11-NEXT: s_mov_b32 s13, s15
850 ; GFX11-NEXT: s_mov_b32 s14, s16
851 ; GFX11-NEXT: s_mov_b32 s15, s17
852 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
853 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
854 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
855 ; GFX11-NEXT: s_nop 0
856 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
857 ; GFX11-NEXT: s_endpgm
859 %ext = extractelement <8 x i64> %vec, i32 %sel
860 store i64 %ext, ptr addrspace(1) undef
; Dynamic extractelement from <8 x float> at index %sel + 3, with both %vec
; and %sel marked inreg. The expected outputs differ by target: the GPRIDX
; checks add 3 to the index and walk an s_cmp_eq_u32 / s_cselect_b32 chain,
; while the MOVREL and GFX10PLUS checks load m0 from the unmodified index and
; use s3 as the s_movrels_b32 source, so no separate add appears.
864 define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
865 ; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
866 ; GPRIDX: ; %bb.0: ; %entry
867 ; GPRIDX-NEXT: s_add_i32 s10, s10, 3
868 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
869 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
870 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
871 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
872 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
873 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
874 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
875 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
876 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
877 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
878 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
879 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
880 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
881 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
882 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
883 ; GPRIDX-NEXT: ; return to shader part epilog
885 ; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
886 ; MOVREL: ; %bb.0: ; %entry
887 ; MOVREL-NEXT: s_mov_b32 s0, s2
888 ; MOVREL-NEXT: s_mov_b32 s1, s3
889 ; MOVREL-NEXT: s_mov_b32 s3, s5
890 ; MOVREL-NEXT: s_mov_b32 m0, s10
891 ; MOVREL-NEXT: s_mov_b32 s2, s4
892 ; MOVREL-NEXT: s_mov_b32 s4, s6
893 ; MOVREL-NEXT: s_mov_b32 s5, s7
894 ; MOVREL-NEXT: s_mov_b32 s6, s8
895 ; MOVREL-NEXT: s_mov_b32 s7, s9
896 ; MOVREL-NEXT: s_movrels_b32 s0, s3
897 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
898 ; MOVREL-NEXT: ; return to shader part epilog
900 ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3:
901 ; GFX10PLUS: ; %bb.0: ; %entry
902 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
903 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
904 ; GFX10PLUS-NEXT: s_mov_b32 m0, s10
905 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
906 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
907 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
908 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
909 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
910 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
911 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s3
912 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
913 ; GFX10PLUS-NEXT: ; return to shader part epilog
915 %add = add i32 %sel, 3
916 %ext = extractelement <8 x float> %vec, i32 %add
; Dynamic extractelement from <8 x float> at index %sel + 3, with neither
; argument marked inreg. Every expected output adds 3 to the index in v8 and
; then selects the element with a v_cmp_eq_u32 / v_cndmask_b32 chain over
; indices 1..7. The targets differ only in the add mnemonic and in vcc versus
; vcc_lo.
920 define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
921 ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
922 ; GPRIDX: ; %bb.0: ; %entry
923 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924 ; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8
925 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
926 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
927 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
928 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
929 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
930 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
931 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
932 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
933 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
934 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
935 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
936 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
937 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
938 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
939 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
941 ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
942 ; MOVREL: ; %bb.0: ; %entry
943 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8
945 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
946 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
947 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
948 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
949 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
950 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
951 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
952 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
953 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
954 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
955 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
956 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
957 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
958 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
959 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
961 ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3:
962 ; GFX10PLUS: ; %bb.0: ; %entry
963 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964 ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8
965 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
966 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
967 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
968 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
969 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
970 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
971 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
972 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
973 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
974 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
975 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
976 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
977 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
978 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
979 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
981 %add = add i32 %sel, 3
982 %ext = extractelement <8 x float> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 1, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[2:3], so no
; separate add instruction appears.
986 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
987 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
988 ; GCN: ; %bb.0: ; %entry
989 ; GCN-NEXT: s_mov_b32 s0, s2
990 ; GCN-NEXT: s_mov_b32 s1, s3
991 ; GCN-NEXT: s_mov_b32 s2, s4
992 ; GCN-NEXT: s_mov_b32 s3, s5
993 ; GCN-NEXT: s_mov_b32 m0, s18
994 ; GCN-NEXT: s_mov_b32 s4, s6
995 ; GCN-NEXT: s_mov_b32 s5, s7
996 ; GCN-NEXT: s_mov_b32 s6, s8
997 ; GCN-NEXT: s_mov_b32 s7, s9
998 ; GCN-NEXT: s_mov_b32 s8, s10
999 ; GCN-NEXT: s_mov_b32 s9, s11
1000 ; GCN-NEXT: s_mov_b32 s10, s12
1001 ; GCN-NEXT: s_mov_b32 s11, s13
1002 ; GCN-NEXT: s_mov_b32 s12, s14
1003 ; GCN-NEXT: s_mov_b32 s13, s15
1004 ; GCN-NEXT: s_mov_b32 s14, s16
1005 ; GCN-NEXT: s_mov_b32 s15, s17
1006 ; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3]
1007 ; GCN-NEXT: ; return to shader part epilog
1009 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1:
1010 ; GFX10PLUS: ; %bb.0: ; %entry
1011 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1012 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1013 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1014 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1015 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1016 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1017 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1018 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1019 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1020 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1021 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1022 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1023 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1024 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1025 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1026 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1027 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1028 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[2:3]
1029 ; GFX10PLUS-NEXT: ; return to shader part epilog
1031 %add = add i32 %sel, 1
1032 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 2, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[4:5], so no
; separate add instruction appears.
1036 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
1037 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
1038 ; GCN: ; %bb.0: ; %entry
1039 ; GCN-NEXT: s_mov_b32 s0, s2
1040 ; GCN-NEXT: s_mov_b32 s1, s3
1041 ; GCN-NEXT: s_mov_b32 s2, s4
1042 ; GCN-NEXT: s_mov_b32 s3, s5
1043 ; GCN-NEXT: s_mov_b32 s4, s6
1044 ; GCN-NEXT: s_mov_b32 s5, s7
1045 ; GCN-NEXT: s_mov_b32 m0, s18
1046 ; GCN-NEXT: s_mov_b32 s6, s8
1047 ; GCN-NEXT: s_mov_b32 s7, s9
1048 ; GCN-NEXT: s_mov_b32 s8, s10
1049 ; GCN-NEXT: s_mov_b32 s9, s11
1050 ; GCN-NEXT: s_mov_b32 s10, s12
1051 ; GCN-NEXT: s_mov_b32 s11, s13
1052 ; GCN-NEXT: s_mov_b32 s12, s14
1053 ; GCN-NEXT: s_mov_b32 s13, s15
1054 ; GCN-NEXT: s_mov_b32 s14, s16
1055 ; GCN-NEXT: s_mov_b32 s15, s17
1056 ; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5]
1057 ; GCN-NEXT: ; return to shader part epilog
1059 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2:
1060 ; GFX10PLUS: ; %bb.0: ; %entry
1061 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1062 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1063 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1064 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1065 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1066 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1067 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1068 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1069 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1070 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1071 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1072 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1073 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1074 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1075 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1076 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1077 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1078 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[4:5]
1079 ; GFX10PLUS-NEXT: ; return to shader part epilog
1081 %add = add i32 %sel, 2
1082 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 3, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[6:7], so no
; separate add instruction appears.
1086 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
1087 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
1088 ; GCN: ; %bb.0: ; %entry
1089 ; GCN-NEXT: s_mov_b32 s0, s2
1090 ; GCN-NEXT: s_mov_b32 s1, s3
1091 ; GCN-NEXT: s_mov_b32 s2, s4
1092 ; GCN-NEXT: s_mov_b32 s3, s5
1093 ; GCN-NEXT: s_mov_b32 s4, s6
1094 ; GCN-NEXT: s_mov_b32 s5, s7
1095 ; GCN-NEXT: s_mov_b32 s6, s8
1096 ; GCN-NEXT: s_mov_b32 s7, s9
1097 ; GCN-NEXT: s_mov_b32 m0, s18
1098 ; GCN-NEXT: s_mov_b32 s8, s10
1099 ; GCN-NEXT: s_mov_b32 s9, s11
1100 ; GCN-NEXT: s_mov_b32 s10, s12
1101 ; GCN-NEXT: s_mov_b32 s11, s13
1102 ; GCN-NEXT: s_mov_b32 s12, s14
1103 ; GCN-NEXT: s_mov_b32 s13, s15
1104 ; GCN-NEXT: s_mov_b32 s14, s16
1105 ; GCN-NEXT: s_mov_b32 s15, s17
1106 ; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7]
1107 ; GCN-NEXT: ; return to shader part epilog
1109 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3:
1110 ; GFX10PLUS: ; %bb.0: ; %entry
1111 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1112 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1113 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1114 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1115 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1116 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1117 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1118 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1119 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1120 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1121 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1122 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1123 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1124 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1125 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1126 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1127 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1128 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[6:7]
1129 ; GFX10PLUS-NEXT: ; return to shader part epilog
1131 %add = add i32 %sel, 3
1132 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 4, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[8:9], so no
; separate add instruction appears.
1136 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1137 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1138 ; GCN: ; %bb.0: ; %entry
1139 ; GCN-NEXT: s_mov_b32 s0, s2
1140 ; GCN-NEXT: s_mov_b32 s1, s3
1141 ; GCN-NEXT: s_mov_b32 s2, s4
1142 ; GCN-NEXT: s_mov_b32 s3, s5
1143 ; GCN-NEXT: s_mov_b32 s4, s6
1144 ; GCN-NEXT: s_mov_b32 s5, s7
1145 ; GCN-NEXT: s_mov_b32 s6, s8
1146 ; GCN-NEXT: s_mov_b32 s7, s9
1147 ; GCN-NEXT: s_mov_b32 s8, s10
1148 ; GCN-NEXT: s_mov_b32 s9, s11
1149 ; GCN-NEXT: s_mov_b32 m0, s18
1150 ; GCN-NEXT: s_mov_b32 s10, s12
1151 ; GCN-NEXT: s_mov_b32 s11, s13
1152 ; GCN-NEXT: s_mov_b32 s12, s14
1153 ; GCN-NEXT: s_mov_b32 s13, s15
1154 ; GCN-NEXT: s_mov_b32 s14, s16
1155 ; GCN-NEXT: s_mov_b32 s15, s17
1156 ; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9]
1157 ; GCN-NEXT: ; return to shader part epilog
1159 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4:
1160 ; GFX10PLUS: ; %bb.0: ; %entry
1161 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1162 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1163 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1164 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1165 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1166 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1167 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1168 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1169 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1170 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1171 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1172 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1173 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1174 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1175 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1176 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1177 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1178 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[8:9]
1179 ; GFX10PLUS-NEXT: ; return to shader part epilog
1181 %add = add i32 %sel, 4
1182 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 5, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[10:11], so no
; separate add instruction appears.
1186 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1187 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1188 ; GCN: ; %bb.0: ; %entry
1189 ; GCN-NEXT: s_mov_b32 s0, s2
1190 ; GCN-NEXT: s_mov_b32 s1, s3
1191 ; GCN-NEXT: s_mov_b32 s2, s4
1192 ; GCN-NEXT: s_mov_b32 s3, s5
1193 ; GCN-NEXT: s_mov_b32 s4, s6
1194 ; GCN-NEXT: s_mov_b32 s5, s7
1195 ; GCN-NEXT: s_mov_b32 s6, s8
1196 ; GCN-NEXT: s_mov_b32 s7, s9
1197 ; GCN-NEXT: s_mov_b32 s8, s10
1198 ; GCN-NEXT: s_mov_b32 s9, s11
1199 ; GCN-NEXT: s_mov_b32 s10, s12
1200 ; GCN-NEXT: s_mov_b32 s11, s13
1201 ; GCN-NEXT: s_mov_b32 m0, s18
1202 ; GCN-NEXT: s_mov_b32 s12, s14
1203 ; GCN-NEXT: s_mov_b32 s13, s15
1204 ; GCN-NEXT: s_mov_b32 s14, s16
1205 ; GCN-NEXT: s_mov_b32 s15, s17
1206 ; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11]
1207 ; GCN-NEXT: ; return to shader part epilog
1209 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5:
1210 ; GFX10PLUS: ; %bb.0: ; %entry
1211 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1212 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1213 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1214 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1215 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1216 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1217 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1218 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1219 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1220 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1221 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1222 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1223 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1224 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1225 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1226 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1227 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1228 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[10:11]
1229 ; GFX10PLUS-NEXT: ; return to shader part epilog
1231 %add = add i32 %sel, 5
1232 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 6, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[12:13], so no
; separate add instruction appears.
1236 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1237 ; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1238 ; GCN: ; %bb.0: ; %entry
1239 ; GCN-NEXT: s_mov_b32 s0, s2
1240 ; GCN-NEXT: s_mov_b32 s1, s3
1241 ; GCN-NEXT: s_mov_b32 s2, s4
1242 ; GCN-NEXT: s_mov_b32 s3, s5
1243 ; GCN-NEXT: s_mov_b32 s4, s6
1244 ; GCN-NEXT: s_mov_b32 s5, s7
1245 ; GCN-NEXT: s_mov_b32 s6, s8
1246 ; GCN-NEXT: s_mov_b32 s7, s9
1247 ; GCN-NEXT: s_mov_b32 s8, s10
1248 ; GCN-NEXT: s_mov_b32 s9, s11
1249 ; GCN-NEXT: s_mov_b32 s10, s12
1250 ; GCN-NEXT: s_mov_b32 s11, s13
1251 ; GCN-NEXT: s_mov_b32 s12, s14
1252 ; GCN-NEXT: s_mov_b32 s13, s15
1253 ; GCN-NEXT: s_mov_b32 m0, s18
1254 ; GCN-NEXT: s_mov_b32 s14, s16
1255 ; GCN-NEXT: s_mov_b32 s15, s17
1256 ; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13]
1257 ; GCN-NEXT: ; return to shader part epilog
1259 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6:
1260 ; GFX10PLUS: ; %bb.0: ; %entry
1261 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1262 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1263 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1264 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1265 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1266 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1267 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1268 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1269 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1270 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1271 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1272 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1273 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1274 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1275 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1276 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1277 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1278 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[12:13]
1279 ; GFX10PLUS-NEXT: ; return to shader part epilog
1281 %add = add i32 %sel, 6
1282 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 7, with both %vec
; and %sel marked inreg. In the expected output m0 is loaded from the
; unmodified index (s18) and the s_movrels_b64 source is s[14:15].
; Here the m0 write is the last instruction before s_movrels_b64. The GPRIDX
; checks expect an s_nop 0 between the two, which the MOVREL and GFX10PLUS
; checks do not have; that is why GPRIDX and MOVREL are checked separately
; rather than sharing one prefix.
; NOTE(review): the s_nop is presumably a hazard wait state after the m0
; write on gfx900 - confirm against the hazard recognizer.
1286 define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1287 ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1288 ; GPRIDX: ; %bb.0: ; %entry
1289 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1290 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1291 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1292 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1293 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1294 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1295 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1296 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1297 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1298 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1299 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1300 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1301 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1302 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1303 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1304 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1305 ; GPRIDX-NEXT: s_mov_b32 m0, s18
1306 ; GPRIDX-NEXT: s_nop 0
1307 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
1308 ; GPRIDX-NEXT: ; return to shader part epilog
1310 ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1311 ; MOVREL: ; %bb.0: ; %entry
1312 ; MOVREL-NEXT: s_mov_b32 s0, s2
1313 ; MOVREL-NEXT: s_mov_b32 s1, s3
1314 ; MOVREL-NEXT: s_mov_b32 s2, s4
1315 ; MOVREL-NEXT: s_mov_b32 s3, s5
1316 ; MOVREL-NEXT: s_mov_b32 s4, s6
1317 ; MOVREL-NEXT: s_mov_b32 s5, s7
1318 ; MOVREL-NEXT: s_mov_b32 s6, s8
1319 ; MOVREL-NEXT: s_mov_b32 s7, s9
1320 ; MOVREL-NEXT: s_mov_b32 s8, s10
1321 ; MOVREL-NEXT: s_mov_b32 s9, s11
1322 ; MOVREL-NEXT: s_mov_b32 s10, s12
1323 ; MOVREL-NEXT: s_mov_b32 s11, s13
1324 ; MOVREL-NEXT: s_mov_b32 s12, s14
1325 ; MOVREL-NEXT: s_mov_b32 s13, s15
1326 ; MOVREL-NEXT: s_mov_b32 s14, s16
1327 ; MOVREL-NEXT: s_mov_b32 s15, s17
1328 ; MOVREL-NEXT: s_mov_b32 m0, s18
1329 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
1330 ; MOVREL-NEXT: ; return to shader part epilog
1332 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7:
1333 ; GFX10PLUS: ; %bb.0: ; %entry
1334 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1335 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1336 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1337 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1338 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1339 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1340 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1341 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1342 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1343 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1344 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1345 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1346 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1347 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1348 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1349 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1350 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
1351 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[14:15]
1352 ; GFX10PLUS-NEXT: ; return to shader part epilog
1354 %add = add i32 %sel, 7
1355 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + (-1), with both
; %vec and %sel marked inreg. Unlike the positive-offset variants, the
; expected output keeps the add: m0 is computed with s_add_i32 m0, s18, -1
; and s_movrels_b64 reads from the base pair s[0:1].
1359 define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1360 ; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1361 ; GCN: ; %bb.0: ; %entry
1362 ; GCN-NEXT: s_mov_b32 s0, s2
1363 ; GCN-NEXT: s_mov_b32 s1, s3
1364 ; GCN-NEXT: s_add_i32 m0, s18, -1
1365 ; GCN-NEXT: s_mov_b32 s2, s4
1366 ; GCN-NEXT: s_mov_b32 s3, s5
1367 ; GCN-NEXT: s_mov_b32 s4, s6
1368 ; GCN-NEXT: s_mov_b32 s5, s7
1369 ; GCN-NEXT: s_mov_b32 s6, s8
1370 ; GCN-NEXT: s_mov_b32 s7, s9
1371 ; GCN-NEXT: s_mov_b32 s8, s10
1372 ; GCN-NEXT: s_mov_b32 s9, s11
1373 ; GCN-NEXT: s_mov_b32 s10, s12
1374 ; GCN-NEXT: s_mov_b32 s11, s13
1375 ; GCN-NEXT: s_mov_b32 s12, s14
1376 ; GCN-NEXT: s_mov_b32 s13, s15
1377 ; GCN-NEXT: s_mov_b32 s14, s16
1378 ; GCN-NEXT: s_mov_b32 s15, s17
1379 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
1380 ; GCN-NEXT: ; return to shader part epilog
1382 ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1383 ; GFX10PLUS: ; %bb.0: ; %entry
1384 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1385 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1386 ; GFX10PLUS-NEXT: s_add_i32 m0, s18, -1
1387 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1388 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1389 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1390 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1391 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1392 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1393 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1394 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1395 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1396 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1397 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1398 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1399 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1400 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1401 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
1402 ; GFX10PLUS-NEXT: ; return to shader part epilog
1404 %add = add i32 %sel, -1
1405 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from <8 x double> at index %sel + 3, with neither
; argument marked inreg. Every expected output adds 3 to the index in v16 and
; then, for each index 1..7, does one v_cmp_eq_u32 followed by selects for
; both 32-bit halves (v0 and v1). The GFX11 checks pair the two selects into
; a single v_dual_cndmask_b32; the other targets use two v_cndmask_b32.
1409 define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1410 ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1411 ; GPRIDX: ; %bb.0: ; %entry
1412 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413 ; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16
1414 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1415 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1416 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1417 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1418 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1419 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1420 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1421 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1422 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1423 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1424 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1425 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1426 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1427 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1428 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1429 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1430 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1431 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1432 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1433 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1434 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1435 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
1437 ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1438 ; MOVREL: ; %bb.0: ; %entry
1439 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1440 ; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16
1441 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1442 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1443 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1444 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1445 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1446 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1447 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1448 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1449 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1450 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1451 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1452 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1453 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1454 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1455 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1456 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1457 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1458 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1459 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1460 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1461 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1462 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
1464 ; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1465 ; GFX10: ; %bb.0: ; %entry
1466 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467 ; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16
1468 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1469 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1470 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1471 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1472 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1473 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1474 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1475 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1476 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1477 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1478 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1479 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1480 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1481 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1482 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1483 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1484 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1485 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1486 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1487 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1488 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1489 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1491 ; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3:
1492 ; GFX11: ; %bb.0: ; %entry
1493 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494 ; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16
1495 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1496 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
1497 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1498 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
1499 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1500 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
1501 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1502 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
1503 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1504 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
1505 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1506 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
1507 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1508 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
1509 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1511 %add = add i32 %sel, 3
1512 %ext = extractelement <8 x double> %vec, i32 %add
; Dynamic extractelement from a vector of eight addrspace(3) pointers, with
; neither argument marked inreg; the extracted pointer is returned.
; The expected output is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1..7, with the index in v8 and one 32-bit register per element (v0..v7).
1516 define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) {
1517 ; GCN-LABEL: dyn_extract_v8p3_v_v:
1518 ; GCN: ; %bb.0: ; %entry
1519 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
1521 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1522 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
1523 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1524 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
1525 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1526 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
1527 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1528 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
1529 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1530 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
1531 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1532 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
1533 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1534 ; GCN-NEXT: s_setpc_b64 s[30:31]
1536 ; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
1537 ; GFX10PLUS: ; %bb.0: ; %entry
1538 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
1540 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1541 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
1542 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1543 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
1544 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1545 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
1546 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1547 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
1548 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1549 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
1550 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1551 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
1552 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1553 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1555 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
1556 ret ptr addrspace(3) %ext
; Dynamic extractelement of an addrspace(3) pointer from an 8-element vector
; in an amdgpu_ps function where both the vector (s2-s9) and the index (s10)
; are inreg. The GPRIDX checks expect an s_cmp_eq_u32 / s_cselect_b32 chain;
; the MOVREL, GFX10 and GFX11 checks expect the vector to be copied down to
; s0-s7, m0 loaded from the index, and a single s_movrels_b32. The MOVREL
; checks additionally expect m0 to be set to -1 before the DS write.
; The extracted pointer is stored to an undef address (presumably only to give
; the value a use; the store address itself is not meaningful).
1559 define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) {
1560 ; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1561 ; GPRIDX: ; %bb.0: ; %entry
1562 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
1563 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
1564 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
1565 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
1566 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
1567 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
1568 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
1569 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
1570 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
1571 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
1572 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
1573 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
1574 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
1575 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
1576 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
1577 ; GPRIDX-NEXT: ds_write_b32 v0, v0
1578 ; GPRIDX-NEXT: s_endpgm
1580 ; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1581 ; MOVREL: ; %bb.0: ; %entry
1582 ; MOVREL-NEXT: s_mov_b32 s0, s2
1583 ; MOVREL-NEXT: s_mov_b32 m0, s10
1584 ; MOVREL-NEXT: s_mov_b32 s1, s3
1585 ; MOVREL-NEXT: s_mov_b32 s2, s4
1586 ; MOVREL-NEXT: s_mov_b32 s3, s5
1587 ; MOVREL-NEXT: s_mov_b32 s4, s6
1588 ; MOVREL-NEXT: s_mov_b32 s5, s7
1589 ; MOVREL-NEXT: s_mov_b32 s6, s8
1590 ; MOVREL-NEXT: s_mov_b32 s7, s9
1591 ; MOVREL-NEXT: s_movrels_b32 s0, s0
1592 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
1593 ; MOVREL-NEXT: s_mov_b32 m0, -1
1594 ; MOVREL-NEXT: ds_write_b32 v0, v0
1595 ; MOVREL-NEXT: s_endpgm
1597 ; GFX10-LABEL: dyn_extract_v8p3_s_s:
1598 ; GFX10: ; %bb.0: ; %entry
1599 ; GFX10-NEXT: s_mov_b32 s0, s2
1600 ; GFX10-NEXT: s_mov_b32 m0, s10
1601 ; GFX10-NEXT: s_mov_b32 s1, s3
1602 ; GFX10-NEXT: s_mov_b32 s2, s4
1603 ; GFX10-NEXT: s_mov_b32 s3, s5
1604 ; GFX10-NEXT: s_mov_b32 s4, s6
1605 ; GFX10-NEXT: s_mov_b32 s5, s7
1606 ; GFX10-NEXT: s_mov_b32 s6, s8
1607 ; GFX10-NEXT: s_mov_b32 s7, s9
1608 ; GFX10-NEXT: s_movrels_b32 s0, s0
1609 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1610 ; GFX10-NEXT: ds_write_b32 v0, v0
1611 ; GFX10-NEXT: s_endpgm
1613 ; GFX11-LABEL: dyn_extract_v8p3_s_s:
1614 ; GFX11: ; %bb.0: ; %entry
1615 ; GFX11-NEXT: s_mov_b32 s0, s2
1616 ; GFX11-NEXT: s_mov_b32 m0, s10
1617 ; GFX11-NEXT: s_mov_b32 s1, s3
1618 ; GFX11-NEXT: s_mov_b32 s2, s4
1619 ; GFX11-NEXT: s_mov_b32 s3, s5
1620 ; GFX11-NEXT: s_mov_b32 s4, s6
1621 ; GFX11-NEXT: s_mov_b32 s5, s7
1622 ; GFX11-NEXT: s_mov_b32 s6, s8
1623 ; GFX11-NEXT: s_mov_b32 s7, s9
1624 ; GFX11-NEXT: s_movrels_b32 s0, s0
1625 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
1626 ; GFX11-NEXT: ds_store_b32 v0, v0
1627 ; GFX11-NEXT: s_endpgm
1629 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
1630 store ptr addrspace(3) %ext, ptr addrspace(3) undef
; Dynamic extractelement of an addrspace(1) pointer from an 8-element vector
; where neither the vector nor the index is marked inreg. Each element
; occupies a VGPR pair (v0-v15) and the index is in v16, so every step of the
; compare/select chain needs two v_cndmask_b32 selects sharing one compare.
; The GFX11 checks expect each pair of selects to be emitted as a single
; v_dual_cndmask_b32, which is why GFX10 and GFX11 have separate check groups.
1634 define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) {
1635 ; GCN-LABEL: dyn_extract_v8p1_v_v:
1636 ; GCN: ; %bb.0: ; %entry
1637 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1639 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1640 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1641 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1642 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1643 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1644 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1645 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1646 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1647 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1648 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1649 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
1650 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1651 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
1652 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
1653 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1654 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
1655 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
1656 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1657 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
1658 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1659 ; GCN-NEXT: s_setpc_b64 s[30:31]
1661 ; GFX10-LABEL: dyn_extract_v8p1_v_v:
1662 ; GFX10: ; %bb.0: ; %entry
1663 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1664 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1665 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1666 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1667 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1668 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1669 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1670 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1671 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1672 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1673 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1674 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1675 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1676 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1677 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1678 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1679 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1680 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1681 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1682 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1683 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1684 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1685 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX11-LABEL: dyn_extract_v8p1_v_v:
1688 ; GFX11: ; %bb.0: ; %entry
1689 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
1691 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
1692 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1693 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
1694 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
1695 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
1696 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1697 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
1698 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
1699 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
1700 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1701 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
1702 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
1703 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
1704 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1706 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
1707 ret ptr addrspace(1) %ext
; Dynamic extractelement of an addrspace(1) pointer from an 8-element vector
; in an amdgpu_ps function where both the vector (s2-s17) and the index (s18)
; are inreg. Every run line expects the same extract sequence, namely the
; vector copied down to s0-s15, m0 loaded from the index, and one
; s_movrels_b64. The check groups differ only in the store that follows:
; global_store_dwordx2 for GPRIDX and GFX10, flat_store_dwordx2 for MOVREL,
; and global_store_b64 for GFX11 (which also pairs the two v_mov copies into a
; v_dual_mov_b32 and ends with s_nop 0 + s_sendmsg MSG_DEALLOC_VGPRS).
; The extracted pointer is stored to an undef address (presumably only to give
; the value a use; the store address itself is not meaningful).
1710 define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) {
1711 ; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1712 ; GPRIDX: ; %bb.0: ; %entry
1713 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1714 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1715 ; GPRIDX-NEXT: s_mov_b32 m0, s18
1716 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1717 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1718 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1719 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1720 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1721 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1722 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1723 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1724 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1725 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1726 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1727 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1728 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1729 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1730 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
1731 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
1732 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
1733 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1734 ; GPRIDX-NEXT: s_endpgm
1736 ; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1737 ; MOVREL: ; %bb.0: ; %entry
1738 ; MOVREL-NEXT: s_mov_b32 s0, s2
1739 ; MOVREL-NEXT: s_mov_b32 s1, s3
1740 ; MOVREL-NEXT: s_mov_b32 m0, s18
1741 ; MOVREL-NEXT: s_mov_b32 s2, s4
1742 ; MOVREL-NEXT: s_mov_b32 s3, s5
1743 ; MOVREL-NEXT: s_mov_b32 s4, s6
1744 ; MOVREL-NEXT: s_mov_b32 s5, s7
1745 ; MOVREL-NEXT: s_mov_b32 s6, s8
1746 ; MOVREL-NEXT: s_mov_b32 s7, s9
1747 ; MOVREL-NEXT: s_mov_b32 s8, s10
1748 ; MOVREL-NEXT: s_mov_b32 s9, s11
1749 ; MOVREL-NEXT: s_mov_b32 s10, s12
1750 ; MOVREL-NEXT: s_mov_b32 s11, s13
1751 ; MOVREL-NEXT: s_mov_b32 s12, s14
1752 ; MOVREL-NEXT: s_mov_b32 s13, s15
1753 ; MOVREL-NEXT: s_mov_b32 s14, s16
1754 ; MOVREL-NEXT: s_mov_b32 s15, s17
1755 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
1756 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
1757 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
1758 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
1759 ; MOVREL-NEXT: s_endpgm
1761 ; GFX10-LABEL: dyn_extract_v8p1_s_s:
1762 ; GFX10: ; %bb.0: ; %entry
1763 ; GFX10-NEXT: s_mov_b32 s0, s2
1764 ; GFX10-NEXT: s_mov_b32 s1, s3
1765 ; GFX10-NEXT: s_mov_b32 m0, s18
1766 ; GFX10-NEXT: s_mov_b32 s2, s4
1767 ; GFX10-NEXT: s_mov_b32 s3, s5
1768 ; GFX10-NEXT: s_mov_b32 s4, s6
1769 ; GFX10-NEXT: s_mov_b32 s5, s7
1770 ; GFX10-NEXT: s_mov_b32 s6, s8
1771 ; GFX10-NEXT: s_mov_b32 s7, s9
1772 ; GFX10-NEXT: s_mov_b32 s8, s10
1773 ; GFX10-NEXT: s_mov_b32 s9, s11
1774 ; GFX10-NEXT: s_mov_b32 s10, s12
1775 ; GFX10-NEXT: s_mov_b32 s11, s13
1776 ; GFX10-NEXT: s_mov_b32 s12, s14
1777 ; GFX10-NEXT: s_mov_b32 s13, s15
1778 ; GFX10-NEXT: s_mov_b32 s14, s16
1779 ; GFX10-NEXT: s_mov_b32 s15, s17
1780 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
1781 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1782 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1783 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1784 ; GFX10-NEXT: s_endpgm
1786 ; GFX11-LABEL: dyn_extract_v8p1_s_s:
1787 ; GFX11: ; %bb.0: ; %entry
1788 ; GFX11-NEXT: s_mov_b32 s0, s2
1789 ; GFX11-NEXT: s_mov_b32 s1, s3
1790 ; GFX11-NEXT: s_mov_b32 m0, s18
1791 ; GFX11-NEXT: s_mov_b32 s2, s4
1792 ; GFX11-NEXT: s_mov_b32 s3, s5
1793 ; GFX11-NEXT: s_mov_b32 s4, s6
1794 ; GFX11-NEXT: s_mov_b32 s5, s7
1795 ; GFX11-NEXT: s_mov_b32 s6, s8
1796 ; GFX11-NEXT: s_mov_b32 s7, s9
1797 ; GFX11-NEXT: s_mov_b32 s8, s10
1798 ; GFX11-NEXT: s_mov_b32 s9, s11
1799 ; GFX11-NEXT: s_mov_b32 s10, s12
1800 ; GFX11-NEXT: s_mov_b32 s11, s13
1801 ; GFX11-NEXT: s_mov_b32 s12, s14
1802 ; GFX11-NEXT: s_mov_b32 s13, s15
1803 ; GFX11-NEXT: s_mov_b32 s14, s16
1804 ; GFX11-NEXT: s_mov_b32 s15, s17
1805 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
1806 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1807 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
1808 ; GFX11-NEXT: s_nop 0
1809 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1810 ; GFX11-NEXT: s_endpgm
1812 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
1813 store ptr addrspace(1) %ext, ptr addrspace(1) undef
; Dynamic extractelement from a <16 x float> held in VGPRs with an inreg
; (SGPR, s2) index. No compare/select chain is expected here. The GPRIDX
; checks expect an indexed v_mov_b32 bracketed by s_set_gpr_idx_on/off; the
; MOVREL and GFX10PLUS checks expect m0 loaded from s2 and a v_movrels_b32.
1817 define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1818 ; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1819 ; GPRIDX: ; %bb.0: ; %entry
1820 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
1821 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
1822 ; GPRIDX-NEXT: s_set_gpr_idx_off
1823 ; GPRIDX-NEXT: ; return to shader part epilog
1825 ; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1826 ; MOVREL: ; %bb.0: ; %entry
1827 ; MOVREL-NEXT: s_mov_b32 m0, s2
1828 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
1829 ; MOVREL-NEXT: ; return to shader part epilog
1831 ; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s:
1832 ; GFX10PLUS: ; %bb.0: ; %entry
1833 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1834 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
1835 ; GFX10PLUS-NEXT: ; return to shader part epilog
1837 %ext = extractelement <16 x float> %vec, i32 %sel
; Dynamic extractelement from a <32 x float> held in VGPRs with an inreg
; (SGPR, s2) index. The GPRIDX checks expect an indexed v_mov_b32 bracketed by
; s_set_gpr_idx_on/off; the MOVREL and GFX10PLUS checks expect m0 loaded from
; s2 and a v_movrels_b32. The index is still expected in s2 even though the
; vector has 32 elements.
1841 define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1842 ; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1843 ; GPRIDX: ; %bb.0: ; %entry
1844 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
1845 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
1846 ; GPRIDX-NEXT: s_set_gpr_idx_off
1847 ; GPRIDX-NEXT: ; return to shader part epilog
1849 ; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1850 ; MOVREL: ; %bb.0: ; %entry
1851 ; MOVREL-NEXT: s_mov_b32 m0, s2
1852 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
1853 ; MOVREL-NEXT: ; return to shader part epilog
1855 ; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
1856 ; GFX10PLUS: ; %bb.0: ; %entry
1857 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1858 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
1859 ; GFX10PLUS-NEXT: ; return to shader part epilog
1861 %ext = extractelement <32 x float> %vec, i32 %sel
; Dynamic extractelement from a <16 x double> held in VGPRs with an inreg
; (SGPR, s2) index. The index is shifted left by 1 (s_lshl_b32) and the element
; is read with two 32-bit indexed moves based at v0 and v1. GPRIDX uses
; v_mov_b32 between s_set_gpr_idx_on/off; MOVREL and GFX10PLUS use
; v_movrels_b32 with the shifted index written directly to m0. The two halves
; are then moved to s0/s1 with v_readfirstlane_b32 for the return.
1865 define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1866 ; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1867 ; GPRIDX: ; %bb.0: ; %entry
1868 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
1869 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
1870 ; GPRIDX-NEXT: v_mov_b32_e32 v32, v0
1871 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
1872 ; GPRIDX-NEXT: s_set_gpr_idx_off
1873 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32
1874 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
1875 ; GPRIDX-NEXT: ; return to shader part epilog
1877 ; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1878 ; MOVREL: ; %bb.0: ; %entry
1879 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
1880 ; MOVREL-NEXT: v_movrels_b32_e32 v32, v0
1881 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
1882 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v32
1883 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
1884 ; MOVREL-NEXT: ; return to shader part epilog
1886 ; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
1887 ; GFX10PLUS: ; %bb.0: ; %entry
1888 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
1889 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v32, v0
1890 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
1891 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v32
1892 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
1893 ; GFX10PLUS-NEXT: ; return to shader part epilog
1895 %ext = extractelement <16 x double> %vec, i32 %sel
; Dynamic extractelement from a constant <16 x float> (1.0 through 16.0) with
; an inreg (SGPR, s2) index. All run lines expect the sixteen constants to be
; materialized into s4-s19 (inline constants for 1.0, 2.0 and 4.0, hex
; literals for the rest), m0 loaded from s2, and one s_movrels_b32 based at s4.
; The scalar result is then copied to v0 for the float return.
1899 define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1900 ; GCN-LABEL: dyn_extract_v16f32_s_s:
1901 ; GCN: ; %bb.0: ; %entry
1902 ; GCN-NEXT: s_mov_b32 s4, 1.0
1903 ; GCN-NEXT: s_mov_b32 m0, s2
1904 ; GCN-NEXT: s_mov_b32 s19, 0x41800000
1905 ; GCN-NEXT: s_mov_b32 s18, 0x41700000
1906 ; GCN-NEXT: s_mov_b32 s17, 0x41600000
1907 ; GCN-NEXT: s_mov_b32 s16, 0x41500000
1908 ; GCN-NEXT: s_mov_b32 s15, 0x41400000
1909 ; GCN-NEXT: s_mov_b32 s14, 0x41300000
1910 ; GCN-NEXT: s_mov_b32 s13, 0x41200000
1911 ; GCN-NEXT: s_mov_b32 s12, 0x41100000
1912 ; GCN-NEXT: s_mov_b32 s11, 0x41000000
1913 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000
1914 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000
1915 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000
1916 ; GCN-NEXT: s_mov_b32 s7, 4.0
1917 ; GCN-NEXT: s_mov_b32 s6, 0x40400000
1918 ; GCN-NEXT: s_mov_b32 s5, 2.0
1919 ; GCN-NEXT: s_movrels_b32 s0, s4
1920 ; GCN-NEXT: v_mov_b32_e32 v0, s0
1921 ; GCN-NEXT: ; return to shader part epilog
1923 ; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
1924 ; GFX10PLUS: ; %bb.0: ; %entry
1925 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
1926 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1927 ; GFX10PLUS-NEXT: s_mov_b32 s19, 0x41800000
1928 ; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000
1929 ; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000
1930 ; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000
1931 ; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000
1932 ; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000
1933 ; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000
1934 ; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000
1935 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
1936 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
1937 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
1938 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
1939 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
1940 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
1941 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
1942 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
1943 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
1944 ; GFX10PLUS-NEXT: ; return to shader part epilog
1946 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
; Dynamic extractelement from a constant <32 x float> (1.0 through 32.0) with
; an inreg (SGPR, s2) index. All run lines expect the thirty-two constants to
; be materialized into s36-s67, m0 loaded from s2, and one s_movrels_b32 based
; at s36. The scalar result is then copied to v0 for the float return.
1950 define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
1951 ; GCN-LABEL: dyn_extract_v32f32_s_s:
1952 ; GCN: ; %bb.0: ; %entry
1953 ; GCN-NEXT: s_mov_b32 s36, 1.0
1954 ; GCN-NEXT: s_mov_b32 m0, s2
1955 ; GCN-NEXT: s_mov_b32 s67, 0x42000000
1956 ; GCN-NEXT: s_mov_b32 s66, 0x41f80000
1957 ; GCN-NEXT: s_mov_b32 s65, 0x41f00000
1958 ; GCN-NEXT: s_mov_b32 s64, 0x41e80000
1959 ; GCN-NEXT: s_mov_b32 s63, 0x41e00000
1960 ; GCN-NEXT: s_mov_b32 s62, 0x41d80000
1961 ; GCN-NEXT: s_mov_b32 s61, 0x41d00000
1962 ; GCN-NEXT: s_mov_b32 s60, 0x41c80000
1963 ; GCN-NEXT: s_mov_b32 s59, 0x41c00000
1964 ; GCN-NEXT: s_mov_b32 s58, 0x41b80000
1965 ; GCN-NEXT: s_mov_b32 s57, 0x41b00000
1966 ; GCN-NEXT: s_mov_b32 s56, 0x41a80000
1967 ; GCN-NEXT: s_mov_b32 s55, 0x41a00000
1968 ; GCN-NEXT: s_mov_b32 s54, 0x41980000
1969 ; GCN-NEXT: s_mov_b32 s53, 0x41900000
1970 ; GCN-NEXT: s_mov_b32 s52, 0x41880000
1971 ; GCN-NEXT: s_mov_b32 s51, 0x41800000
1972 ; GCN-NEXT: s_mov_b32 s50, 0x41700000
1973 ; GCN-NEXT: s_mov_b32 s49, 0x41600000
1974 ; GCN-NEXT: s_mov_b32 s48, 0x41500000
1975 ; GCN-NEXT: s_mov_b32 s47, 0x41400000
1976 ; GCN-NEXT: s_mov_b32 s46, 0x41300000
1977 ; GCN-NEXT: s_mov_b32 s45, 0x41200000
1978 ; GCN-NEXT: s_mov_b32 s44, 0x41100000
1979 ; GCN-NEXT: s_mov_b32 s43, 0x41000000
1980 ; GCN-NEXT: s_mov_b32 s42, 0x40e00000
1981 ; GCN-NEXT: s_mov_b32 s41, 0x40c00000
1982 ; GCN-NEXT: s_mov_b32 s40, 0x40a00000
1983 ; GCN-NEXT: s_mov_b32 s39, 4.0
1984 ; GCN-NEXT: s_mov_b32 s38, 0x40400000
1985 ; GCN-NEXT: s_mov_b32 s37, 2.0
1986 ; GCN-NEXT: s_movrels_b32 s0, s36
1987 ; GCN-NEXT: v_mov_b32_e32 v0, s0
1988 ; GCN-NEXT: ; return to shader part epilog
1990 ; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
1991 ; GFX10PLUS: ; %bb.0: ; %entry
1992 ; GFX10PLUS-NEXT: s_mov_b32 s36, 1.0
1993 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
1994 ; GFX10PLUS-NEXT: s_mov_b32 s67, 0x42000000
1995 ; GFX10PLUS-NEXT: s_mov_b32 s66, 0x41f80000
1996 ; GFX10PLUS-NEXT: s_mov_b32 s65, 0x41f00000
1997 ; GFX10PLUS-NEXT: s_mov_b32 s64, 0x41e80000
1998 ; GFX10PLUS-NEXT: s_mov_b32 s63, 0x41e00000
1999 ; GFX10PLUS-NEXT: s_mov_b32 s62, 0x41d80000
2000 ; GFX10PLUS-NEXT: s_mov_b32 s61, 0x41d00000
2001 ; GFX10PLUS-NEXT: s_mov_b32 s60, 0x41c80000
2002 ; GFX10PLUS-NEXT: s_mov_b32 s59, 0x41c00000
2003 ; GFX10PLUS-NEXT: s_mov_b32 s58, 0x41b80000
2004 ; GFX10PLUS-NEXT: s_mov_b32 s57, 0x41b00000
2005 ; GFX10PLUS-NEXT: s_mov_b32 s56, 0x41a80000
2006 ; GFX10PLUS-NEXT: s_mov_b32 s55, 0x41a00000
2007 ; GFX10PLUS-NEXT: s_mov_b32 s54, 0x41980000
2008 ; GFX10PLUS-NEXT: s_mov_b32 s53, 0x41900000
2009 ; GFX10PLUS-NEXT: s_mov_b32 s52, 0x41880000
2010 ; GFX10PLUS-NEXT: s_mov_b32 s51, 0x41800000
2011 ; GFX10PLUS-NEXT: s_mov_b32 s50, 0x41700000
2012 ; GFX10PLUS-NEXT: s_mov_b32 s49, 0x41600000
2013 ; GFX10PLUS-NEXT: s_mov_b32 s48, 0x41500000
2014 ; GFX10PLUS-NEXT: s_mov_b32 s47, 0x41400000
2015 ; GFX10PLUS-NEXT: s_mov_b32 s46, 0x41300000
2016 ; GFX10PLUS-NEXT: s_mov_b32 s45, 0x41200000
2017 ; GFX10PLUS-NEXT: s_mov_b32 s44, 0x41100000
2018 ; GFX10PLUS-NEXT: s_mov_b32 s43, 0x41000000
2019 ; GFX10PLUS-NEXT: s_mov_b32 s42, 0x40e00000
2020 ; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40c00000
2021 ; GFX10PLUS-NEXT: s_mov_b32 s40, 0x40a00000
2022 ; GFX10PLUS-NEXT: s_mov_b32 s39, 4.0
2023 ; GFX10PLUS-NEXT: s_mov_b32 s38, 0x40400000
2024 ; GFX10PLUS-NEXT: s_mov_b32 s37, 2.0
2025 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s36
2026 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2027 ; GFX10PLUS-NEXT: ; return to shader part epilog
2029 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
; Dynamic extractelement from a constant <16 x double> (1.0 through 16.0) with
; an inreg (SGPR, s2) index. The constants are expected in s[36:67]. 1.0, 2.0
; and 4.0 are written with s_mov_b64 inline constants; every other element is
; built from a zero low half and a hex-literal high half via two s_mov_b32.
; The element is then read with one s_movrels_b64 into s[0:1], with m0 loaded
; from s2. The GCN and GFX10PLUS check groups contain the same instructions
; and differ only in the order in which they are scheduled.
2033 define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
2034 ; GCN-LABEL: dyn_extract_v16f64_s_s:
2035 ; GCN: ; %bb.0: ; %entry
2036 ; GCN-NEXT: s_mov_b32 s66, 0
2037 ; GCN-NEXT: s_mov_b32 s64, 0
2038 ; GCN-NEXT: s_mov_b32 s62, 0
2039 ; GCN-NEXT: s_mov_b32 s60, 0
2040 ; GCN-NEXT: s_mov_b32 s58, 0
2041 ; GCN-NEXT: s_mov_b32 s56, 0
2042 ; GCN-NEXT: s_mov_b32 s54, 0
2043 ; GCN-NEXT: s_mov_b32 s52, 0
2044 ; GCN-NEXT: s_mov_b32 s50, 0
2045 ; GCN-NEXT: s_mov_b32 s48, 0
2046 ; GCN-NEXT: s_mov_b32 s46, 0
2047 ; GCN-NEXT: s_mov_b32 s44, 0
2048 ; GCN-NEXT: s_mov_b32 s40, 0
2049 ; GCN-NEXT: s_mov_b64 s[36:37], 1.0
2050 ; GCN-NEXT: s_mov_b32 m0, s2
2051 ; GCN-NEXT: s_mov_b32 s67, 0x40300000
2052 ; GCN-NEXT: s_mov_b32 s65, 0x402e0000
2053 ; GCN-NEXT: s_mov_b32 s63, 0x402c0000
2054 ; GCN-NEXT: s_mov_b32 s61, 0x402a0000
2055 ; GCN-NEXT: s_mov_b32 s59, 0x40280000
2056 ; GCN-NEXT: s_mov_b32 s57, 0x40260000
2057 ; GCN-NEXT: s_mov_b32 s55, 0x40240000
2058 ; GCN-NEXT: s_mov_b32 s53, 0x40220000
2059 ; GCN-NEXT: s_mov_b32 s51, 0x40200000
2060 ; GCN-NEXT: s_mov_b32 s49, 0x401c0000
2061 ; GCN-NEXT: s_mov_b32 s47, 0x40180000
2062 ; GCN-NEXT: s_mov_b32 s45, 0x40140000
2063 ; GCN-NEXT: s_mov_b64 s[42:43], 4.0
2064 ; GCN-NEXT: s_mov_b32 s41, 0x40080000
2065 ; GCN-NEXT: s_mov_b64 s[38:39], 2.0
2066 ; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37]
2067 ; GCN-NEXT: ; return to shader part epilog
2069 ; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
2070 ; GFX10PLUS: ; %bb.0: ; %entry
2071 ; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0
2072 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2073 ; GFX10PLUS-NEXT: s_mov_b32 s66, 0
2074 ; GFX10PLUS-NEXT: s_mov_b32 s64, 0
2075 ; GFX10PLUS-NEXT: s_mov_b32 s62, 0
2076 ; GFX10PLUS-NEXT: s_mov_b32 s60, 0
2077 ; GFX10PLUS-NEXT: s_mov_b32 s58, 0
2078 ; GFX10PLUS-NEXT: s_mov_b32 s56, 0
2079 ; GFX10PLUS-NEXT: s_mov_b32 s54, 0
2080 ; GFX10PLUS-NEXT: s_mov_b32 s52, 0
2081 ; GFX10PLUS-NEXT: s_mov_b32 s50, 0
2082 ; GFX10PLUS-NEXT: s_mov_b32 s48, 0
2083 ; GFX10PLUS-NEXT: s_mov_b32 s46, 0
2084 ; GFX10PLUS-NEXT: s_mov_b32 s44, 0
2085 ; GFX10PLUS-NEXT: s_mov_b32 s40, 0
2086 ; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000
2087 ; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000
2088 ; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000
2089 ; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000
2090 ; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000
2091 ; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000
2092 ; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000
2093 ; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000
2094 ; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000
2095 ; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000
2096 ; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000
2097 ; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000
2098 ; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0
2099 ; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000
2100 ; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0
2101 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37]
2102 ; GFX10PLUS-NEXT: ; return to shader part epilog
2104 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
; Dynamic extractelement from an inreg <6 x float> (s2-s7) with a VGPR index
; (v0). The expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1-5. The GCN checks expect every element to be copied to a VGPR with
; v_mov_b32 before it is used in a select; the GFX10PLUS checks expect only s3
; to be copied, with the remaining SGPRs used directly as v_cndmask_b32
; operands.
2108 define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
2109 ; GCN-LABEL: dyn_extract_v6f32_s_v:
2110 ; GCN: ; %bb.0: ; %entry
2111 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2112 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2113 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2114 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2115 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2116 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2117 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2118 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2119 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2120 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2121 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
2122 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2123 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2124 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2125 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2126 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc
2127 ; GCN-NEXT: ; return to shader part epilog
2129 ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v:
2130 ; GFX10PLUS: ; %bb.0: ; %entry
2131 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
2132 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2133 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2134 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2135 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2136 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2137 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2138 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2139 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2140 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2141 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo
2142 ; GFX10PLUS-NEXT: ; return to shader part epilog
2144 %ext = extractelement <6 x float> %vec, i32 %sel
; Dynamic extractelement from a <6 x float> where neither the vector (v0-v5)
; nor the index (v6) is marked inreg. All run lines expect a v_cmp_eq_u32 /
; v_cndmask_b32 chain over indices 1-5 accumulating into v0.
2148 define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
2149 ; GCN-LABEL: dyn_extract_v6f32_v_v:
2150 ; GCN: ; %bb.0: ; %entry
2151 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2152 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
2153 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2154 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6
2155 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2156 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
2157 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2158 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6
2159 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2160 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6
2161 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2162 ; GCN-NEXT: s_setpc_b64 s[30:31]
2164 ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
2165 ; GFX10PLUS: ; %bb.0: ; %entry
2166 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2167 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
2168 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2169 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6
2170 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2171 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6
2172 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2173 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6
2174 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2175 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6
2176 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2177 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
2179 %ext = extractelement <6 x float> %vec, i32 %sel
; Dynamic extractelement from a <6 x float> held in VGPRs (v0-v5) with an
; inreg (SGPR, s2) index. All run lines expect a compare/select chain over
; indices 1-5 rather than indexed register access; each compare is a
; v_cmp_eq_u32_e64 of s2 against the constant index, feeding v_cndmask_b32.
2183 define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2184 ; GCN-LABEL: dyn_extract_v6f32_v_s:
2185 ; GCN: ; %bb.0: ; %entry
2186 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
2187 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2188 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
2189 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2190 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
2191 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2192 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
2193 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2194 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
2195 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2196 ; GCN-NEXT: ; return to shader part epilog
2198 ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s:
2199 ; GFX10PLUS: ; %bb.0: ; %entry
2200 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
2201 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2202 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
2203 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2204 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
2205 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2206 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
2207 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2208 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
2209 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2210 ; GFX10PLUS-NEXT: ; return to shader part epilog
2212 %ext = extractelement <6 x float> %vec, i32 %sel
; Dynamic extractelement from an inreg <6 x float> (s2-s7) with an inreg
; index (s8). All run lines expect a purely scalar s_cmp_eq_u32 /
; s_cselect_b32 chain over indices 1-5 (no s_movrels), followed by a copy of
; the selected SGPR into v0 for the float return.
2216 define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2217 ; GCN-LABEL: dyn_extract_v6f32_s_s:
2218 ; GCN: ; %bb.0: ; %entry
2219 ; GCN-NEXT: s_cmp_eq_u32 s8, 1
2220 ; GCN-NEXT: s_cselect_b32 s0, s3, s2
2221 ; GCN-NEXT: s_cmp_eq_u32 s8, 2
2222 ; GCN-NEXT: s_cselect_b32 s0, s4, s0
2223 ; GCN-NEXT: s_cmp_eq_u32 s8, 3
2224 ; GCN-NEXT: s_cselect_b32 s0, s5, s0
2225 ; GCN-NEXT: s_cmp_eq_u32 s8, 4
2226 ; GCN-NEXT: s_cselect_b32 s0, s6, s0
2227 ; GCN-NEXT: s_cmp_eq_u32 s8, 5
2228 ; GCN-NEXT: s_cselect_b32 s0, s7, s0
2229 ; GCN-NEXT: v_mov_b32_e32 v0, s0
2230 ; GCN-NEXT: ; return to shader part epilog
2232 ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s:
2233 ; GFX10PLUS: ; %bb.0: ; %entry
2234 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1
2235 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
2236 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2
2237 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
2238 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3
2239 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0
2240 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4
2241 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0
2242 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5
2243 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0
2244 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2245 ; GFX10PLUS-NEXT: ; return to shader part epilog
2247 %ext = extractelement <6 x float> %vec, i32 %sel
; Dynamic extractelement from an inreg <7 x float> (s2-s8) with a VGPR index
; (v0). The expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1-6. The GCN checks expect every element to be copied to a VGPR with
; v_mov_b32 before it is used in a select; the GFX10PLUS checks expect only s3
; to be copied, with the remaining SGPRs used directly as v_cndmask_b32
; operands.
2251 define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2252 ; GCN-LABEL: dyn_extract_v7f32_s_v:
2253 ; GCN: ; %bb.0: ; %entry
2254 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2255 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2256 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2257 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2258 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2259 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2260 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2261 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2262 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2263 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2264 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
2265 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2266 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2267 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2268 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2269 ; GCN-NEXT: v_mov_b32_e32 v7, s8
2270 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
2271 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2272 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc
2273 ; GCN-NEXT: ; return to shader part epilog
2275 ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v:
2276 ; GFX10PLUS: ; %bb.0: ; %entry
2277 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
2278 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2279 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2280 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2281 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2282 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2283 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2284 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2285 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2286 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2287 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
2288 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2289 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo
2290 ; GFX10PLUS-NEXT: ; return to shader part epilog
2292 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from a <7 x float> where neither the vector (v0-v6)
; nor the index (v7) is marked inreg. All run lines expect a v_cmp_eq_u32 /
; v_cndmask_b32 chain over indices 1-6 accumulating into v0.
2296 define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2297 ; GCN-LABEL: dyn_extract_v7f32_v_v:
2298 ; GCN: ; %bb.0: ; %entry
2299 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2300 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
2301 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2302 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7
2303 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2304 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7
2305 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2306 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7
2307 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2308 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7
2309 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2310 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7
2311 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2312 ; GCN-NEXT: s_setpc_b64 s[30:31]
2314 ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
2315 ; GFX10PLUS: ; %bb.0: ; %entry
2316 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2317 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
2318 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2319 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7
2320 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2321 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7
2322 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2323 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7
2324 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2325 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7
2326 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2327 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7
2328 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2329 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
2331 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <7 x float> in an amdgpu_ps function where the
; vector is a plain argument and the index is marked inreg.
; Expected code on every tested target: the index (s2) is compared against
; 1..6 with the e64 form of v_cmp_eq_u32, and a v_cndmask_b32 after each
; compare selects among v0..v6; the result is left in v0.
2335 define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2336 ; GCN-LABEL: dyn_extract_v7f32_v_s:
2337 ; GCN: ; %bb.0: ; %entry
2338 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
2339 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2340 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
2341 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2342 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
2343 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2344 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
2345 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2346 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
2347 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2348 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
2349 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2350 ; GCN-NEXT: ; return to shader part epilog
2352 ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s:
2353 ; GFX10PLUS: ; %bb.0: ; %entry
2354 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
2355 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2356 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
2357 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2358 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
2359 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2360 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
2361 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2362 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
2363 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2364 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
2365 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2366 ; GFX10PLUS-NEXT: ; return to shader part epilog
2368 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <7 x float> in an amdgpu_ps function where both
; the vector and the index are marked inreg.
; Expected code on every tested target stays on the scalar unit: the index
; (s9) is compared against 1..6 with s_cmp_eq_u32, each compare is followed by
; an s_cselect_b32 choosing among s2..s8, and the final value is copied from
; s0 into v0 for the float return.
2372 define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2373 ; GCN-LABEL: dyn_extract_v7f32_s_s:
2374 ; GCN: ; %bb.0: ; %entry
2375 ; GCN-NEXT: s_cmp_eq_u32 s9, 1
2376 ; GCN-NEXT: s_cselect_b32 s0, s3, s2
2377 ; GCN-NEXT: s_cmp_eq_u32 s9, 2
2378 ; GCN-NEXT: s_cselect_b32 s0, s4, s0
2379 ; GCN-NEXT: s_cmp_eq_u32 s9, 3
2380 ; GCN-NEXT: s_cselect_b32 s0, s5, s0
2381 ; GCN-NEXT: s_cmp_eq_u32 s9, 4
2382 ; GCN-NEXT: s_cselect_b32 s0, s6, s0
2383 ; GCN-NEXT: s_cmp_eq_u32 s9, 5
2384 ; GCN-NEXT: s_cselect_b32 s0, s7, s0
2385 ; GCN-NEXT: s_cmp_eq_u32 s9, 6
2386 ; GCN-NEXT: s_cselect_b32 s0, s8, s0
2387 ; GCN-NEXT: v_mov_b32_e32 v0, s0
2388 ; GCN-NEXT: ; return to shader part epilog
2390 ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s:
2391 ; GFX10PLUS: ; %bb.0: ; %entry
2392 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1
2393 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
2394 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2
2395 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
2396 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3
2397 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0
2398 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4
2399 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0
2400 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5
2401 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0
2402 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6
2403 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0
2404 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
2405 ; GFX10PLUS-NEXT: ; return to shader part epilog
2407 %ext = extractelement <7 x float> %vec, i32 %sel
; Dynamic extractelement from <6 x double> in an amdgpu_ps function where the
; vector is inreg (s2..s13 in the checks) and the index is a plain argument (v0).
; Expected code compares the index against 1..5 and issues two v_cndmask_b32
; per compare, one for each 32-bit half of the double; the two result halves
; are then moved to s0/s1 with v_readfirstlane_b32.
; The gfx900/fiji output first copies every SGPR element into a VGPR, while
; the gfx1010/gfx1100 output uses the SGPRs directly as v_cndmask operands
; (gfx1100 additionally pairs the two initial moves into one v_dual_mov_b32).
2411 define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2412 ; GCN-LABEL: dyn_extract_v6f64_s_v:
2413 ; GCN: ; %bb.0: ; %entry
2414 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2415 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2416 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2417 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2418 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2419 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2420 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2421 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2422 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2423 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2424 ; GCN-NEXT: v_mov_b32_e32 v7, s8
2425 ; GCN-NEXT: v_mov_b32_e32 v8, s9
2426 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2427 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2428 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2429 ; GCN-NEXT: v_mov_b32_e32 v9, s10
2430 ; GCN-NEXT: v_mov_b32_e32 v10, s11
2431 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2432 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2433 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2434 ; GCN-NEXT: v_mov_b32_e32 v11, s12
2435 ; GCN-NEXT: v_mov_b32_e32 v12, s13
2436 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2437 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2438 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2439 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc
2440 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc
2441 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2442 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2443 ; GCN-NEXT: ; return to shader part epilog
2445 ; GFX10-LABEL: dyn_extract_v6f64_s_v:
2446 ; GFX10: ; %bb.0: ; %entry
2447 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2448 ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2449 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2450 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2451 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2452 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2453 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2454 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2455 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2456 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2457 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2458 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2459 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2460 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2461 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2462 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2463 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2464 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2465 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2466 ; GFX10-NEXT: ; return to shader part epilog
2468 ; GFX11-LABEL: dyn_extract_v6f64_s_v:
2469 ; GFX11: ; %bb.0: ; %entry
2470 ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2471 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2472 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2473 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2474 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2475 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2476 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2477 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2478 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2479 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2480 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2481 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2482 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2483 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2484 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2485 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2486 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2487 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2488 ; GFX11-NEXT: ; return to shader part epilog
2490 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> where the vector and the index are
; both ordinary (non-inreg) arguments of a default-calling-convention function.
; Expected code compares the index (v12) against 1..5 and, after each compare,
; selects the low half into v0 and the high half into v1 from the register
; pair of the matching element (v2/v3 .. v10/v11).
; The gfx1100 output fuses each pair of selects into a single
; v_dual_cndmask_b32 instruction; the other targets emit two v_cndmask_b32.
2494 define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2495 ; GCN-LABEL: dyn_extract_v6f64_v_v:
2496 ; GCN: ; %bb.0: ; %entry
2497 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
2499 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2500 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2501 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
2502 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2503 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2504 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
2505 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2506 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2507 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
2508 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2509 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2510 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12
2511 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
2512 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2513 ; GCN-NEXT: s_setpc_b64 s[30:31]
2515 ; GFX10-LABEL: dyn_extract_v6f64_v_v:
2516 ; GFX10: ; %bb.0: ; %entry
2517 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2518 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
2519 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2520 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2521 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
2522 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2523 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2524 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
2525 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2526 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2527 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
2528 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2529 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2530 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
2531 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2532 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2533 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2535 ; GFX11-LABEL: dyn_extract_v6f64_v_v:
2536 ; GFX11: ; %bb.0: ; %entry
2537 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2538 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
2539 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2540 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
2541 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2542 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
2543 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2544 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
2545 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2546 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
2547 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2548 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2550 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> in an amdgpu_ps function where the
; vector is a plain argument and the index is marked inreg (s2 in the checks).
; Unlike the compare/select chains of the neighbouring tests, the expected
; code uses indexed register reads. The index is shifted left by one (each
; double occupies two 32-bit registers) and then:
;   - gfx900 brackets two v_mov_b32 with s_set_gpr_idx_on / s_set_gpr_idx_off;
;   - fiji, gfx1010 and gfx1100 write the shifted index to m0 and use
;     v_movrels_b32.
; Both halves are finally moved to s0/s1 with v_readfirstlane_b32.
2554 define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2555 ; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2556 ; GPRIDX: ; %bb.0: ; %entry
2557 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
2558 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
2559 ; GPRIDX-NEXT: v_mov_b32_e32 v12, v0
2560 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
2561 ; GPRIDX-NEXT: s_set_gpr_idx_off
2562 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12
2563 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
2564 ; GPRIDX-NEXT: ; return to shader part epilog
2566 ; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2567 ; MOVREL: ; %bb.0: ; %entry
2568 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
2569 ; MOVREL-NEXT: v_movrels_b32_e32 v12, v0
2570 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
2571 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v12
2572 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
2573 ; MOVREL-NEXT: ; return to shader part epilog
2575 ; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s:
2576 ; GFX10PLUS: ; %bb.0: ; %entry
2577 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
2578 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v12, v0
2579 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
2580 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v12
2581 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
2582 ; GFX10PLUS-NEXT: ; return to shader part epilog
2584 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from <6 x double> in an amdgpu_ps function where both
; the vector and the index are marked inreg.
; Expected code on every tested target: the twelve vector SGPRs (s2..s13) are
; copied down to s0..s11, the index (s14) is written to m0, and a single
; s_movrels_b64 reads the selected 64-bit element into s[0:1].
2588 define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2589 ; GCN-LABEL: dyn_extract_v6f64_s_s:
2590 ; GCN: ; %bb.0: ; %entry
2591 ; GCN-NEXT: s_mov_b32 s0, s2
2592 ; GCN-NEXT: s_mov_b32 s1, s3
2593 ; GCN-NEXT: s_mov_b32 m0, s14
2594 ; GCN-NEXT: s_mov_b32 s2, s4
2595 ; GCN-NEXT: s_mov_b32 s3, s5
2596 ; GCN-NEXT: s_mov_b32 s4, s6
2597 ; GCN-NEXT: s_mov_b32 s5, s7
2598 ; GCN-NEXT: s_mov_b32 s6, s8
2599 ; GCN-NEXT: s_mov_b32 s7, s9
2600 ; GCN-NEXT: s_mov_b32 s8, s10
2601 ; GCN-NEXT: s_mov_b32 s9, s11
2602 ; GCN-NEXT: s_mov_b32 s10, s12
2603 ; GCN-NEXT: s_mov_b32 s11, s13
2604 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
2605 ; GCN-NEXT: ; return to shader part epilog
2607 ; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s:
2608 ; GFX10PLUS: ; %bb.0: ; %entry
2609 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2610 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2611 ; GFX10PLUS-NEXT: s_mov_b32 m0, s14
2612 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2613 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
2614 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
2615 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
2616 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
2617 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
2618 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
2619 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
2620 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
2621 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
2622 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
2623 ; GFX10PLUS-NEXT: ; return to shader part epilog
2625 %ext = extractelement <6 x double> %vec, i32 %sel
; Dynamic extractelement from a <7 x double> that is produced by bitcasting an
; inreg <14 x float> argument; the index is a plain argument (v0 in the checks).
; Expected code is a compare/select chain with two selects per compare (low
; and high 32-bit halves), followed by v_readfirstlane_b32 into s0/s1.
; Note that the expected output also contains a compare against index 7, one
; past the last valid element: gfx900/fiji select the unassigned v15/v16
; (see the "kill" lines) and gfx1010/gfx1100 select s2/s3 for it. This is
; presumably an artifact of the 7-element vector being handled as an
; 8-element one - TODO confirm against the legalizer.
2629 define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userData, i32 %sel) {
2630 ; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast:
2631 ; GCN: ; %bb.0: ; %entry
2632 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2633 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2634 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2635 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2636 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2637 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2638 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2639 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2640 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2641 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2642 ; GCN-NEXT: v_mov_b32_e32 v7, s8
2643 ; GCN-NEXT: v_mov_b32_e32 v8, s9
2644 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2645 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2646 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2647 ; GCN-NEXT: v_mov_b32_e32 v9, s10
2648 ; GCN-NEXT: v_mov_b32_e32 v10, s11
2649 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2650 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2651 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2652 ; GCN-NEXT: v_mov_b32_e32 v11, s12
2653 ; GCN-NEXT: v_mov_b32_e32 v12, s13
2654 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2655 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2656 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2657 ; GCN-NEXT: v_mov_b32_e32 v13, s14
2658 ; GCN-NEXT: v_mov_b32_e32 v14, s15
2659 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2660 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
2661 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2662 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2663 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
2664 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
2665 ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec
2666 ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec
2667 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
2668 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
2669 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2670 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2671 ; GCN-NEXT: ; return to shader part epilog
2673 ; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast:
2674 ; GFX10: ; %bb.0: ; %entry
2675 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2676 ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2677 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2678 ; GFX10-NEXT: s_mov_b32 s0, s14
2679 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2680 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2681 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2682 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2683 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2684 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2685 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2686 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2687 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2688 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2689 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2690 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2691 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2692 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2693 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2694 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2695 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2696 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2697 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2698 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2699 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2700 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2701 ; GFX10-NEXT: ; return to shader part epilog
2703 ; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast:
2704 ; GFX11: ; %bb.0: ; %entry
2705 ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2706 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2707 ; GFX11-NEXT: s_mov_b32 s0, s14
2708 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2709 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2710 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2711 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2712 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2713 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2714 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2715 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2716 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2717 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2718 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2719 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2720 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2721 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2722 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2723 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2724 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2725 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2726 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2727 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2728 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2729 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2730 ; GFX11-NEXT: ; return to shader part epilog
2732 %bc = bitcast <14 x float> %userData to <7 x double>
2733 %ext = extractelement <7 x double> %bc, i32 %sel
; Extracts element 4 of a <7 x i64> produced by bitcasting an inreg
; <14 x i32> argument. Despite the "dyn_" name and the %sel parameter, the
; extractelement below uses the constant index 4 and %sel is unused.
; Expected code on every tested target is just two scalar moves copying
; s10/s11 (the register pair holding element 4 when the vector starts at s2)
; into the s0/s1 return registers.
2737 define amdgpu_ps i64 @dyn_extract_v7i64_s_v_bitcast(<14 x i32> inreg %userData, i32 %sel) {
2738 ; GCN-LABEL: dyn_extract_v7i64_s_v_bitcast:
2739 ; GCN: ; %bb.0: ; %entry
2740 ; GCN-NEXT: s_mov_b32 s0, s10
2741 ; GCN-NEXT: s_mov_b32 s1, s11
2742 ; GCN-NEXT: ; return to shader part epilog
2744 ; GFX10PLUS-LABEL: dyn_extract_v7i64_s_v_bitcast:
2745 ; GFX10PLUS: ; %bb.0: ; %entry
2746 ; GFX10PLUS-NEXT: s_mov_b32 s0, s10
2747 ; GFX10PLUS-NEXT: s_mov_b32 s1, s11
2748 ; GFX10PLUS-NEXT: ; return to shader part epilog
2750 %.bc = bitcast <14 x i32> %userData to <7 x i64>
2751 %ext = extractelement <7 x i64> %.bc, i32 4
; Dynamic extractelement from <7 x double> in an amdgpu_ps function where the
; vector is inreg (s2..s15 in the checks) and the index is a plain argument (v0).
; Expected code is a compare/select chain with two selects per compare (low
; and high 32-bit halves), followed by v_readfirstlane_b32 into s0/s1.
; As in the bitcast variant of this test, the expected output also contains a
; compare against index 7, one past the last valid element: gfx900/fiji
; select the unassigned v15/v16 (see the "kill" lines) and gfx1010/gfx1100
; select s2/s3 for it. Presumably this comes from the 7-element vector being
; handled as an 8-element one - TODO confirm against the legalizer.
2755 define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2756 ; GCN-LABEL: dyn_extract_v7f64_s_v:
2757 ; GCN: ; %bb.0: ; %entry
2758 ; GCN-NEXT: v_mov_b32_e32 v1, s2
2759 ; GCN-NEXT: v_mov_b32_e32 v2, s3
2760 ; GCN-NEXT: v_mov_b32_e32 v3, s4
2761 ; GCN-NEXT: v_mov_b32_e32 v4, s5
2762 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
2763 ; GCN-NEXT: v_mov_b32_e32 v5, s6
2764 ; GCN-NEXT: v_mov_b32_e32 v6, s7
2765 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2766 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2767 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
2768 ; GCN-NEXT: v_mov_b32_e32 v7, s8
2769 ; GCN-NEXT: v_mov_b32_e32 v8, s9
2770 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2771 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2772 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
2773 ; GCN-NEXT: v_mov_b32_e32 v9, s10
2774 ; GCN-NEXT: v_mov_b32_e32 v10, s11
2775 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2776 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2777 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2778 ; GCN-NEXT: v_mov_b32_e32 v11, s12
2779 ; GCN-NEXT: v_mov_b32_e32 v12, s13
2780 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2781 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2782 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
2783 ; GCN-NEXT: v_mov_b32_e32 v13, s14
2784 ; GCN-NEXT: v_mov_b32_e32 v14, s15
2785 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2786 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
2787 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
2788 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2789 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
2790 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
2791 ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec
2792 ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec
2793 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
2794 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
2795 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
2796 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
2797 ; GCN-NEXT: ; return to shader part epilog
2799 ; GFX10-LABEL: dyn_extract_v7f64_s_v:
2800 ; GFX10: ; %bb.0: ; %entry
2801 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2802 ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2803 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2804 ; GFX10-NEXT: s_mov_b32 s0, s14
2805 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2806 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2807 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2808 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2809 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2810 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2811 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2812 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2813 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2814 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2815 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2816 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2817 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2818 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2819 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2820 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2821 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2822 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2823 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2824 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2825 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2826 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2827 ; GFX10-NEXT: ; return to shader part epilog
2829 ; GFX11-LABEL: dyn_extract_v7f64_s_v:
2830 ; GFX11: ; %bb.0: ; %entry
2831 ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2832 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
2833 ; GFX11-NEXT: s_mov_b32 s0, s14
2834 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2835 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2836 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
2837 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2838 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2839 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
2840 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2841 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2842 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
2843 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2844 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2845 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
2846 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2847 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2848 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
2849 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2850 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2851 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
2852 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2853 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2854 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2855 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2856 ; GFX11-NEXT: ; return to shader part epilog
2858 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> where the vector and the index are
; both ordinary (non-inreg) arguments of a default-calling-convention function.
; Expected code compares the index (v14) against 1..7 and, after each compare,
; selects the low half into v0 and the high half into v1. The gfx1100 output
; fuses each pair of selects into one v_dual_cndmask_b32.
; The final compare against 7 is one past the last valid element, and its
; selects read v14/v15 (v14 being the index register itself). Presumably this
; comes from the 7-element vector being handled as an 8-element one - TODO
; confirm against the legalizer.
2862 define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2863 ; GCN-LABEL: dyn_extract_v7f64_v_v:
2864 ; GCN: ; %bb.0: ; %entry
2865 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2866 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14
2867 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2868 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
2869 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14
2870 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
2871 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
2872 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14
2873 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2874 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2875 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14
2876 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2877 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2878 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14
2879 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
2880 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
2881 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14
2882 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
2883 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
2884 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14
2885 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
2886 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
2887 ; GCN-NEXT: s_setpc_b64 s[30:31]
2889 ; GFX10-LABEL: dyn_extract_v7f64_v_v:
2890 ; GFX10: ; %bb.0: ; %entry
2891 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2892 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
2893 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2894 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2895 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14
2896 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2897 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2898 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14
2899 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2900 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2901 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14
2902 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2903 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2904 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14
2905 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2906 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2907 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
2908 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2909 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2910 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
2911 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
2912 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
2913 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2915 ; GFX11-LABEL: dyn_extract_v7f64_v_v:
2916 ; GFX11: ; %bb.0: ; %entry
2917 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
2919 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2920 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14
2921 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2922 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14
2923 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2924 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14
2925 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2926 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14
2927 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2928 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
2929 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
2930 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
2931 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
2932 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2934 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> in an amdgpu_ps function where the
; vector is a plain argument and the index is marked inreg (s2 in the checks).
; Expected code uses indexed register reads. The index is shifted left by one
; (each double occupies two 32-bit registers) and then:
;   - gfx900 brackets two v_mov_b32 with s_set_gpr_idx_on / s_set_gpr_idx_off;
;   - fiji, gfx1010 and gfx1100 write the shifted index to m0 and use
;     v_movrels_b32.
; The low half goes through the temporary v14, the high half through v0, and
; both are moved to s0/s1 with v_readfirstlane_b32.
2938 define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2939 ; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2940 ; GPRIDX: ; %bb.0: ; %entry
2941 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
2942 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
2943 ; GPRIDX-NEXT: v_mov_b32_e32 v14, v0
2944 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
2945 ; GPRIDX-NEXT: s_set_gpr_idx_off
2946 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14
2947 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
2948 ; GPRIDX-NEXT: ; return to shader part epilog
2950 ; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2951 ; MOVREL: ; %bb.0: ; %entry
2952 ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
2953 ; MOVREL-NEXT: v_movrels_b32_e32 v14, v0
2954 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
2955 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v14
2956 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
2957 ; MOVREL-NEXT: ; return to shader part epilog
2959 ; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s:
2960 ; GFX10PLUS: ; %bb.0: ; %entry
2961 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
2962 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v14, v0
2963 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
2964 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v14
2965 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
2966 ; GFX10PLUS-NEXT: ; return to shader part epilog
2968 %ext = extractelement <7 x double> %vec, i32 %sel
; Dynamic extractelement from <7 x double> in an amdgpu_ps function where both
; the vector and the index are marked inreg.
; Expected code on every tested target: the fourteen vector SGPRs (s2..s15)
; are copied down to s0..s13, the index (s16) is written to m0, and a single
; s_movrels_b64 reads the selected 64-bit element into s[0:1].
2972 define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2973 ; GCN-LABEL: dyn_extract_v7f64_s_s:
2974 ; GCN: ; %bb.0: ; %entry
2975 ; GCN-NEXT: s_mov_b32 s0, s2
2976 ; GCN-NEXT: s_mov_b32 s1, s3
2977 ; GCN-NEXT: s_mov_b32 m0, s16
2978 ; GCN-NEXT: s_mov_b32 s2, s4
2979 ; GCN-NEXT: s_mov_b32 s3, s5
2980 ; GCN-NEXT: s_mov_b32 s4, s6
2981 ; GCN-NEXT: s_mov_b32 s5, s7
2982 ; GCN-NEXT: s_mov_b32 s6, s8
2983 ; GCN-NEXT: s_mov_b32 s7, s9
2984 ; GCN-NEXT: s_mov_b32 s8, s10
2985 ; GCN-NEXT: s_mov_b32 s9, s11
2986 ; GCN-NEXT: s_mov_b32 s10, s12
2987 ; GCN-NEXT: s_mov_b32 s11, s13
2988 ; GCN-NEXT: s_mov_b32 s12, s14
2989 ; GCN-NEXT: s_mov_b32 s13, s15
2990 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
2991 ; GCN-NEXT: ; return to shader part epilog
2993 ; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s:
2994 ; GFX10PLUS: ; %bb.0: ; %entry
2995 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
2996 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
2997 ; GFX10PLUS-NEXT: s_mov_b32 m0, s16
2998 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
2999 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3000 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3001 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3002 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3003 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3004 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3005 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3006 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3007 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3008 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3009 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3010 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1]
3011 ; GFX10PLUS-NEXT: ; return to shader part epilog
3013 %ext = extractelement <7 x double> %vec, i32 %sel
; Kernel test for a dynamically indexed extractelement from the constant vector
; <5 x double> <1.0, 2.0, 3.0, 4.0, 5.0>. The index %sel is a kernel argument
; and the selected element is stored through %out.
; Every check block below first pins the complete .amd_kernel_code_t header and
; then the instruction stream, which is a chain of s_cmp_eq_u32 / s_cselect_b64
; steps over indices 1 through 4 followed by a 64-bit store of the result.
; Check-prefix to target mapping (taken from the RUN lines at the top of the
; file) is GPRIDX for gfx900, MOVREL for fiji, GFX10 for gfx1010 and GFX11 for
; gfx1100.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
3017 define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) {
3018 ; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
3019 ; GPRIDX: .amd_kernel_code_t
3020 ; GPRIDX-NEXT: amd_code_version_major = 1
3021 ; GPRIDX-NEXT: amd_code_version_minor = 2
3022 ; GPRIDX-NEXT: amd_machine_kind = 1
3023 ; GPRIDX-NEXT: amd_machine_version_major = 9
3024 ; GPRIDX-NEXT: amd_machine_version_minor = 0
3025 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
3026 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
3027 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
3028 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
3029 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
3030 ; GPRIDX-NEXT: priority = 0
3031 ; GPRIDX-NEXT: float_mode = 240
3032 ; GPRIDX-NEXT: priv = 0
3033 ; GPRIDX-NEXT: enable_dx10_clamp = 1
3034 ; GPRIDX-NEXT: debug_mode = 0
3035 ; GPRIDX-NEXT: enable_ieee_mode = 1
3036 ; GPRIDX-NEXT: enable_wgp_mode = 0
3037 ; GPRIDX-NEXT: enable_mem_ordered = 0
3038 ; GPRIDX-NEXT: enable_fwd_progress = 0
3039 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3040 ; GPRIDX-NEXT: user_sgpr_count = 10
3041 ; GPRIDX-NEXT: enable_trap_handler = 0
3042 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
3043 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1
3044 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1
3045 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
3046 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 2
3047 ; GPRIDX-NEXT: enable_exception_msb = 0
3048 ; GPRIDX-NEXT: granulated_lds_size = 0
3049 ; GPRIDX-NEXT: enable_exception = 0
3050 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
3051 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1
3052 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
3053 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3054 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1
3055 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
3056 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
3057 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3058 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3059 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3060 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
3061 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
3062 ; GPRIDX-NEXT: private_element_size = 1
3063 ; GPRIDX-NEXT: is_ptr64 = 1
3064 ; GPRIDX-NEXT: is_dynamic_callstack = 0
3065 ; GPRIDX-NEXT: is_debug_enabled = 0
3066 ; GPRIDX-NEXT: is_xnack_enabled = 1
3067 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
3068 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
3069 ; GPRIDX-NEXT: gds_segment_byte_size = 0
3070 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28
3071 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
3072 ; GPRIDX-NEXT: wavefront_sgpr_count = 13
3073 ; GPRIDX-NEXT: workitem_vgpr_count = 3
3074 ; GPRIDX-NEXT: reserved_vgpr_first = 0
3075 ; GPRIDX-NEXT: reserved_vgpr_count = 0
3076 ; GPRIDX-NEXT: reserved_sgpr_first = 0
3077 ; GPRIDX-NEXT: reserved_sgpr_count = 0
3078 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3079 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
3080 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
3081 ; GPRIDX-NEXT: group_segment_alignment = 4
3082 ; GPRIDX-NEXT: private_segment_alignment = 4
3083 ; GPRIDX-NEXT: wavefront_size = 6
3084 ; GPRIDX-NEXT: call_convention = -1
3085 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
3086 ; GPRIDX-NEXT: .end_amd_kernel_code_t
3087 ; GPRIDX-NEXT: ; %bb.0: ; %entry
3088 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
3089 ; GPRIDX-NEXT: s_load_dword s8, s[6:7], 0x8
3090 ; GPRIDX-NEXT: s_mov_b32 s4, 0
3091 ; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000
3092 ; GPRIDX-NEXT: s_mov_b32 s2, 0
3093 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000
3094 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
3095 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
3096 ; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3097 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2
3098 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3099 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3
3100 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3101 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4
3102 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3103 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
3104 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
3105 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0
3106 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
3107 ; GPRIDX-NEXT: s_endpgm
3109 ; MOVREL-LABEL: dyn_extract_v5f64_s_s:
3110 ; MOVREL: .amd_kernel_code_t
3111 ; MOVREL-NEXT: amd_code_version_major = 1
3112 ; MOVREL-NEXT: amd_code_version_minor = 2
3113 ; MOVREL-NEXT: amd_machine_kind = 1
3114 ; MOVREL-NEXT: amd_machine_version_major = 8
3115 ; MOVREL-NEXT: amd_machine_version_minor = 0
3116 ; MOVREL-NEXT: amd_machine_version_stepping = 3
3117 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
3118 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
3119 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
3120 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1
3121 ; MOVREL-NEXT: priority = 0
3122 ; MOVREL-NEXT: float_mode = 240
3123 ; MOVREL-NEXT: priv = 0
3124 ; MOVREL-NEXT: enable_dx10_clamp = 1
3125 ; MOVREL-NEXT: debug_mode = 0
3126 ; MOVREL-NEXT: enable_ieee_mode = 1
3127 ; MOVREL-NEXT: enable_wgp_mode = 0
3128 ; MOVREL-NEXT: enable_mem_ordered = 0
3129 ; MOVREL-NEXT: enable_fwd_progress = 0
3130 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3131 ; MOVREL-NEXT: user_sgpr_count = 10
3132 ; MOVREL-NEXT: enable_trap_handler = 0
3133 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
3134 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1
3135 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1
3136 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
3137 ; MOVREL-NEXT: enable_vgpr_workitem_id = 2
3138 ; MOVREL-NEXT: enable_exception_msb = 0
3139 ; MOVREL-NEXT: granulated_lds_size = 0
3140 ; MOVREL-NEXT: enable_exception = 0
3141 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
3142 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1
3143 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
3144 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3145 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 1
3146 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
3147 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
3148 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3149 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3150 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3151 ; MOVREL-NEXT: enable_wavefront_size32 = 0
3152 ; MOVREL-NEXT: enable_ordered_append_gds = 0
3153 ; MOVREL-NEXT: private_element_size = 1
3154 ; MOVREL-NEXT: is_ptr64 = 1
3155 ; MOVREL-NEXT: is_dynamic_callstack = 0
3156 ; MOVREL-NEXT: is_debug_enabled = 0
3157 ; MOVREL-NEXT: is_xnack_enabled = 0
3158 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
3159 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
3160 ; MOVREL-NEXT: gds_segment_byte_size = 0
3161 ; MOVREL-NEXT: kernarg_segment_byte_size = 28
3162 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
3163 ; MOVREL-NEXT: wavefront_sgpr_count = 9
3164 ; MOVREL-NEXT: workitem_vgpr_count = 4
3165 ; MOVREL-NEXT: reserved_vgpr_first = 0
3166 ; MOVREL-NEXT: reserved_vgpr_count = 0
3167 ; MOVREL-NEXT: reserved_sgpr_first = 0
3168 ; MOVREL-NEXT: reserved_sgpr_count = 0
3169 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3170 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
3171 ; MOVREL-NEXT: kernarg_segment_alignment = 4
3172 ; MOVREL-NEXT: group_segment_alignment = 4
3173 ; MOVREL-NEXT: private_segment_alignment = 4
3174 ; MOVREL-NEXT: wavefront_size = 6
3175 ; MOVREL-NEXT: call_convention = -1
3176 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
3177 ; MOVREL-NEXT: .end_amd_kernel_code_t
3178 ; MOVREL-NEXT: ; %bb.0: ; %entry
3179 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
3180 ; MOVREL-NEXT: s_load_dword s8, s[6:7], 0x8
3181 ; MOVREL-NEXT: s_mov_b32 s4, 0
3182 ; MOVREL-NEXT: s_mov_b32 s5, 0x40080000
3183 ; MOVREL-NEXT: s_mov_b32 s2, 0
3184 ; MOVREL-NEXT: s_mov_b32 s3, 0x40140000
3185 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
3186 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1
3187 ; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3188 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 2
3189 ; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3190 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 3
3191 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3192 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 4
3193 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3194 ; MOVREL-NEXT: v_mov_b32_e32 v0, s2
3195 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1
3196 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3
3197 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0
3198 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
3199 ; MOVREL-NEXT: s_endpgm
3201 ; GFX10-LABEL: dyn_extract_v5f64_s_s:
3202 ; GFX10: .amd_kernel_code_t
3203 ; GFX10-NEXT: amd_code_version_major = 1
3204 ; GFX10-NEXT: amd_code_version_minor = 2
3205 ; GFX10-NEXT: amd_machine_kind = 1
3206 ; GFX10-NEXT: amd_machine_version_major = 10
3207 ; GFX10-NEXT: amd_machine_version_minor = 1
3208 ; GFX10-NEXT: amd_machine_version_stepping = 0
3209 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
3210 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
3211 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
3212 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
3213 ; GFX10-NEXT: priority = 0
3214 ; GFX10-NEXT: float_mode = 240
3215 ; GFX10-NEXT: priv = 0
3216 ; GFX10-NEXT: enable_dx10_clamp = 1
3217 ; GFX10-NEXT: debug_mode = 0
3218 ; GFX10-NEXT: enable_ieee_mode = 1
3219 ; GFX10-NEXT: enable_wgp_mode = 1
3220 ; GFX10-NEXT: enable_mem_ordered = 1
3221 ; GFX10-NEXT: enable_fwd_progress = 0
3222 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3223 ; GFX10-NEXT: user_sgpr_count = 10
3224 ; GFX10-NEXT: enable_trap_handler = 0
3225 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
3226 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1
3227 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1
3228 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
3229 ; GFX10-NEXT: enable_vgpr_workitem_id = 2
3230 ; GFX10-NEXT: enable_exception_msb = 0
3231 ; GFX10-NEXT: granulated_lds_size = 0
3232 ; GFX10-NEXT: enable_exception = 0
3233 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
3234 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1
3235 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
3236 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3237 ; GFX10-NEXT: enable_sgpr_dispatch_id = 1
3238 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
3239 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
3240 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3241 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3242 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3243 ; GFX10-NEXT: enable_wavefront_size32 = 1
3244 ; GFX10-NEXT: enable_ordered_append_gds = 0
3245 ; GFX10-NEXT: private_element_size = 1
3246 ; GFX10-NEXT: is_ptr64 = 1
3247 ; GFX10-NEXT: is_dynamic_callstack = 0
3248 ; GFX10-NEXT: is_debug_enabled = 0
3249 ; GFX10-NEXT: is_xnack_enabled = 1
3250 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
3251 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
3252 ; GFX10-NEXT: gds_segment_byte_size = 0
3253 ; GFX10-NEXT: kernarg_segment_byte_size = 28
3254 ; GFX10-NEXT: workgroup_fbarrier_count = 0
3255 ; GFX10-NEXT: wavefront_sgpr_count = 9
3256 ; GFX10-NEXT: workitem_vgpr_count = 3
3257 ; GFX10-NEXT: reserved_vgpr_first = 0
3258 ; GFX10-NEXT: reserved_vgpr_count = 0
3259 ; GFX10-NEXT: reserved_sgpr_first = 0
3260 ; GFX10-NEXT: reserved_sgpr_count = 0
3261 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3262 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
3263 ; GFX10-NEXT: kernarg_segment_alignment = 4
3264 ; GFX10-NEXT: group_segment_alignment = 4
3265 ; GFX10-NEXT: private_segment_alignment = 4
3266 ; GFX10-NEXT: wavefront_size = 5
3267 ; GFX10-NEXT: call_convention = -1
3268 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
3269 ; GFX10-NEXT: .end_amd_kernel_code_t
3270 ; GFX10-NEXT: ; %bb.0: ; %entry
3271 ; GFX10-NEXT: s_clause 0x1
3272 ; GFX10-NEXT: s_load_dword s8, s[6:7], 0x8
3273 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
3274 ; GFX10-NEXT: s_mov_b32 s2, 0
3275 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000
3276 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
3277 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3278 ; GFX10-NEXT: s_cmp_eq_u32 s8, 1
3279 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3280 ; GFX10-NEXT: s_cmp_eq_u32 s8, 2
3281 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3282 ; GFX10-NEXT: s_cmp_eq_u32 s8, 3
3283 ; GFX10-NEXT: s_mov_b32 s4, 0
3284 ; GFX10-NEXT: s_mov_b32 s5, 0x40140000
3285 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3286 ; GFX10-NEXT: s_cmp_eq_u32 s8, 4
3287 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3288 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
3289 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
3290 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
3291 ; GFX10-NEXT: s_endpgm
3293 ; GFX11-LABEL: dyn_extract_v5f64_s_s:
3294 ; GFX11: .amd_kernel_code_t
3295 ; GFX11-NEXT: amd_code_version_major = 1
3296 ; GFX11-NEXT: amd_code_version_minor = 2
3297 ; GFX11-NEXT: amd_machine_kind = 1
3298 ; GFX11-NEXT: amd_machine_version_major = 11
3299 ; GFX11-NEXT: amd_machine_version_minor = 0
3300 ; GFX11-NEXT: amd_machine_version_stepping = 0
3301 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
3302 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
3303 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
3304 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
3305 ; GFX11-NEXT: priority = 0
3306 ; GFX11-NEXT: float_mode = 240
3307 ; GFX11-NEXT: priv = 0
3308 ; GFX11-NEXT: enable_dx10_clamp = 1
3309 ; GFX11-NEXT: debug_mode = 0
3310 ; GFX11-NEXT: enable_ieee_mode = 1
3311 ; GFX11-NEXT: enable_wgp_mode = 1
3312 ; GFX11-NEXT: enable_mem_ordered = 1
3313 ; GFX11-NEXT: enable_fwd_progress = 0
3314 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
3315 ; GFX11-NEXT: user_sgpr_count = 13
3316 ; GFX11-NEXT: enable_trap_handler = 0
3317 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
3318 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1
3319 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1
3320 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
3321 ; GFX11-NEXT: enable_vgpr_workitem_id = 2
3322 ; GFX11-NEXT: enable_exception_msb = 0
3323 ; GFX11-NEXT: granulated_lds_size = 0
3324 ; GFX11-NEXT: enable_exception = 0
3325 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
3326 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1
3327 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
3328 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
3329 ; GFX11-NEXT: enable_sgpr_dispatch_id = 1
3330 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
3331 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
3332 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
3333 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
3334 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
3335 ; GFX11-NEXT: enable_wavefront_size32 = 1
3336 ; GFX11-NEXT: enable_ordered_append_gds = 0
3337 ; GFX11-NEXT: private_element_size = 1
3338 ; GFX11-NEXT: is_ptr64 = 1
3339 ; GFX11-NEXT: is_dynamic_callstack = 0
3340 ; GFX11-NEXT: is_debug_enabled = 0
3341 ; GFX11-NEXT: is_xnack_enabled = 0
3342 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
3343 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
3344 ; GFX11-NEXT: gds_segment_byte_size = 0
3345 ; GFX11-NEXT: kernarg_segment_byte_size = 28
3346 ; GFX11-NEXT: workgroup_fbarrier_count = 0
3347 ; GFX11-NEXT: wavefront_sgpr_count = 7
3348 ; GFX11-NEXT: workitem_vgpr_count = 3
3349 ; GFX11-NEXT: reserved_vgpr_first = 0
3350 ; GFX11-NEXT: reserved_vgpr_count = 0
3351 ; GFX11-NEXT: reserved_sgpr_first = 0
3352 ; GFX11-NEXT: reserved_sgpr_count = 0
3353 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
3354 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
3355 ; GFX11-NEXT: kernarg_segment_alignment = 4
3356 ; GFX11-NEXT: group_segment_alignment = 4
3357 ; GFX11-NEXT: private_segment_alignment = 4
3358 ; GFX11-NEXT: wavefront_size = 5
3359 ; GFX11-NEXT: call_convention = -1
3360 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
3361 ; GFX11-NEXT: .end_amd_kernel_code_t
3362 ; GFX11-NEXT: ; %bb.0: ; %entry
3363 ; GFX11-NEXT: s_clause 0x1
3364 ; GFX11-NEXT: s_load_b32 s6, s[2:3], 0x8
3365 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
3366 ; GFX11-NEXT: s_mov_b32 s2, 0
3367 ; GFX11-NEXT: s_mov_b32 s3, 0x40080000
3368 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
3369 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3370 ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
3371 ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3372 ; GFX11-NEXT: s_cmp_eq_u32 s6, 2
3373 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3374 ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
3375 ; GFX11-NEXT: s_mov_b32 s4, 0
3376 ; GFX11-NEXT: s_mov_b32 s5, 0x40140000
3377 ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3378 ; GFX11-NEXT: s_cmp_eq_u32 s6, 4
3379 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3380 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3381 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
3382 ; GFX11-NEXT: s_nop 0
3383 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3384 ; GFX11-NEXT: s_endpgm
3386 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
3387 store double %ext, ptr addrspace(1) %out
; Dynamically indexed extractelement from the constant vector
; <15 x float> <1.0, ..., 15.0>, with the index passed as a plain function
; argument (it is read from v0 in the checks below). The expected lowering is a
; chain of v_cmp_eq_u32 / v_cndmask_b32 steps, one per compared index.
; NOTE(review): the chain also compares the index against 15, which is one past
; the last valid index of a 15-element vector. For that case the checks select
; v0 (the index register itself) under the GCN prefix, s4 under GFX10 and s0
; under GFX11, none of which is loaded with a vector element in this function.
; Presumably this is an artifact of widening the vector to 16 lanes with an
; undefined last lane - confirm before treating the sequence as intended.
3391 define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
3392 ; GCN-LABEL: dyn_extract_v15f32_const_s_v:
3393 ; GCN: ; %bb.0: ; %entry
3394 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3395 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3396 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
3397 ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
3398 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
3399 ; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
3400 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
3401 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
3402 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
3403 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
3404 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
3405 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
3406 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
3407 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
3408 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
3409 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
3410 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
3411 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
3412 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
3413 ; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000
3414 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
3415 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0
3416 ; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000
3417 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
3418 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
3419 ; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000
3420 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
3421 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
3422 ; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000
3423 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
3424 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
3425 ; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000
3426 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
3427 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
3428 ; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000
3429 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
3430 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
3431 ; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000
3432 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
3433 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
3434 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
3435 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
3436 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
3437 ; GCN-NEXT: s_setpc_b64 s[30:31]
3439 ; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
3440 ; GFX10: ; %bb.0: ; %entry
3441 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3443 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3444 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3445 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3446 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3447 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3448 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3449 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3450 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3451 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3452 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3453 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3454 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3455 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3456 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3457 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3458 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3459 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3460 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3461 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3462 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3463 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3464 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3465 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3466 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3467 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3468 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3469 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3470 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3471 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo
3472 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3474 ; GFX11-LABEL: dyn_extract_v15f32_const_s_v:
3475 ; GFX11: ; %bb.0: ; %entry
3476 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3477 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3478 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3479 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3480 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3481 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3482 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3483 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3484 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3485 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3486 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3487 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3488 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3489 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3490 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3491 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3492 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3493 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3494 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3495 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3496 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3497 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3498 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3499 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3500 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3501 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3502 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3503 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3504 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3505 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3506 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3507 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3509 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
; Shader (amdgpu_ps) variant of the constant <15 x float> <1.0, ..., 15.0>
; extract, with the index supplied as an inreg i32 (read from s2 in the checks).
; The GCN and GFX10PLUS check blocks expect the same instruction stream. The
; index is copied to m0, the fifteen constants are materialized into s4 through
; s18, s_movrels_b32 reads relative to s4 into s0, and s0 is copied to v0
; before the shader epilog.
3513 define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3514 ; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3515 ; GCN: ; %bb.0: ; %entry
3516 ; GCN-NEXT: s_mov_b32 s4, 1.0
3517 ; GCN-NEXT: s_mov_b32 m0, s2
3518 ; GCN-NEXT: s_mov_b32 s18, 0x41700000
3519 ; GCN-NEXT: s_mov_b32 s17, 0x41600000
3520 ; GCN-NEXT: s_mov_b32 s16, 0x41500000
3521 ; GCN-NEXT: s_mov_b32 s15, 0x41400000
3522 ; GCN-NEXT: s_mov_b32 s14, 0x41300000
3523 ; GCN-NEXT: s_mov_b32 s13, 0x41200000
3524 ; GCN-NEXT: s_mov_b32 s12, 0x41100000
3525 ; GCN-NEXT: s_mov_b32 s11, 0x41000000
3526 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000
3527 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000
3528 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000
3529 ; GCN-NEXT: s_mov_b32 s7, 4.0
3530 ; GCN-NEXT: s_mov_b32 s6, 0x40400000
3531 ; GCN-NEXT: s_mov_b32 s5, 2.0
3532 ; GCN-NEXT: s_movrels_b32 s0, s4
3533 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3534 ; GCN-NEXT: ; return to shader part epilog
3536 ; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s:
3537 ; GFX10PLUS: ; %bb.0: ; %entry
3538 ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
3539 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3540 ; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000
3541 ; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000
3542 ; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000
3543 ; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000
3544 ; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000
3545 ; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000
3546 ; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000
3547 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
3548 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
3549 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
3550 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
3551 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
3552 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
3553 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
3554 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
3555 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3556 ; GFX10PLUS-NEXT: ; return to shader part epilog
3558 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
; Shader (amdgpu_ps) extractelement from a <15 x float> passed inreg (read from
; s2 through s16 in the checks) with the index as a non-inreg i32 (read from
; v0). The expected lowering is a v_cmp_eq_u32 / v_cndmask_b32 chain over the
; index, with the result left in v0.
; NOTE(review): the last compare in both check blocks tests index 15, which is
; out of range for a 15-element vector. The value selected there is v0 (the
; index register) under the GCN prefix and s0 under GFX10PLUS, and neither
; holds a vector element. Presumably this is an artifact of widening to 16
; lanes - confirm before treating the sequence as intended.
3562 define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3563 ; GCN-LABEL: dyn_extract_v15f32_s_v:
3564 ; GCN: ; %bb.0: ; %entry
3565 ; GCN-NEXT: v_mov_b32_e32 v1, s2
3566 ; GCN-NEXT: v_mov_b32_e32 v2, s3
3567 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
3568 ; GCN-NEXT: v_mov_b32_e32 v3, s4
3569 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
3570 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
3571 ; GCN-NEXT: v_mov_b32_e32 v4, s5
3572 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
3573 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
3574 ; GCN-NEXT: v_mov_b32_e32 v5, s6
3575 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
3576 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
3577 ; GCN-NEXT: v_mov_b32_e32 v6, s7
3578 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
3579 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
3580 ; GCN-NEXT: v_mov_b32_e32 v7, s8
3581 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
3582 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
3583 ; GCN-NEXT: v_mov_b32_e32 v8, s9
3584 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
3585 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
3586 ; GCN-NEXT: v_mov_b32_e32 v9, s10
3587 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
3588 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0
3589 ; GCN-NEXT: v_mov_b32_e32 v10, s11
3590 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
3591 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
3592 ; GCN-NEXT: v_mov_b32_e32 v11, s12
3593 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
3594 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
3595 ; GCN-NEXT: v_mov_b32_e32 v12, s13
3596 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
3597 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
3598 ; GCN-NEXT: v_mov_b32_e32 v13, s14
3599 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
3600 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
3601 ; GCN-NEXT: v_mov_b32_e32 v14, s15
3602 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
3603 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
3604 ; GCN-NEXT: v_mov_b32_e32 v15, s16
3605 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
3606 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
3607 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
3608 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
3609 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
3610 ; GCN-NEXT: ; return to shader part epilog
3612 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v:
3613 ; GFX10PLUS: ; %bb.0: ; %entry
3614 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3
3615 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3616 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
3617 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3618 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3619 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3620 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3621 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3622 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3623 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3624 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3625 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3626 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3627 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3628 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3629 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3630 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3631 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3632 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3633 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3634 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3635 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3636 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3637 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3638 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
3639 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3640 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo
3641 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3642 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
3643 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
3644 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3645 ; GFX10PLUS-NEXT: ; return to shader part epilog
3647 %ext = extractelement <15 x float> %vec, i32 %sel
3651 define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3652 ; GCN-LABEL: dyn_extract_v15f32_v_v:
3653 ; GCN: ; %bb.0: ; %entry
3654 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3655 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3656 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3657 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3658 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3659 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3660 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3661 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3662 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3663 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3664 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3665 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3666 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3667 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3668 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3669 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3670 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3671 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3672 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3673 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3674 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3675 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3676 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3677 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3678 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3679 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3680 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3681 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3682 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3683 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3684 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3685 ; GCN-NEXT: s_setpc_b64 s[30:31]
3687 ; GFX10-LABEL: dyn_extract_v15f32_v_v:
3688 ; GFX10: ; %bb.0: ; %entry
3689 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3690 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3691 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3692 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3693 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3694 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3695 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3696 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3697 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3698 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3699 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3700 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3701 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3702 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3703 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3704 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3705 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3706 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3707 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3708 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3709 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3710 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3711 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3712 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3713 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3714 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3715 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3716 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3717 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3718 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3719 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3720 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3722 ; GFX11-LABEL: dyn_extract_v15f32_v_v:
3723 ; GFX11: ; %bb.0: ; %entry
3724 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3725 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3726 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3727 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3728 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3729 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3730 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3731 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3732 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3733 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3734 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3735 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3736 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3737 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3738 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3739 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3740 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3741 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3742 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3743 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3744 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3745 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3746 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3747 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3748 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3749 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3750 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3751 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3752 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3753 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3754 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
3755 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3757 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement from a <15 x float> argument using an `inreg` i32 index.
; Expected lowering is a single indexed register move with the index taken from s2:
;   - GPRIDX checks (gfx900): s_set_gpr_idx_on / s_set_gpr_idx_off around a v_mov_b32
;   - MOVREL checks (fiji) and GFX10PLUS checks (gfx1010, gfx1100): m0 = s2, then v_movrels_b32
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
3761 define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3762 ; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3763 ; GPRIDX: ; %bb.0: ; %entry
3764 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
3765 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
3766 ; GPRIDX-NEXT: s_set_gpr_idx_off
3767 ; GPRIDX-NEXT: ; return to shader part epilog
3769 ; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3770 ; MOVREL: ; %bb.0: ; %entry
3771 ; MOVREL-NEXT: s_mov_b32 m0, s2
3772 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
3773 ; MOVREL-NEXT: ; return to shader part epilog
3775 ; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s:
3776 ; GFX10PLUS: ; %bb.0: ; %entry
3777 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3778 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
3779 ; GFX10PLUS-NEXT: ; return to shader part epilog
3781 %ext = extractelement <15 x float> %vec, i32 %sel
; Dynamic extractelement where both the <15 x float> vector and the i32 index are
; `inreg` arguments. The GCN and GFX10PLUS check bodies are identical: copy s2..s16
; down to s0..s14, load m0 from s17, do one s_movrels_b32 from s0, then move the
; result into v0.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-edit.
3785 define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3786 ; GCN-LABEL: dyn_extract_v15f32_s_s:
3787 ; GCN: ; %bb.0: ; %entry
3788 ; GCN-NEXT: s_mov_b32 s0, s2
3789 ; GCN-NEXT: s_mov_b32 m0, s17
3790 ; GCN-NEXT: s_mov_b32 s1, s3
3791 ; GCN-NEXT: s_mov_b32 s2, s4
3792 ; GCN-NEXT: s_mov_b32 s3, s5
3793 ; GCN-NEXT: s_mov_b32 s4, s6
3794 ; GCN-NEXT: s_mov_b32 s5, s7
3795 ; GCN-NEXT: s_mov_b32 s6, s8
3796 ; GCN-NEXT: s_mov_b32 s7, s9
3797 ; GCN-NEXT: s_mov_b32 s8, s10
3798 ; GCN-NEXT: s_mov_b32 s9, s11
3799 ; GCN-NEXT: s_mov_b32 s10, s12
3800 ; GCN-NEXT: s_mov_b32 s11, s13
3801 ; GCN-NEXT: s_mov_b32 s12, s14
3802 ; GCN-NEXT: s_mov_b32 s13, s15
3803 ; GCN-NEXT: s_mov_b32 s14, s16
3804 ; GCN-NEXT: s_movrels_b32 s0, s0
3805 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3806 ; GCN-NEXT: ; return to shader part epilog
3808 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s:
3809 ; GFX10PLUS: ; %bb.0: ; %entry
3810 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3811 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17
3812 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3813 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3814 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3815 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3816 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3817 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3818 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3819 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3820 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3821 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3822 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3823 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3824 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3825 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3826 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s0
3827 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3828 ; GFX10PLUS-NEXT: ; return to shader part epilog
3830 %ext = extractelement <15 x float> %vec, i32 %sel
; All-`inreg` dynamic extract from a <15 x float>, with the index computed as %sel + 3.
; No add instruction appears in the expected output: m0 is still loaded from s17
; and the constant 3 shows up as the s_movrels_b32 source register (s3 instead of s0).
; The two check bodies differ only in where `s_mov_b32 s0, s2` is scheduled.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-edit.
3834 define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3835 ; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3836 ; GCN: ; %bb.0: ; %entry
3837 ; GCN-NEXT: s_mov_b32 s0, s2
3838 ; GCN-NEXT: s_mov_b32 s1, s3
3839 ; GCN-NEXT: s_mov_b32 s3, s5
3840 ; GCN-NEXT: s_mov_b32 m0, s17
3841 ; GCN-NEXT: s_mov_b32 s2, s4
3842 ; GCN-NEXT: s_mov_b32 s4, s6
3843 ; GCN-NEXT: s_mov_b32 s5, s7
3844 ; GCN-NEXT: s_mov_b32 s6, s8
3845 ; GCN-NEXT: s_mov_b32 s7, s9
3846 ; GCN-NEXT: s_mov_b32 s8, s10
3847 ; GCN-NEXT: s_mov_b32 s9, s11
3848 ; GCN-NEXT: s_mov_b32 s10, s12
3849 ; GCN-NEXT: s_mov_b32 s11, s13
3850 ; GCN-NEXT: s_mov_b32 s12, s14
3851 ; GCN-NEXT: s_mov_b32 s13, s15
3852 ; GCN-NEXT: s_mov_b32 s14, s16
3853 ; GCN-NEXT: s_movrels_b32 s0, s3
3854 ; GCN-NEXT: v_mov_b32_e32 v0, s0
3855 ; GCN-NEXT: ; return to shader part epilog
3857 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3:
3858 ; GFX10PLUS: ; %bb.0: ; %entry
3859 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3860 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3861 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17
3862 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3863 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3864 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3865 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3866 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3867 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3868 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3869 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3870 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3871 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3872 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3873 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3874 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3875 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s3
3876 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
3877 ; GFX10PLUS-NEXT: ; return to shader part epilog
3879 %add = add i32 %sel, 3
3880 %ext = extractelement <15 x float> %vec, i32 %add
; Extract from a <15 x float> argument with a non-`inreg` index of %sel + 3; the
; index lives in v15 in the expected output.
; Every target is expected to emit an explicit add of 3 into v15, followed by a chain
; of v_cmp_eq_u32 / v_cndmask_b32 pairs for indices 1 through 15 accumulating in v0.
; The add mnemonic is target-specific: v_add_u32 on gfx900, v_add_u32 with a vcc
; operand on fiji, v_add_nc_u32 on gfx1010 and gfx1100.
; The last select (index 15, one past the final element of the 15-element vector)
; picks v0 itself on gfx900/fiji, s4 on gfx1010 and s0 on gfx1100; s4 and s0 are not
; set anywhere in the expected sequence.
; NOTE(review): presumably a don't-care value because index 15 is out of range - confirm.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-edit.
3884 define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3885 ; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3886 ; GPRIDX: ; %bb.0: ; %entry
3887 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3888 ; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15
3889 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3890 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3891 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3892 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3893 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3894 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3895 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3896 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3897 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3898 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3899 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3900 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3901 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3902 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3903 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3904 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3905 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3906 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3907 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3908 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3909 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3910 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3911 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3912 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3913 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3914 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3915 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3916 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3917 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3918 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3919 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
3921 ; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3922 ; MOVREL: ; %bb.0: ; %entry
3923 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3924 ; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15
3925 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15
3926 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3927 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15
3928 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3929 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15
3930 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
3931 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15
3932 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
3933 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15
3934 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
3935 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15
3936 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
3937 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15
3938 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
3939 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15
3940 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
3941 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15
3942 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
3943 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15
3944 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
3945 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15
3946 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3947 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15
3948 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3949 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15
3950 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
3951 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
3952 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
3953 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
3954 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
3955 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
3957 ; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
3958 ; GFX10: ; %bb.0: ; %entry
3959 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3960 ; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15
3961 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3962 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3963 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
3964 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3965 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
3966 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3967 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
3968 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3969 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
3970 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3971 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
3972 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3973 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
3974 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3975 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
3976 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3977 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
3978 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3979 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
3980 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3981 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
3982 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3983 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
3984 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3985 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
3986 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3987 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
3988 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3989 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
3990 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3991 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3993 ; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3:
3994 ; GFX11: ; %bb.0: ; %entry
3995 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3996 ; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
3997 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
3998 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3999 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
4000 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
4001 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
4002 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
4003 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
4004 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
4005 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
4006 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
4007 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
4008 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
4009 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
4010 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
4011 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
4012 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
4013 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
4014 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
4015 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
4016 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
4017 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
4018 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
4019 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
4020 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
4021 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
4022 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
4023 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
4024 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
4025 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
4026 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
4027 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4029 %add = add i32 %sel, 3
4030 %ext = extractelement <15 x float> %vec, i32 %add
; Kernel that extracts element %sel of the constant vector <1.0, 2.0, 3.0, 4.0>
; and stores the result through %out.
; For each target the checks cover the whole .amd_kernel_code_t header and then
; the code: two scalar loads at offsets 0x8 and 0x0 (the index used by the
; compares, and the store address), an s_cmp_eq_u32 / s_cselect_b32 chain for
; indices 1..3 that starts from 1.0 (0x40400000 is the bit pattern of 3.0), and
; finally the store.
; The store is flat_store_dword on fiji and a global store on the other targets;
; gfx1100 additionally emits s_nop and s_sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-edit.
4034 define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4035 ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
4036 ; GPRIDX: .amd_kernel_code_t
4037 ; GPRIDX-NEXT: amd_code_version_major = 1
4038 ; GPRIDX-NEXT: amd_code_version_minor = 2
4039 ; GPRIDX-NEXT: amd_machine_kind = 1
4040 ; GPRIDX-NEXT: amd_machine_version_major = 9
4041 ; GPRIDX-NEXT: amd_machine_version_minor = 0
4042 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
4043 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
4044 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
4045 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
4046 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
4047 ; GPRIDX-NEXT: priority = 0
4048 ; GPRIDX-NEXT: float_mode = 240
4049 ; GPRIDX-NEXT: priv = 0
4050 ; GPRIDX-NEXT: enable_dx10_clamp = 1
4051 ; GPRIDX-NEXT: debug_mode = 0
4052 ; GPRIDX-NEXT: enable_ieee_mode = 1
4053 ; GPRIDX-NEXT: enable_wgp_mode = 0
4054 ; GPRIDX-NEXT: enable_mem_ordered = 0
4055 ; GPRIDX-NEXT: enable_fwd_progress = 0
4056 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4057 ; GPRIDX-NEXT: user_sgpr_count = 10
4058 ; GPRIDX-NEXT: enable_trap_handler = 0
4059 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
4060 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1
4061 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1
4062 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
4063 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 2
4064 ; GPRIDX-NEXT: enable_exception_msb = 0
4065 ; GPRIDX-NEXT: granulated_lds_size = 0
4066 ; GPRIDX-NEXT: enable_exception = 0
4067 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
4068 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1
4069 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
4070 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4071 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1
4072 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
4073 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
4074 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4075 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4076 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4077 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
4078 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
4079 ; GPRIDX-NEXT: private_element_size = 1
4080 ; GPRIDX-NEXT: is_ptr64 = 1
4081 ; GPRIDX-NEXT: is_dynamic_callstack = 0
4082 ; GPRIDX-NEXT: is_debug_enabled = 0
4083 ; GPRIDX-NEXT: is_xnack_enabled = 1
4084 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
4085 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
4086 ; GPRIDX-NEXT: gds_segment_byte_size = 0
4087 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28
4088 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
4089 ; GPRIDX-NEXT: wavefront_sgpr_count = 12
4090 ; GPRIDX-NEXT: workitem_vgpr_count = 2
4091 ; GPRIDX-NEXT: reserved_vgpr_first = 0
4092 ; GPRIDX-NEXT: reserved_vgpr_count = 0
4093 ; GPRIDX-NEXT: reserved_sgpr_first = 0
4094 ; GPRIDX-NEXT: reserved_sgpr_count = 0
4095 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4096 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
4097 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
4098 ; GPRIDX-NEXT: group_segment_alignment = 4
4099 ; GPRIDX-NEXT: private_segment_alignment = 4
4100 ; GPRIDX-NEXT: wavefront_size = 6
4101 ; GPRIDX-NEXT: call_convention = -1
4102 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
4103 ; GPRIDX-NEXT: .end_amd_kernel_code_t
4104 ; GPRIDX-NEXT: ; %bb.0: ; %entry
4105 ; GPRIDX-NEXT: s_load_dword s2, s[6:7], 0x8
4106 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4107 ; GPRIDX-NEXT: v_mov_b32_e32 v1, 0
4108 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
4109 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1
4110 ; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0
4111 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2
4112 ; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3
4113 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3
4114 ; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3
4115 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
4116 ; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1]
4117 ; GPRIDX-NEXT: s_endpgm
4119 ; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
4120 ; MOVREL: .amd_kernel_code_t
4121 ; MOVREL-NEXT: amd_code_version_major = 1
4122 ; MOVREL-NEXT: amd_code_version_minor = 2
4123 ; MOVREL-NEXT: amd_machine_kind = 1
4124 ; MOVREL-NEXT: amd_machine_version_major = 8
4125 ; MOVREL-NEXT: amd_machine_version_minor = 0
4126 ; MOVREL-NEXT: amd_machine_version_stepping = 3
4127 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
4128 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
4129 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
4130 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0
4131 ; MOVREL-NEXT: priority = 0
4132 ; MOVREL-NEXT: float_mode = 240
4133 ; MOVREL-NEXT: priv = 0
4134 ; MOVREL-NEXT: enable_dx10_clamp = 1
4135 ; MOVREL-NEXT: debug_mode = 0
4136 ; MOVREL-NEXT: enable_ieee_mode = 1
4137 ; MOVREL-NEXT: enable_wgp_mode = 0
4138 ; MOVREL-NEXT: enable_mem_ordered = 0
4139 ; MOVREL-NEXT: enable_fwd_progress = 0
4140 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4141 ; MOVREL-NEXT: user_sgpr_count = 10
4142 ; MOVREL-NEXT: enable_trap_handler = 0
4143 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
4144 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1
4145 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1
4146 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
4147 ; MOVREL-NEXT: enable_vgpr_workitem_id = 2
4148 ; MOVREL-NEXT: enable_exception_msb = 0
4149 ; MOVREL-NEXT: granulated_lds_size = 0
4150 ; MOVREL-NEXT: enable_exception = 0
4151 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
4152 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1
4153 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
4154 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4155 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 1
4156 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
4157 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
4158 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4159 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4160 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4161 ; MOVREL-NEXT: enable_wavefront_size32 = 0
4162 ; MOVREL-NEXT: enable_ordered_append_gds = 0
4163 ; MOVREL-NEXT: private_element_size = 1
4164 ; MOVREL-NEXT: is_ptr64 = 1
4165 ; MOVREL-NEXT: is_dynamic_callstack = 0
4166 ; MOVREL-NEXT: is_debug_enabled = 0
4167 ; MOVREL-NEXT: is_xnack_enabled = 0
4168 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
4169 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
4170 ; MOVREL-NEXT: gds_segment_byte_size = 0
4171 ; MOVREL-NEXT: kernarg_segment_byte_size = 28
4172 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
4173 ; MOVREL-NEXT: wavefront_sgpr_count = 8
4174 ; MOVREL-NEXT: workitem_vgpr_count = 3
4175 ; MOVREL-NEXT: reserved_vgpr_first = 0
4176 ; MOVREL-NEXT: reserved_vgpr_count = 0
4177 ; MOVREL-NEXT: reserved_sgpr_first = 0
4178 ; MOVREL-NEXT: reserved_sgpr_count = 0
4179 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4180 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
4181 ; MOVREL-NEXT: kernarg_segment_alignment = 4
4182 ; MOVREL-NEXT: group_segment_alignment = 4
4183 ; MOVREL-NEXT: private_segment_alignment = 4
4184 ; MOVREL-NEXT: wavefront_size = 6
4185 ; MOVREL-NEXT: call_convention = -1
4186 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
4187 ; MOVREL-NEXT: .end_amd_kernel_code_t
4188 ; MOVREL-NEXT: ; %bb.0: ; %entry
4189 ; MOVREL-NEXT: s_load_dword s2, s[6:7], 0x8
4190 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4191 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
4192 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 1
4193 ; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0
4194 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 2
4195 ; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3
4196 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 3
4197 ; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3
4198 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0
4199 ; MOVREL-NEXT: v_mov_b32_e32 v2, s2
4200 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1
4201 ; MOVREL-NEXT: flat_store_dword v[0:1], v2
4202 ; MOVREL-NEXT: s_endpgm
4204 ; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
4205 ; GFX10: .amd_kernel_code_t
4206 ; GFX10-NEXT: amd_code_version_major = 1
4207 ; GFX10-NEXT: amd_code_version_minor = 2
4208 ; GFX10-NEXT: amd_machine_kind = 1
4209 ; GFX10-NEXT: amd_machine_version_major = 10
4210 ; GFX10-NEXT: amd_machine_version_minor = 1
4211 ; GFX10-NEXT: amd_machine_version_stepping = 0
4212 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
4213 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
4214 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
4215 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
4216 ; GFX10-NEXT: priority = 0
4217 ; GFX10-NEXT: float_mode = 240
4218 ; GFX10-NEXT: priv = 0
4219 ; GFX10-NEXT: enable_dx10_clamp = 1
4220 ; GFX10-NEXT: debug_mode = 0
4221 ; GFX10-NEXT: enable_ieee_mode = 1
4222 ; GFX10-NEXT: enable_wgp_mode = 1
4223 ; GFX10-NEXT: enable_mem_ordered = 1
4224 ; GFX10-NEXT: enable_fwd_progress = 0
4225 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4226 ; GFX10-NEXT: user_sgpr_count = 10
4227 ; GFX10-NEXT: enable_trap_handler = 0
4228 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
4229 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1
4230 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1
4231 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
4232 ; GFX10-NEXT: enable_vgpr_workitem_id = 2
4233 ; GFX10-NEXT: enable_exception_msb = 0
4234 ; GFX10-NEXT: granulated_lds_size = 0
4235 ; GFX10-NEXT: enable_exception = 0
4236 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
4237 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1
4238 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
4239 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4240 ; GFX10-NEXT: enable_sgpr_dispatch_id = 1
4241 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
4242 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
4243 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4244 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4245 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4246 ; GFX10-NEXT: enable_wavefront_size32 = 1
4247 ; GFX10-NEXT: enable_ordered_append_gds = 0
4248 ; GFX10-NEXT: private_element_size = 1
4249 ; GFX10-NEXT: is_ptr64 = 1
4250 ; GFX10-NEXT: is_dynamic_callstack = 0
4251 ; GFX10-NEXT: is_debug_enabled = 0
4252 ; GFX10-NEXT: is_xnack_enabled = 1
4253 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
4254 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
4255 ; GFX10-NEXT: gds_segment_byte_size = 0
4256 ; GFX10-NEXT: kernarg_segment_byte_size = 28
4257 ; GFX10-NEXT: workgroup_fbarrier_count = 0
4258 ; GFX10-NEXT: wavefront_sgpr_count = 8
4259 ; GFX10-NEXT: workitem_vgpr_count = 2
4260 ; GFX10-NEXT: reserved_vgpr_first = 0
4261 ; GFX10-NEXT: reserved_vgpr_count = 0
4262 ; GFX10-NEXT: reserved_sgpr_first = 0
4263 ; GFX10-NEXT: reserved_sgpr_count = 0
4264 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4265 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
4266 ; GFX10-NEXT: kernarg_segment_alignment = 4
4267 ; GFX10-NEXT: group_segment_alignment = 4
4268 ; GFX10-NEXT: private_segment_alignment = 4
4269 ; GFX10-NEXT: wavefront_size = 5
4270 ; GFX10-NEXT: call_convention = -1
4271 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
4272 ; GFX10-NEXT: .end_amd_kernel_code_t
4273 ; GFX10-NEXT: ; %bb.0: ; %entry
4274 ; GFX10-NEXT: s_clause 0x1
4275 ; GFX10-NEXT: s_load_dword s2, s[6:7], 0x8
4276 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4277 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
4278 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4279 ; GFX10-NEXT: s_cmp_eq_u32 s2, 1
4280 ; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0
4281 ; GFX10-NEXT: s_cmp_eq_u32 s2, 2
4282 ; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3
4283 ; GFX10-NEXT: s_cmp_eq_u32 s2, 3
4284 ; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3
4285 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
4286 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
4287 ; GFX10-NEXT: s_endpgm
4289 ; GFX11-LABEL: dyn_extract_v4f32_s_s_s:
4290 ; GFX11: .amd_kernel_code_t
4291 ; GFX11-NEXT: amd_code_version_major = 1
4292 ; GFX11-NEXT: amd_code_version_minor = 2
4293 ; GFX11-NEXT: amd_machine_kind = 1
4294 ; GFX11-NEXT: amd_machine_version_major = 11
4295 ; GFX11-NEXT: amd_machine_version_minor = 0
4296 ; GFX11-NEXT: amd_machine_version_stepping = 0
4297 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
4298 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
4299 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
4300 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
4301 ; GFX11-NEXT: priority = 0
4302 ; GFX11-NEXT: float_mode = 240
4303 ; GFX11-NEXT: priv = 0
4304 ; GFX11-NEXT: enable_dx10_clamp = 1
4305 ; GFX11-NEXT: debug_mode = 0
4306 ; GFX11-NEXT: enable_ieee_mode = 1
4307 ; GFX11-NEXT: enable_wgp_mode = 1
4308 ; GFX11-NEXT: enable_mem_ordered = 1
4309 ; GFX11-NEXT: enable_fwd_progress = 0
4310 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4311 ; GFX11-NEXT: user_sgpr_count = 13
4312 ; GFX11-NEXT: enable_trap_handler = 0
4313 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
4314 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1
4315 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1
4316 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
4317 ; GFX11-NEXT: enable_vgpr_workitem_id = 2
4318 ; GFX11-NEXT: enable_exception_msb = 0
4319 ; GFX11-NEXT: granulated_lds_size = 0
4320 ; GFX11-NEXT: enable_exception = 0
4321 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
4322 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1
4323 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
4324 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4325 ; GFX11-NEXT: enable_sgpr_dispatch_id = 1
4326 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
4327 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
4328 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4329 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4330 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4331 ; GFX11-NEXT: enable_wavefront_size32 = 1
4332 ; GFX11-NEXT: enable_ordered_append_gds = 0
4333 ; GFX11-NEXT: private_element_size = 1
4334 ; GFX11-NEXT: is_ptr64 = 1
4335 ; GFX11-NEXT: is_dynamic_callstack = 0
4336 ; GFX11-NEXT: is_debug_enabled = 0
4337 ; GFX11-NEXT: is_xnack_enabled = 0
4338 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
4339 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
4340 ; GFX11-NEXT: gds_segment_byte_size = 0
4341 ; GFX11-NEXT: kernarg_segment_byte_size = 28
4342 ; GFX11-NEXT: workgroup_fbarrier_count = 0
4343 ; GFX11-NEXT: wavefront_sgpr_count = 5
4344 ; GFX11-NEXT: workitem_vgpr_count = 2
4345 ; GFX11-NEXT: reserved_vgpr_first = 0
4346 ; GFX11-NEXT: reserved_vgpr_count = 0
4347 ; GFX11-NEXT: reserved_sgpr_first = 0
4348 ; GFX11-NEXT: reserved_sgpr_count = 0
4349 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4350 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
4351 ; GFX11-NEXT: kernarg_segment_alignment = 4
4352 ; GFX11-NEXT: group_segment_alignment = 4
4353 ; GFX11-NEXT: private_segment_alignment = 4
4354 ; GFX11-NEXT: wavefront_size = 5
4355 ; GFX11-NEXT: call_convention = -1
4356 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
4357 ; GFX11-NEXT: .end_amd_kernel_code_t
4358 ; GFX11-NEXT: ; %bb.0: ; %entry
4359 ; GFX11-NEXT: s_clause 0x1
4360 ; GFX11-NEXT: s_load_b32 s4, s[2:3], 0x8
4361 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
4362 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4363 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4364 ; GFX11-NEXT: s_cmp_eq_u32 s4, 1
4365 ; GFX11-NEXT: s_cselect_b32 s2, 2.0, 1.0
4366 ; GFX11-NEXT: s_cmp_eq_u32 s4, 2
4367 ; GFX11-NEXT: s_cselect_b32 s2, 0x40400000, s2
4368 ; GFX11-NEXT: s_cmp_eq_u32 s4, 3
4369 ; GFX11-NEXT: s_cselect_b32 s2, 4.0, s2
4370 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
4371 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
4372 ; GFX11-NEXT: s_nop 0
4373 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4374 ; GFX11-NEXT: s_endpgm
4376 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
4377 store float %ext, ptr addrspace(1) %out
; Dynamic extract from a constant <4 x double> vector inside a kernel: %sel is
; a kernel argument and the selected element is stored to %out.
; On every target the expected code picks the element with a chain of
; s_cmp_eq_u32 / s_cselect_b64 on indices 1, 2 and 3, so 1.0 is the value left
; for any other index. 1.0, 2.0 and 4.0 appear as inline operands, while 3.0 is
; materialized in s[2:3] (0x40080000 is its high dword, the low dword is 0).
; The checks also pin the complete .amd_kernel_code_t header for each target.
4381 define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4382 ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
4383 ; GPRIDX: .amd_kernel_code_t
4384 ; GPRIDX-NEXT: amd_code_version_major = 1
4385 ; GPRIDX-NEXT: amd_code_version_minor = 2
4386 ; GPRIDX-NEXT: amd_machine_kind = 1
4387 ; GPRIDX-NEXT: amd_machine_version_major = 9
4388 ; GPRIDX-NEXT: amd_machine_version_minor = 0
4389 ; GPRIDX-NEXT: amd_machine_version_stepping = 0
4390 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
4391 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
4392 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
4393 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
4394 ; GPRIDX-NEXT: priority = 0
4395 ; GPRIDX-NEXT: float_mode = 240
4396 ; GPRIDX-NEXT: priv = 0
4397 ; GPRIDX-NEXT: enable_dx10_clamp = 1
4398 ; GPRIDX-NEXT: debug_mode = 0
4399 ; GPRIDX-NEXT: enable_ieee_mode = 1
4400 ; GPRIDX-NEXT: enable_wgp_mode = 0
4401 ; GPRIDX-NEXT: enable_mem_ordered = 0
4402 ; GPRIDX-NEXT: enable_fwd_progress = 0
4403 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4404 ; GPRIDX-NEXT: user_sgpr_count = 10
4405 ; GPRIDX-NEXT: enable_trap_handler = 0
4406 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1
4407 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1
4408 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1
4409 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0
4410 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 2
4411 ; GPRIDX-NEXT: enable_exception_msb = 0
4412 ; GPRIDX-NEXT: granulated_lds_size = 0
4413 ; GPRIDX-NEXT: enable_exception = 0
4414 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1
4415 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1
4416 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0
4417 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4418 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1
4419 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0
4420 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0
4421 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4422 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4423 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4424 ; GPRIDX-NEXT: enable_wavefront_size32 = 0
4425 ; GPRIDX-NEXT: enable_ordered_append_gds = 0
4426 ; GPRIDX-NEXT: private_element_size = 1
4427 ; GPRIDX-NEXT: is_ptr64 = 1
4428 ; GPRIDX-NEXT: is_dynamic_callstack = 0
4429 ; GPRIDX-NEXT: is_debug_enabled = 0
4430 ; GPRIDX-NEXT: is_xnack_enabled = 1
4431 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
4432 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
4433 ; GPRIDX-NEXT: gds_segment_byte_size = 0
4434 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28
4435 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0
4436 ; GPRIDX-NEXT: wavefront_sgpr_count = 13
4437 ; GPRIDX-NEXT: workitem_vgpr_count = 3
4438 ; GPRIDX-NEXT: reserved_vgpr_first = 0
4439 ; GPRIDX-NEXT: reserved_vgpr_count = 0
4440 ; GPRIDX-NEXT: reserved_sgpr_first = 0
4441 ; GPRIDX-NEXT: reserved_sgpr_count = 0
4442 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4443 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0
4444 ; GPRIDX-NEXT: kernarg_segment_alignment = 4
4445 ; GPRIDX-NEXT: group_segment_alignment = 4
4446 ; GPRIDX-NEXT: private_segment_alignment = 4
4447 ; GPRIDX-NEXT: wavefront_size = 6
4448 ; GPRIDX-NEXT: call_convention = -1
4449 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0
4450 ; GPRIDX-NEXT: .end_amd_kernel_code_t
4451 ; GPRIDX-NEXT: ; %bb.0: ; %entry
4452 ; GPRIDX-NEXT: s_load_dword s8, s[6:7], 0x8
4453 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4454 ; GPRIDX-NEXT: s_mov_b32 s2, 0
4455 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000
4456 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0
4457 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
4458 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
4459 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4460 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2
4461 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4462 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3
4463 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4464 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
4465 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
4466 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
4467 ; GPRIDX-NEXT: s_endpgm
4469 ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
4470 ; MOVREL: .amd_kernel_code_t
4471 ; MOVREL-NEXT: amd_code_version_major = 1
4472 ; MOVREL-NEXT: amd_code_version_minor = 2
4473 ; MOVREL-NEXT: amd_machine_kind = 1
4474 ; MOVREL-NEXT: amd_machine_version_major = 8
4475 ; MOVREL-NEXT: amd_machine_version_minor = 0
4476 ; MOVREL-NEXT: amd_machine_version_stepping = 3
4477 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256
4478 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0
4479 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0
4480 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1
4481 ; MOVREL-NEXT: priority = 0
4482 ; MOVREL-NEXT: float_mode = 240
4483 ; MOVREL-NEXT: priv = 0
4484 ; MOVREL-NEXT: enable_dx10_clamp = 1
4485 ; MOVREL-NEXT: debug_mode = 0
4486 ; MOVREL-NEXT: enable_ieee_mode = 1
4487 ; MOVREL-NEXT: enable_wgp_mode = 0
4488 ; MOVREL-NEXT: enable_mem_ordered = 0
4489 ; MOVREL-NEXT: enable_fwd_progress = 0
4490 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4491 ; MOVREL-NEXT: user_sgpr_count = 10
4492 ; MOVREL-NEXT: enable_trap_handler = 0
4493 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1
4494 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1
4495 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1
4496 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0
4497 ; MOVREL-NEXT: enable_vgpr_workitem_id = 2
4498 ; MOVREL-NEXT: enable_exception_msb = 0
4499 ; MOVREL-NEXT: granulated_lds_size = 0
4500 ; MOVREL-NEXT: enable_exception = 0
4501 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1
4502 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1
4503 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0
4504 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4505 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 1
4506 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0
4507 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0
4508 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4509 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4510 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4511 ; MOVREL-NEXT: enable_wavefront_size32 = 0
4512 ; MOVREL-NEXT: enable_ordered_append_gds = 0
4513 ; MOVREL-NEXT: private_element_size = 1
4514 ; MOVREL-NEXT: is_ptr64 = 1
4515 ; MOVREL-NEXT: is_dynamic_callstack = 0
4516 ; MOVREL-NEXT: is_debug_enabled = 0
4517 ; MOVREL-NEXT: is_xnack_enabled = 0
4518 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0
4519 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
4520 ; MOVREL-NEXT: gds_segment_byte_size = 0
4521 ; MOVREL-NEXT: kernarg_segment_byte_size = 28
4522 ; MOVREL-NEXT: workgroup_fbarrier_count = 0
4523 ; MOVREL-NEXT: wavefront_sgpr_count = 9
4524 ; MOVREL-NEXT: workitem_vgpr_count = 4
4525 ; MOVREL-NEXT: reserved_vgpr_first = 0
4526 ; MOVREL-NEXT: reserved_vgpr_count = 0
4527 ; MOVREL-NEXT: reserved_sgpr_first = 0
4528 ; MOVREL-NEXT: reserved_sgpr_count = 0
4529 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4530 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0
4531 ; MOVREL-NEXT: kernarg_segment_alignment = 4
4532 ; MOVREL-NEXT: group_segment_alignment = 4
4533 ; MOVREL-NEXT: private_segment_alignment = 4
4534 ; MOVREL-NEXT: wavefront_size = 6
4535 ; MOVREL-NEXT: call_convention = -1
4536 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0
4537 ; MOVREL-NEXT: .end_amd_kernel_code_t
4538 ; MOVREL-NEXT: ; %bb.0: ; %entry
4539 ; MOVREL-NEXT: s_load_dword s8, s[6:7], 0x8
4540 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4541 ; MOVREL-NEXT: s_mov_b32 s2, 0
4542 ; MOVREL-NEXT: s_mov_b32 s3, 0x40080000
4543 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
4544 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1
4545 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4546 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 2
4547 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4548 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 3
4549 ; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4550 ; MOVREL-NEXT: v_mov_b32_e32 v0, s2
4551 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1
4552 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3
4553 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0
4554 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
4555 ; MOVREL-NEXT: s_endpgm
4557 ; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
4558 ; GFX10: .amd_kernel_code_t
4559 ; GFX10-NEXT: amd_code_version_major = 1
4560 ; GFX10-NEXT: amd_code_version_minor = 2
4561 ; GFX10-NEXT: amd_machine_kind = 1
4562 ; GFX10-NEXT: amd_machine_version_major = 10
4563 ; GFX10-NEXT: amd_machine_version_minor = 1
4564 ; GFX10-NEXT: amd_machine_version_stepping = 0
4565 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256
4566 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
4567 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0
4568 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
4569 ; GFX10-NEXT: priority = 0
4570 ; GFX10-NEXT: float_mode = 240
4571 ; GFX10-NEXT: priv = 0
4572 ; GFX10-NEXT: enable_dx10_clamp = 1
4573 ; GFX10-NEXT: debug_mode = 0
4574 ; GFX10-NEXT: enable_ieee_mode = 1
4575 ; GFX10-NEXT: enable_wgp_mode = 1
4576 ; GFX10-NEXT: enable_mem_ordered = 1
4577 ; GFX10-NEXT: enable_fwd_progress = 0
4578 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4579 ; GFX10-NEXT: user_sgpr_count = 10
4580 ; GFX10-NEXT: enable_trap_handler = 0
4581 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
4582 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1
4583 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1
4584 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0
4585 ; GFX10-NEXT: enable_vgpr_workitem_id = 2
4586 ; GFX10-NEXT: enable_exception_msb = 0
4587 ; GFX10-NEXT: granulated_lds_size = 0
4588 ; GFX10-NEXT: enable_exception = 0
4589 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
4590 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1
4591 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0
4592 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4593 ; GFX10-NEXT: enable_sgpr_dispatch_id = 1
4594 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
4595 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0
4596 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4597 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4598 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4599 ; GFX10-NEXT: enable_wavefront_size32 = 1
4600 ; GFX10-NEXT: enable_ordered_append_gds = 0
4601 ; GFX10-NEXT: private_element_size = 1
4602 ; GFX10-NEXT: is_ptr64 = 1
4603 ; GFX10-NEXT: is_dynamic_callstack = 0
4604 ; GFX10-NEXT: is_debug_enabled = 0
4605 ; GFX10-NEXT: is_xnack_enabled = 1
4606 ; GFX10-NEXT: workitem_private_segment_byte_size = 0
4607 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0
4608 ; GFX10-NEXT: gds_segment_byte_size = 0
4609 ; GFX10-NEXT: kernarg_segment_byte_size = 28
4610 ; GFX10-NEXT: workgroup_fbarrier_count = 0
4611 ; GFX10-NEXT: wavefront_sgpr_count = 9
4612 ; GFX10-NEXT: workitem_vgpr_count = 3
4613 ; GFX10-NEXT: reserved_vgpr_first = 0
4614 ; GFX10-NEXT: reserved_vgpr_count = 0
4615 ; GFX10-NEXT: reserved_sgpr_first = 0
4616 ; GFX10-NEXT: reserved_sgpr_count = 0
4617 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4618 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
4619 ; GFX10-NEXT: kernarg_segment_alignment = 4
4620 ; GFX10-NEXT: group_segment_alignment = 4
4621 ; GFX10-NEXT: private_segment_alignment = 4
4622 ; GFX10-NEXT: wavefront_size = 5
4623 ; GFX10-NEXT: call_convention = -1
4624 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0
4625 ; GFX10-NEXT: .end_amd_kernel_code_t
4626 ; GFX10-NEXT: ; %bb.0: ; %entry
4627 ; GFX10-NEXT: s_clause 0x1
4628 ; GFX10-NEXT: s_load_dword s8, s[6:7], 0x8
4629 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4630 ; GFX10-NEXT: s_mov_b32 s2, 0
4631 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000
4632 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
4633 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4634 ; GFX10-NEXT: s_cmp_eq_u32 s8, 1
4635 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4636 ; GFX10-NEXT: s_cmp_eq_u32 s8, 2
4637 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4638 ; GFX10-NEXT: s_cmp_eq_u32 s8, 3
4639 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4640 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
4641 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
4642 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
4643 ; GFX10-NEXT: s_endpgm
4645 ; GFX11-LABEL: dyn_extract_v4f64_s_s_s:
4646 ; GFX11: .amd_kernel_code_t
4647 ; GFX11-NEXT: amd_code_version_major = 1
4648 ; GFX11-NEXT: amd_code_version_minor = 2
4649 ; GFX11-NEXT: amd_machine_kind = 1
4650 ; GFX11-NEXT: amd_machine_version_major = 11
4651 ; GFX11-NEXT: amd_machine_version_minor = 0
4652 ; GFX11-NEXT: amd_machine_version_stepping = 0
4653 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256
4654 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
4655 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0
4656 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
4657 ; GFX11-NEXT: priority = 0
4658 ; GFX11-NEXT: float_mode = 240
4659 ; GFX11-NEXT: priv = 0
4660 ; GFX11-NEXT: enable_dx10_clamp = 1
4661 ; GFX11-NEXT: debug_mode = 0
4662 ; GFX11-NEXT: enable_ieee_mode = 1
4663 ; GFX11-NEXT: enable_wgp_mode = 1
4664 ; GFX11-NEXT: enable_mem_ordered = 1
4665 ; GFX11-NEXT: enable_fwd_progress = 0
4666 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
4667 ; GFX11-NEXT: user_sgpr_count = 13
4668 ; GFX11-NEXT: enable_trap_handler = 0
4669 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1
4670 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1
4671 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1
4672 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0
4673 ; GFX11-NEXT: enable_vgpr_workitem_id = 2
4674 ; GFX11-NEXT: enable_exception_msb = 0
4675 ; GFX11-NEXT: granulated_lds_size = 0
4676 ; GFX11-NEXT: enable_exception = 0
4677 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0
4678 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1
4679 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0
4680 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1
4681 ; GFX11-NEXT: enable_sgpr_dispatch_id = 1
4682 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0
4683 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0
4684 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0
4685 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0
4686 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0
4687 ; GFX11-NEXT: enable_wavefront_size32 = 1
4688 ; GFX11-NEXT: enable_ordered_append_gds = 0
4689 ; GFX11-NEXT: private_element_size = 1
4690 ; GFX11-NEXT: is_ptr64 = 1
4691 ; GFX11-NEXT: is_dynamic_callstack = 0
4692 ; GFX11-NEXT: is_debug_enabled = 0
4693 ; GFX11-NEXT: is_xnack_enabled = 0
4694 ; GFX11-NEXT: workitem_private_segment_byte_size = 0
4695 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0
4696 ; GFX11-NEXT: gds_segment_byte_size = 0
4697 ; GFX11-NEXT: kernarg_segment_byte_size = 28
4698 ; GFX11-NEXT: workgroup_fbarrier_count = 0
4699 ; GFX11-NEXT: wavefront_sgpr_count = 7
4700 ; GFX11-NEXT: workitem_vgpr_count = 3
4701 ; GFX11-NEXT: reserved_vgpr_first = 0
4702 ; GFX11-NEXT: reserved_vgpr_count = 0
4703 ; GFX11-NEXT: reserved_sgpr_first = 0
4704 ; GFX11-NEXT: reserved_sgpr_count = 0
4705 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
4706 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0
4707 ; GFX11-NEXT: kernarg_segment_alignment = 4
4708 ; GFX11-NEXT: group_segment_alignment = 4
4709 ; GFX11-NEXT: private_segment_alignment = 4
4710 ; GFX11-NEXT: wavefront_size = 5
4711 ; GFX11-NEXT: call_convention = -1
4712 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0
4713 ; GFX11-NEXT: .end_amd_kernel_code_t
4714 ; GFX11-NEXT: ; %bb.0: ; %entry
4715 ; GFX11-NEXT: s_clause 0x1
4716 ; GFX11-NEXT: s_load_b32 s6, s[2:3], 0x8
4717 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
4718 ; GFX11-NEXT: s_mov_b32 s2, 0
4719 ; GFX11-NEXT: s_mov_b32 s3, 0x40080000
4720 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
4721 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4722 ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
4723 ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
4724 ; GFX11-NEXT: s_cmp_eq_u32 s6, 2
4725 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
4726 ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
4727 ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
4728 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
4729 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
4730 ; GFX11-NEXT: s_nop 0
4731 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4732 ; GFX11-NEXT: s_endpgm
4734 %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
4735 store double %ext, ptr addrspace(1) %out
; Constant-index extract of element 7 from a <64 x i32> loaded from global
; memory. The expected code loads only that one dword, at byte offset 28
; (7 * 4). The gfx900, gfx1010 and gfx1100 checks fold the offset into the
; global load; the fiji checks add it to the 64-bit pointer and then use
; flat_load_dword.
4739 define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
4740 ; GPRIDX-LABEL: v_extract_v64i32_7:
4742 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4743 ; GPRIDX-NEXT: global_load_dword v0, v[0:1], off offset:28
4744 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4745 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4747 ; MOVREL-LABEL: v_extract_v64i32_7:
4749 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4750 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 28, v0
4751 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4752 ; MOVREL-NEXT: flat_load_dword v0, v[0:1]
4753 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4754 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4756 ; GFX10-LABEL: v_extract_v64i32_7:
4758 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4759 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:28
4760 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4761 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4763 ; GFX11-LABEL: v_extract_v64i32_7:
4765 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4766 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:28
4767 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4768 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4769 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4770 %elt = extractelement <64 x i32> %vec, i32 7
; Constant-index extract of element 32 from a <64 x i32> loaded from global
; memory. The expected code loads only that one dword, at byte offset 128
; (32 * 4). The gfx900, gfx1010 and gfx1100 checks fold the offset into the
; global load; the fiji checks add 0x80 to the 64-bit pointer and then use
; flat_load_dword.
4774 define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
4775 ; GPRIDX-LABEL: v_extract_v64i32_32:
4777 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4778 ; GPRIDX-NEXT: global_load_dword v0, v[0:1], off offset:128
4779 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4780 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4782 ; MOVREL-LABEL: v_extract_v64i32_32:
4784 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4785 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
4786 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4787 ; MOVREL-NEXT: flat_load_dword v0, v[0:1]
4788 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4789 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4791 ; GFX10-LABEL: v_extract_v64i32_32:
4793 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4794 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:128
4795 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4796 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4798 ; GFX11-LABEL: v_extract_v64i32_32:
4800 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4801 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:128
4802 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4803 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4804 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4805 %elt = extractelement <64 x i32> %vec, i32 32
; Constant-index extract of element 33 from a <64 x i32> loaded from global
; memory. The expected code loads only that one dword, at byte offset 132
; (33 * 4). The gfx900, gfx1010 and gfx1100 checks fold the offset into the
; global load; the fiji checks add 0x84 to the 64-bit pointer and then use
; flat_load_dword.
4809 define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
4810 ; GPRIDX-LABEL: v_extract_v64i32_33:
4812 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4813 ; GPRIDX-NEXT: global_load_dword v0, v[0:1], off offset:132
4814 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4815 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4817 ; MOVREL-LABEL: v_extract_v64i32_33:
4819 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4820 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x84, v0
4821 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4822 ; MOVREL-NEXT: flat_load_dword v0, v[0:1]
4823 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4824 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4826 ; GFX10-LABEL: v_extract_v64i32_33:
4828 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4829 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:132
4830 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4831 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4833 ; GFX11-LABEL: v_extract_v64i32_33:
4835 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4836 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:132
4837 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4838 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4839 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4840 %elt = extractelement <64 x i32> %vec, i32 33
; Constant-index extract of element 37 from a <64 x i32> loaded from global
; memory. The expected code loads only that one dword, at byte offset 148
; (37 * 4). The gfx900, gfx1010 and gfx1100 checks fold the offset into the
; global load; the fiji checks add 0x94 to the 64-bit pointer and then use
; flat_load_dword.
4844 define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
4845 ; GPRIDX-LABEL: v_extract_v64i32_37:
4847 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4848 ; GPRIDX-NEXT: global_load_dword v0, v[0:1], off offset:148
4849 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
4850 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
4852 ; MOVREL-LABEL: v_extract_v64i32_37:
4854 ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4855 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x94, v0
4856 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4857 ; MOVREL-NEXT: flat_load_dword v0, v[0:1]
4858 ; MOVREL-NEXT: s_waitcnt vmcnt(0)
4859 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
4861 ; GFX10-LABEL: v_extract_v64i32_37:
4863 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4864 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:148
4865 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4866 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4868 ; GFX11-LABEL: v_extract_v64i32_37:
4870 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4871 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:148
4872 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4873 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4874 %vec = load <64 x i32>, ptr addrspace(1) %ptr
4875 %elt = extractelement <64 x i32> %vec, i32 37