1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
5 ; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
7 ; FIXME: Need constant bus fixup pre-gfx10 for movrel
8 ; ERR: Bad machine code: VOP* instruction violates constant bus restriction
; Dynamic-index insertelement into <8 x i32> where %vec, %val and %idx are all
; inreg (they appear in s-registers in the expected output).
; Expected gfx900 output (GPRIDX prefix): one s_cmp_eq_u32 / s_cselect_b32 pair
; per element, comparing the index in s11 against 0..7.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): the vector is copied down
; to s0..s7, the index is written to m0, and one s_movreld_b32 does the insert.
10 define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
11 ; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
12 ; GPRIDX: ; %bb.0: ; %entry
13 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
14 ; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
15 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
16 ; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
17 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
18 ; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
19 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
20 ; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
21 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
22 ; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
23 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
24 ; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
25 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
26 ; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
27 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
28 ; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
29 ; GPRIDX-NEXT: ; return to shader part epilog
31 ; GFX10PLUS-LABEL: dyn_insertelement_v8i32_s_s_s:
32 ; GFX10PLUS: ; %bb.0: ; %entry
33 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
34 ; GFX10PLUS-NEXT: s_mov_b32 m0, s11
35 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
36 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
37 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
38 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
39 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
40 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
41 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
42 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s10
43 ; GFX10PLUS-NEXT: ; return to shader part epilog
45 %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
; Dynamic-index insertelement into a vector of eight addrspace(3) pointers,
; with %vec, %val and %idx all inreg.
; Expected gfx900 output (GPRIDX prefix): s_cmp_eq_u32 / s_cselect_b32 per
; element, keyed on the index in s11.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): vector copied to s0..s7,
; index in m0, then a single s_movreld_b32.
49 define amdgpu_ps <8 x ptr addrspace(3)> @dyn_insertelement_v8p3i8_s_s_s(<8 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) {
50 ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
51 ; GPRIDX: ; %bb.0: ; %entry
52 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
53 ; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
54 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
55 ; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
56 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
57 ; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
58 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
59 ; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
60 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
61 ; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
62 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
63 ; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
64 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
65 ; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
66 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
67 ; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
68 ; GPRIDX-NEXT: ; return to shader part epilog
70 ; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_s_s_s:
71 ; GFX10PLUS: ; %bb.0: ; %entry
72 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
73 ; GFX10PLUS-NEXT: s_mov_b32 m0, s11
74 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
75 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
76 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
77 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
78 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
79 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
80 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
81 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s10
82 ; GFX10PLUS-NEXT: ; return to shader part epilog
84 %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
85 ret <8 x ptr addrspace(3)> %insert
; Dynamic-index insertelement into the constant vector <1.0, 2.0, ..., 8.0>;
; %val and %idx are ordinary (non-inreg) arguments, seen as v0 and v1 in the
; expected output. This function is not amdgpu_ps, so the expected code returns
; with s_setpc_b64.
; All three targets are expected to use a v_cmp_eq_u32 / v_cndmask_b32 pair per
; element. In the gfx900 (GPRIDX) output, literals such as 0x40400000 are first
; moved into a VGPR; the GFX10 and GFX11 outputs use them directly as
; v_cndmask operands. Elements 0 and 1 are built in v8/v9 and moved to v0/v1
; at the end, since v0/v1 hold the inputs. GFX11 pairs the final cndmask and
; mov into one v_dual instruction.
88 define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
89 ; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
90 ; GPRIDX: ; %bb.0: ; %entry
91 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
93 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc
94 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
95 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0x40400000
96 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc
97 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
98 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
99 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
100 ; GPRIDX-NEXT: v_mov_b32_e32 v4, 0x40a00000
101 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc
102 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
103 ; GPRIDX-NEXT: v_mov_b32_e32 v5, 0x40c00000
104 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
105 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
106 ; GPRIDX-NEXT: v_mov_b32_e32 v6, 0x40e00000
107 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc
108 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
109 ; GPRIDX-NEXT: v_mov_b32_e32 v7, 0x41000000
110 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
111 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
112 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
113 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
114 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
115 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
117 ; GFX10-LABEL: dyn_insertelement_v8f32_const_s_v_v:
118 ; GFX10: ; %bb.0: ; %entry
119 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
121 ; GFX10-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
122 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
123 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo
124 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
125 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo
126 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
127 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo
128 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
129 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo
130 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
131 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo
132 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
133 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo
134 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
135 ; GFX10-NEXT: v_mov_b32_e32 v1, v9
136 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x41000000, v0, vcc_lo
137 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
138 ; GFX10-NEXT: s_setpc_b64 s[30:31]
140 ; GFX11-LABEL: dyn_insertelement_v8f32_const_s_v_v:
141 ; GFX11: ; %bb.0: ; %entry
142 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
144 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
145 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
146 ; GFX11-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo
147 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
148 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo
149 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
150 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo
151 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
152 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo
153 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
154 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo
155 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
156 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo
157 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
158 ; GFX11-NEXT: v_mov_b32_e32 v1, v9
159 ; GFX11-NEXT: v_dual_cndmask_b32 v7, 0x41000000, v0 :: v_dual_mov_b32 v0, v8
160 ; GFX11-NEXT: s_setpc_b64 s[30:31]
162 %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
163 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with %vec and %val inreg and a
; non-inreg %idx (v0 in the expected output).
; Expected gfx900 output (GPRIDX prefix): every vector element and %val is
; first copied from its s-register into a VGPR, then selected with
; v_cmp_eq_u32 / v_cndmask_b32 per element.
; Expected GFX10 and GFX11 output: only %val is copied to a VGPR (v7); the
; vector elements are used directly as s-register operands of v_cndmask_b32.
; GFX11 pairs the final mov and cndmask into one v_dual instruction.
166 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
167 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
168 ; GPRIDX: ; %bb.0: ; %entry
169 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
170 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
171 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
172 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
173 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v10, vcc
174 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
175 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
176 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v10, vcc
177 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
178 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
179 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v10, vcc
180 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
181 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
182 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v10, vcc
183 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
184 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
185 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc
186 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
187 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s8
188 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v10, vcc
189 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
190 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
191 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v10, vcc
192 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
193 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v10, vcc
194 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
195 ; GPRIDX-NEXT: ; return to shader part epilog
197 ; GFX10-LABEL: dyn_insertelement_v8f32_s_s_v:
198 ; GFX10: ; %bb.0: ; %entry
199 ; GFX10-NEXT: v_mov_b32_e32 v7, s10
200 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
201 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo
202 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
203 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo
204 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
205 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo
206 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
207 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo
208 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
209 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo
210 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
211 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo
212 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
213 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo
214 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
215 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
216 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v7, vcc_lo
217 ; GFX10-NEXT: ; return to shader part epilog
219 ; GFX11-LABEL: dyn_insertelement_v8f32_s_s_v:
220 ; GFX11: ; %bb.0: ; %entry
221 ; GFX11-NEXT: v_mov_b32_e32 v7, s10
222 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
223 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo
224 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
225 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo
226 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
227 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo
228 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
229 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo
230 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
231 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo
232 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
233 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo
234 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
235 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo
236 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
237 ; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_cndmask_b32 v7, s9, v7
238 ; GFX11-NEXT: ; return to shader part epilog
240 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
241 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with %vec and %idx inreg and a
; non-inreg %val (v0 in the expected output).
; Expected gfx900 output (GPRIDX prefix): vector elements are copied into
; VGPRs and selected per element with v_cmp_eq_u32_e64 (index in s10) and
; v_cndmask_b32.
; Expected GFX10 and GFX11 output: the vector is shifted down to s0..s7, then
; copied into v0..v7, %val is saved in v8, the index is written to m0, and one
; v_movreld_b32 does the insert. GFX11 pairs the VGPR copies into v_dual_mov.
244 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
245 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
246 ; GPRIDX: ; %bb.0: ; %entry
247 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
248 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 0
249 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
250 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v0, vcc
251 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 1
252 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
253 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
254 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 2
255 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
256 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
257 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 3
258 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
259 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc
260 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 4
261 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
262 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
263 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 5
264 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s8
265 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc
266 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 6
267 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
268 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v0, vcc
269 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 7
270 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
271 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
272 ; GPRIDX-NEXT: ; return to shader part epilog
274 ; GFX10-LABEL: dyn_insertelement_v8f32_s_v_s:
275 ; GFX10: ; %bb.0: ; %entry
276 ; GFX10-NEXT: s_mov_b32 s0, s2
277 ; GFX10-NEXT: s_mov_b32 s1, s3
278 ; GFX10-NEXT: s_mov_b32 s2, s4
279 ; GFX10-NEXT: s_mov_b32 s3, s5
280 ; GFX10-NEXT: s_mov_b32 s4, s6
281 ; GFX10-NEXT: s_mov_b32 s5, s7
282 ; GFX10-NEXT: s_mov_b32 s6, s8
283 ; GFX10-NEXT: s_mov_b32 s7, s9
284 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
285 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
286 ; GFX10-NEXT: s_mov_b32 m0, s10
287 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
288 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
289 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
290 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
291 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
292 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
293 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
294 ; GFX10-NEXT: v_movreld_b32_e32 v0, v8
295 ; GFX10-NEXT: ; return to shader part epilog
297 ; GFX11-LABEL: dyn_insertelement_v8f32_s_v_s:
298 ; GFX11: ; %bb.0: ; %entry
299 ; GFX11-NEXT: s_mov_b32 s0, s2
300 ; GFX11-NEXT: s_mov_b32 s1, s3
301 ; GFX11-NEXT: s_mov_b32 s2, s4
302 ; GFX11-NEXT: s_mov_b32 s3, s5
303 ; GFX11-NEXT: s_mov_b32 s4, s6
304 ; GFX11-NEXT: s_mov_b32 s5, s7
305 ; GFX11-NEXT: s_mov_b32 s6, s8
306 ; GFX11-NEXT: s_mov_b32 s7, s9
307 ; GFX11-NEXT: v_mov_b32_e32 v8, v0
308 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
309 ; GFX11-NEXT: s_mov_b32 m0, s10
310 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
311 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
312 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
313 ; GFX11-NEXT: v_movreld_b32_e32 v0, v8
314 ; GFX11-NEXT: ; return to shader part epilog
316 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
317 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with a non-inreg %vec (v0..v7
; in the expected output) and inreg %val (s2) and %idx (s3).
; Expected gfx900 output (GPRIDX prefix): %val is copied to v8, then each
; element is selected in place with v_cmp_eq_u32_e64 / v_cndmask_b32.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): the index is written to
; m0 and a single v_movreld_b32 writes s2 into the vector.
320 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
321 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
322 ; GPRIDX: ; %bb.0: ; %entry
323 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s2
324 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
325 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
326 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1
327 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
328 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 2
329 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
330 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 3
331 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
332 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 4
333 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
334 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 5
335 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
336 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 6
337 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
338 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 7
339 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
340 ; GPRIDX-NEXT: ; return to shader part epilog
342 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_s:
343 ; GFX10PLUS: ; %bb.0: ; %entry
344 ; GFX10PLUS-NEXT: s_mov_b32 m0, s3
345 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, s2
346 ; GFX10PLUS-NEXT: ; return to shader part epilog
348 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
349 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with an inreg %vec and
; non-inreg %val (v0) and %idx (v1).
; All three targets are expected to select per element with v_cmp_eq_u32 /
; v_cndmask_b32. The gfx900 (GPRIDX) output first copies each s-register
; element into a VGPR; the GFX10 and GFX11 outputs use the s-registers directly
; as v_cndmask operands. Elements 0 and 1 are built in v8/v9 and moved to
; v0/v1 at the end, since v0/v1 hold the inputs.
352 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
353 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
354 ; GPRIDX: ; %bb.0: ; %entry
355 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
356 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
357 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
358 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc
359 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
360 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
361 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc
362 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
363 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
364 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
365 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
366 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
367 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
368 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
369 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
370 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
371 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
372 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
373 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
374 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
375 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
376 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc
377 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
378 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc
379 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
380 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
381 ; GPRIDX-NEXT: ; return to shader part epilog
383 ; GFX10-LABEL: dyn_insertelement_v8f32_s_v_v:
384 ; GFX10: ; %bb.0: ; %entry
385 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
386 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
387 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
388 ; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
389 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
390 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
391 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
392 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
393 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
394 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
395 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
396 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
397 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
398 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
399 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
400 ; GFX10-NEXT: v_mov_b32_e32 v1, v9
401 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
402 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
403 ; GFX10-NEXT: ; return to shader part epilog
405 ; GFX11-LABEL: dyn_insertelement_v8f32_s_v_v:
406 ; GFX11: ; %bb.0: ; %entry
407 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
408 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
409 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
410 ; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
411 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
412 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
413 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
414 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
415 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
416 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
417 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
418 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
419 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
420 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
421 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
422 ; GFX11-NEXT: v_mov_b32_e32 v1, v9
423 ; GFX11-NEXT: v_dual_cndmask_b32 v7, s9, v0 :: v_dual_mov_b32 v0, v8
424 ; GFX11-NEXT: ; return to shader part epilog
426 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
427 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with non-inreg %vec (v0..v7)
; and %idx (v8) and an inreg %val (s2).
; Expected gfx900 output (GPRIDX prefix): %val is copied to v9, then each
; element is selected in place with v_cmp_eq_u32 / v_cndmask_b32.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): same compare/select
; chain, but using the e64 form of v_cndmask_b32 with s2 as a direct operand.
430 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
431 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
432 ; GPRIDX: ; %bb.0: ; %entry
433 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s2
434 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
435 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
436 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
437 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
438 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
439 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc
440 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
441 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
442 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
443 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
444 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
445 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
446 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
447 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
448 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
449 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
450 ; GPRIDX-NEXT: ; return to shader part epilog
452 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_v:
453 ; GFX10PLUS: ; %bb.0: ; %entry
454 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
455 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
456 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
457 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
458 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
459 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
460 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
461 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo
462 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
463 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo
464 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
465 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v5, v5, s2, vcc_lo
466 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
467 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v6, v6, s2, vcc_lo
468 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
469 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v7, v7, s2, vcc_lo
470 ; GFX10PLUS-NEXT: ; return to shader part epilog
472 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
473 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x float> with non-inreg %vec (v0..v7)
; and %val (v8) and an inreg %idx (s2).
; Expected gfx900 output (GPRIDX prefix): per-element v_cmp_eq_u32_e64 against
; s2 followed by an in-place v_cndmask_b32.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): the index is written to
; m0 and a single v_movreld_b32 writes v8 into the vector.
476 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
477 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
478 ; GPRIDX: ; %bb.0: ; %entry
479 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
480 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
481 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
482 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
483 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
484 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
485 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
486 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
487 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
488 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
489 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
490 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
491 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
492 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
493 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
494 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
495 ; GPRIDX-NEXT: ; return to shader part epilog
497 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_s:
498 ; GFX10PLUS: ; %bb.0: ; %entry
499 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
500 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8
501 ; GFX10PLUS-NEXT: ; return to shader part epilog
503 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
504 ret <8 x float> %insert
; Dynamic-index insertelement into a vector of eight addrspace(3) pointers,
; with non-inreg %vec (v0..v7) and %val (v8) and an inreg %idx (s2). The
; result is converted with ptrtoint and bitcast so the function returns
; <8 x float>.
; NOTE(review): the conversion is presumably there so the result comes back in
; VGPRs like the float variants - confirm.
; Expected gfx900 output (GPRIDX prefix): per-element v_cmp_eq_u32_e64 against
; s2 followed by an in-place v_cndmask_b32.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix): index written to m0 and
; a single v_movreld_b32; the casts add no instructions to the expected code.
507 define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 inreg %idx) {
508 ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
509 ; GPRIDX: ; %bb.0: ; %entry
510 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
511 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
512 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
513 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
514 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
515 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
516 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
517 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
518 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
519 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
520 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
521 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
522 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
523 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
524 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
525 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
526 ; GPRIDX-NEXT: ; return to shader part epilog
528 ; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_v_v_s:
529 ; GFX10PLUS: ; %bb.0: ; %entry
530 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
531 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8
532 ; GFX10PLUS-NEXT: ; return to shader part epilog
534 %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
535 %cast.0 = ptrtoint <8 x ptr addrspace(3)> %insert to <8 x i32>
536 %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
537 ret <8 x float> %cast.1
; Dynamic-index insertelement into <8 x float> where none of %vec (v0..v7),
; %val (v8) or %idx (v9) is inreg.
; All targets are expected to select each element in place with a
; v_cmp_eq_u32 / v_cndmask_b32 pair; the gfx1010/gfx1100 (GFX10PLUS) output
; differs only in using vcc_lo rather than vcc.
540 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
541 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
542 ; GPRIDX: ; %bb.0: ; %entry
543 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
544 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
545 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
546 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
547 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9
548 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
549 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9
550 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
551 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9
552 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
553 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9
554 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
555 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9
556 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
557 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9
558 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
559 ; GPRIDX-NEXT: ; return to shader part epilog
561 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v:
562 ; GFX10PLUS: ; %bb.0: ; %entry
563 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
564 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
565 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
566 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
567 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9
568 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo
569 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9
570 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
571 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9
572 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo
573 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9
574 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo
575 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9
576 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
577 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9
578 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
579 ; GFX10PLUS-NEXT: ; return to shader part epilog
581 %insert = insertelement <8 x float> %vec, float %val, i32 %idx
582 ret <8 x float> %insert
; Dynamic-index insertelement into <8 x i64> where %vec, %val and %idx are
; all inreg.
; Both the gfx900 (GPRIDX) and gfx1010/gfx1100 (GFX10PLUS) outputs are expected
; to copy the 16 vector dwords down to s0..s15, write the index (s20) to m0,
; and insert the 64-bit value s[18:19] with one s_movreld_b64. The gfx900
; output additionally has an s_nop 0 between the m0 write and the movreld.
585 define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
586 ; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
587 ; GPRIDX: ; %bb.0: ; %entry
588 ; GPRIDX-NEXT: s_mov_b32 s0, s2
589 ; GPRIDX-NEXT: s_mov_b32 s1, s3
590 ; GPRIDX-NEXT: s_mov_b32 s2, s4
591 ; GPRIDX-NEXT: s_mov_b32 s3, s5
592 ; GPRIDX-NEXT: s_mov_b32 s4, s6
593 ; GPRIDX-NEXT: s_mov_b32 s5, s7
594 ; GPRIDX-NEXT: s_mov_b32 s6, s8
595 ; GPRIDX-NEXT: s_mov_b32 s7, s9
596 ; GPRIDX-NEXT: s_mov_b32 s8, s10
597 ; GPRIDX-NEXT: s_mov_b32 s9, s11
598 ; GPRIDX-NEXT: s_mov_b32 s10, s12
599 ; GPRIDX-NEXT: s_mov_b32 s11, s13
600 ; GPRIDX-NEXT: s_mov_b32 s12, s14
601 ; GPRIDX-NEXT: s_mov_b32 s13, s15
602 ; GPRIDX-NEXT: s_mov_b32 s14, s16
603 ; GPRIDX-NEXT: s_mov_b32 s15, s17
604 ; GPRIDX-NEXT: s_mov_b32 m0, s20
605 ; GPRIDX-NEXT: s_nop 0
606 ; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
607 ; GPRIDX-NEXT: ; return to shader part epilog
609 ; GFX10PLUS-LABEL: dyn_insertelement_v8i64_s_s_s:
610 ; GFX10PLUS: ; %bb.0: ; %entry
611 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
612 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
613 ; GFX10PLUS-NEXT: s_mov_b32 m0, s20
614 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
615 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
616 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
617 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
618 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
619 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
620 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
621 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
622 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
623 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
624 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
625 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
626 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
627 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
628 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19]
629 ; GFX10PLUS-NEXT: ; return to shader part epilog
631 %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
632 ret <8 x i64> %insert
; Dynamic-index insertelement into a vector of eight addrspace(1) pointers,
; with %vec, %val and %idx all inreg. Each element occupies two s-registers
; in the expected output.
; Both the gfx900 (GPRIDX) and gfx1010/gfx1100 (GFX10PLUS) outputs are expected
; to copy the 16 vector dwords down to s0..s15, write the index (s20) to m0,
; and insert s[18:19] with one s_movreld_b64. The gfx900 output additionally
; has an s_nop 0 between the m0 write and the movreld.
635 define amdgpu_ps <8 x ptr addrspace(1)> @dyn_insertelement_v8p1i8_s_s_s(<8 x ptr addrspace(1)> inreg %vec, ptr addrspace(1) inreg %val, i32 inreg %idx) {
636 ; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
637 ; GPRIDX: ; %bb.0: ; %entry
638 ; GPRIDX-NEXT: s_mov_b32 s0, s2
639 ; GPRIDX-NEXT: s_mov_b32 s1, s3
640 ; GPRIDX-NEXT: s_mov_b32 s2, s4
641 ; GPRIDX-NEXT: s_mov_b32 s3, s5
642 ; GPRIDX-NEXT: s_mov_b32 s4, s6
643 ; GPRIDX-NEXT: s_mov_b32 s5, s7
644 ; GPRIDX-NEXT: s_mov_b32 s6, s8
645 ; GPRIDX-NEXT: s_mov_b32 s7, s9
646 ; GPRIDX-NEXT: s_mov_b32 s8, s10
647 ; GPRIDX-NEXT: s_mov_b32 s9, s11
648 ; GPRIDX-NEXT: s_mov_b32 s10, s12
649 ; GPRIDX-NEXT: s_mov_b32 s11, s13
650 ; GPRIDX-NEXT: s_mov_b32 s12, s14
651 ; GPRIDX-NEXT: s_mov_b32 s13, s15
652 ; GPRIDX-NEXT: s_mov_b32 s14, s16
653 ; GPRIDX-NEXT: s_mov_b32 s15, s17
654 ; GPRIDX-NEXT: s_mov_b32 m0, s20
655 ; GPRIDX-NEXT: s_nop 0
656 ; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
657 ; GPRIDX-NEXT: ; return to shader part epilog
659 ; GFX10PLUS-LABEL: dyn_insertelement_v8p1i8_s_s_s:
660 ; GFX10PLUS: ; %bb.0: ; %entry
661 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
662 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
663 ; GFX10PLUS-NEXT: s_mov_b32 m0, s20
664 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
665 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
666 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
667 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
668 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
669 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
670 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
671 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
672 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
673 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
674 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
675 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
676 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
677 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
678 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19]
679 ; GFX10PLUS-NEXT: ; return to shader part epilog
681 %insert = insertelement <8 x ptr addrspace(1)> %vec, ptr addrspace(1) %val, i32 %idx
682 ret <8 x ptr addrspace(1)> %insert
; Dynamic insertelement into the constant <8 x double> <1.0, 2.0, ..., 8.0> in
; a function that uses the default calling convention; both the inserted value
; %val and the index %idx are plain (non-inreg) arguments.
; In the expected code for all three checked targets (gfx900, gfx1010, gfx1100)
; the constant is materialized in SGPRs, copied to v[3:18], and %val (v0/v1) is
; selected into the element whose number equals %idx (v2) using one
; v_cmp_eq_u32 per element and one v_cndmask_b32 per 32-bit half.
685 define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
686 ; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
687 ; GPRIDX: ; %bb.0: ; %entry
688 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689 ; GPRIDX-NEXT: s_mov_b32 s18, 0
690 ; GPRIDX-NEXT: s_mov_b32 s16, 0
691 ; GPRIDX-NEXT: s_mov_b32 s14, 0
692 ; GPRIDX-NEXT: s_mov_b32 s12, 0
693 ; GPRIDX-NEXT: s_mov_b32 s8, 0
694 ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0
695 ; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000
696 ; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000
697 ; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000
698 ; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000
699 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0
700 ; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000
701 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0
702 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
703 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
704 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
705 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
706 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s8
707 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s9
708 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s10
709 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s11
710 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s12
711 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s13
712 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s14
713 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s15
714 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s16
715 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s17
716 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s18
717 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s19
718 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
719 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v2
720 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v2
721 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 3, v2
722 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 4, v2
723 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 5, v2
724 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v2
725 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[14:15], 7, v2
726 ; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[16:17]
727 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc
728 ; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[16:17]
729 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
730 ; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5]
731 ; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[6:7]
732 ; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[8:9]
733 ; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[10:11]
734 ; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[12:13]
735 ; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[14:15]
736 ; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[4:5]
737 ; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[6:7]
738 ; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[8:9]
739 ; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[10:11]
740 ; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[12:13]
741 ; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[14:15]
742 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
743 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
744 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
745 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
746 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
747 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
748 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
749 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
750 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
752 ; GFX10-LABEL: dyn_insertelement_v8f64_const_s_v_v:
753 ; GFX10: ; %bb.0: ; %entry
754 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755 ; GFX10-NEXT: s_mov_b64 s[4:5], 1.0
756 ; GFX10-NEXT: s_mov_b32 s18, 0
757 ; GFX10-NEXT: s_mov_b32 s16, 0
758 ; GFX10-NEXT: s_mov_b32 s14, 0
759 ; GFX10-NEXT: s_mov_b32 s12, 0
760 ; GFX10-NEXT: s_mov_b32 s8, 0
761 ; GFX10-NEXT: s_mov_b32 s19, 0x40200000
762 ; GFX10-NEXT: s_mov_b32 s17, 0x401c0000
763 ; GFX10-NEXT: s_mov_b32 s15, 0x40180000
764 ; GFX10-NEXT: s_mov_b32 s13, 0x40140000
765 ; GFX10-NEXT: s_mov_b64 s[10:11], 4.0
766 ; GFX10-NEXT: s_mov_b32 s9, 0x40080000
767 ; GFX10-NEXT: s_mov_b64 s[6:7], 2.0
768 ; GFX10-NEXT: v_mov_b32_e32 v3, s4
769 ; GFX10-NEXT: v_mov_b32_e32 v4, s5
770 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
771 ; GFX10-NEXT: v_mov_b32_e32 v5, s6
772 ; GFX10-NEXT: v_mov_b32_e32 v6, s7
773 ; GFX10-NEXT: v_mov_b32_e32 v7, s8
774 ; GFX10-NEXT: v_mov_b32_e32 v8, s9
775 ; GFX10-NEXT: v_mov_b32_e32 v9, s10
776 ; GFX10-NEXT: v_mov_b32_e32 v10, s11
777 ; GFX10-NEXT: v_mov_b32_e32 v11, s12
778 ; GFX10-NEXT: v_mov_b32_e32 v12, s13
779 ; GFX10-NEXT: v_mov_b32_e32 v13, s14
780 ; GFX10-NEXT: v_mov_b32_e32 v14, s15
781 ; GFX10-NEXT: v_mov_b32_e32 v15, s16
782 ; GFX10-NEXT: v_mov_b32_e32 v16, s17
783 ; GFX10-NEXT: v_mov_b32_e32 v17, s18
784 ; GFX10-NEXT: v_mov_b32_e32 v18, s19
785 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v2
786 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
787 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo
788 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
789 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v2
790 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s4
791 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s4
792 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 3, v2
793 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
794 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo
795 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
796 ; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s5
797 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s4
798 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s4
799 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 5, v2
800 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
801 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v1, vcc_lo
802 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2
803 ; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v1, s5
804 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v0, s4
805 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v1, s4
806 ; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v0, vcc_lo
807 ; GFX10-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc_lo
808 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
809 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
810 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
811 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
812 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
813 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
814 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
815 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
816 ; GFX10-NEXT: s_setpc_b64 s[30:31]
818 ; GFX11-LABEL: dyn_insertelement_v8f64_const_s_v_v:
819 ; GFX11: ; %bb.0: ; %entry
820 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
821 ; GFX11-NEXT: s_mov_b32 s14, 0
822 ; GFX11-NEXT: s_mov_b32 s15, 0x40200000
823 ; GFX11-NEXT: s_mov_b32 s12, 0
824 ; GFX11-NEXT: s_mov_b32 s10, 0
825 ; GFX11-NEXT: s_mov_b32 s8, 0
826 ; GFX11-NEXT: s_mov_b32 s4, 0
827 ; GFX11-NEXT: s_mov_b64 s[0:1], 1.0
828 ; GFX11-NEXT: s_mov_b32 s13, 0x401c0000
829 ; GFX11-NEXT: s_mov_b32 s11, 0x40180000
830 ; GFX11-NEXT: s_mov_b32 s9, 0x40140000
831 ; GFX11-NEXT: s_mov_b64 s[6:7], 4.0
832 ; GFX11-NEXT: s_mov_b32 s5, 0x40080000
833 ; GFX11-NEXT: s_mov_b64 s[2:3], 2.0
834 ; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
835 ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
836 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
837 ; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
838 ; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
839 ; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
840 ; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
841 ; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
842 ; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
843 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
844 ; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
845 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
846 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v2
847 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
848 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
849 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
850 ; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
851 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
852 ; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1
853 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
854 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
855 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
856 ; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1
857 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2
858 ; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1
859 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0
860 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0
861 ; GFX11-NEXT: v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1
862 ; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off dlc
863 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
864 ; GFX11-NEXT: global_store_b128 v[0:1], v[7:10], off dlc
865 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
866 ; GFX11-NEXT: global_store_b128 v[0:1], v[11:14], off dlc
867 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
868 ; GFX11-NEXT: global_store_b128 v[0:1], v[15:18], off dlc
869 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
870 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: one insertelement, then the result is split into four
; <2 x double> pieces and each piece is stored with a volatile store through an
; undef addrspace(1) pointer (the four 128-bit global stores checked above).
872 %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
873 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
874 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
875 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
876 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
877 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
878 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
879 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
880 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; Dynamic insertelement into an <8 x double> in an amdgpu_ps function where the
; vector %vec and the inserted value %val are inreg arguments and only the
; index %idx is not.
; In the expected code the vector arrives in s[2:17], %val in s[18:19] and %idx
; in v0. The vector is moved down to s[0:15], copied to v[1:16], and the element
; is chosen with a v_cmp_eq_u32 / v_cndmask_b32 chain. gfx900 first copies
; s18/s19 into VGPRs (v17 and v0), while gfx1010 and gfx1100 use s18/s19
; directly as v_cndmask_b32 source operands.
884 define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) {
885 ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v:
886 ; GPRIDX: ; %bb.0: ; %entry
887 ; GPRIDX-NEXT: s_mov_b32 s1, s3
888 ; GPRIDX-NEXT: s_mov_b32 s3, s5
889 ; GPRIDX-NEXT: s_mov_b32 s5, s7
890 ; GPRIDX-NEXT: s_mov_b32 s7, s9
891 ; GPRIDX-NEXT: s_mov_b32 s9, s11
892 ; GPRIDX-NEXT: s_mov_b32 s11, s13
893 ; GPRIDX-NEXT: s_mov_b32 s13, s15
894 ; GPRIDX-NEXT: s_mov_b32 s15, s17
895 ; GPRIDX-NEXT: s_mov_b32 s0, s2
896 ; GPRIDX-NEXT: s_mov_b32 s2, s4
897 ; GPRIDX-NEXT: s_mov_b32 s4, s6
898 ; GPRIDX-NEXT: s_mov_b32 s6, s8
899 ; GPRIDX-NEXT: s_mov_b32 s8, s10
900 ; GPRIDX-NEXT: s_mov_b32 s10, s12
901 ; GPRIDX-NEXT: s_mov_b32 s12, s14
902 ; GPRIDX-NEXT: s_mov_b32 s14, s16
903 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s15
904 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s14
905 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s13
906 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s12
907 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s11
908 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s10
909 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s9
910 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s8
911 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s7
912 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s6
913 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s5
914 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s4
915 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s3
916 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s2
917 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s1
918 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s0
919 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s18
920 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
921 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
922 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
923 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0
924 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0
925 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
926 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
927 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
928 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s19
929 ; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[12:13]
930 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
931 ; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[12:13]
932 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
933 ; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[0:1]
934 ; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[2:3]
935 ; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5]
936 ; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[6:7]
937 ; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[8:9]
938 ; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11]
939 ; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[0:1]
940 ; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3]
941 ; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5]
942 ; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[6:7]
943 ; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[8:9]
944 ; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v0, s[10:11]
945 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
946 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
947 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
948 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
949 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
950 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
951 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
952 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
953 ; GPRIDX-NEXT: s_endpgm
955 ; GFX10-LABEL: dyn_insertelement_v8f64_s_s_v:
956 ; GFX10: ; %bb.0: ; %entry
957 ; GFX10-NEXT: s_mov_b32 s1, s3
958 ; GFX10-NEXT: s_mov_b32 s3, s5
959 ; GFX10-NEXT: s_mov_b32 s5, s7
960 ; GFX10-NEXT: s_mov_b32 s7, s9
961 ; GFX10-NEXT: s_mov_b32 s9, s11
962 ; GFX10-NEXT: s_mov_b32 s11, s13
963 ; GFX10-NEXT: s_mov_b32 s13, s15
964 ; GFX10-NEXT: s_mov_b32 s15, s17
965 ; GFX10-NEXT: s_mov_b32 s0, s2
966 ; GFX10-NEXT: s_mov_b32 s2, s4
967 ; GFX10-NEXT: s_mov_b32 s4, s6
968 ; GFX10-NEXT: s_mov_b32 s6, s8
969 ; GFX10-NEXT: s_mov_b32 s8, s10
970 ; GFX10-NEXT: s_mov_b32 s10, s12
971 ; GFX10-NEXT: s_mov_b32 s12, s14
972 ; GFX10-NEXT: s_mov_b32 s14, s16
973 ; GFX10-NEXT: v_mov_b32_e32 v16, s15
974 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
975 ; GFX10-NEXT: v_mov_b32_e32 v1, s0
976 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
977 ; GFX10-NEXT: v_mov_b32_e32 v15, s14
978 ; GFX10-NEXT: v_mov_b32_e32 v14, s13
979 ; GFX10-NEXT: v_mov_b32_e32 v13, s12
980 ; GFX10-NEXT: v_mov_b32_e32 v12, s11
981 ; GFX10-NEXT: v_mov_b32_e32 v11, s10
982 ; GFX10-NEXT: v_mov_b32_e32 v10, s9
983 ; GFX10-NEXT: v_mov_b32_e32 v9, s8
984 ; GFX10-NEXT: v_mov_b32_e32 v8, s7
985 ; GFX10-NEXT: v_mov_b32_e32 v7, s6
986 ; GFX10-NEXT: v_mov_b32_e32 v6, s5
987 ; GFX10-NEXT: v_mov_b32_e32 v5, s4
988 ; GFX10-NEXT: v_mov_b32_e32 v4, s3
989 ; GFX10-NEXT: v_mov_b32_e32 v3, s2
990 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
991 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo
992 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo
993 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
994 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v0
995 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0
996 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0
997 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v0
998 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo
999 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo
1000 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
1001 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1
1002 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0
1003 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0
1004 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v0
1005 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo
1006 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo
1007 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
1008 ; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1
1009 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0
1010 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0
1011 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo
1012 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo
1013 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
1014 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1015 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
1016 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1017 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
1018 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1019 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
1020 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1021 ; GFX10-NEXT: s_endpgm
1023 ; GFX11-LABEL: dyn_insertelement_v8f64_s_s_v:
1024 ; GFX11: ; %bb.0: ; %entry
1025 ; GFX11-NEXT: s_mov_b32 s1, s3
1026 ; GFX11-NEXT: s_mov_b32 s3, s5
1027 ; GFX11-NEXT: s_mov_b32 s5, s7
1028 ; GFX11-NEXT: s_mov_b32 s7, s9
1029 ; GFX11-NEXT: s_mov_b32 s9, s11
1030 ; GFX11-NEXT: s_mov_b32 s11, s13
1031 ; GFX11-NEXT: s_mov_b32 s13, s15
1032 ; GFX11-NEXT: s_mov_b32 s15, s17
1033 ; GFX11-NEXT: s_mov_b32 s0, s2
1034 ; GFX11-NEXT: s_mov_b32 s2, s4
1035 ; GFX11-NEXT: s_mov_b32 s4, s6
1036 ; GFX11-NEXT: s_mov_b32 s6, s8
1037 ; GFX11-NEXT: s_mov_b32 s8, s10
1038 ; GFX11-NEXT: s_mov_b32 s10, s12
1039 ; GFX11-NEXT: s_mov_b32 s12, s14
1040 ; GFX11-NEXT: s_mov_b32 s14, s16
1041 ; GFX11-NEXT: v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14
1042 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
1043 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1044 ; GFX11-NEXT: v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12
1045 ; GFX11-NEXT: v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10
1046 ; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8
1047 ; GFX11-NEXT: v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6
1048 ; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4
1049 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
1050 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
1051 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo
1052 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo
1053 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
1054 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v0
1055 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0
1056 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0
1057 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v0
1058 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo
1059 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo
1060 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
1061 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1
1062 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0
1063 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0
1064 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v0
1065 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo
1066 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo
1067 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
1068 ; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1
1069 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0
1070 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0
1071 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo
1072 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo
1073 ; GFX11-NEXT: global_store_b128 v[0:1], v[1:4], off dlc
1074 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1075 ; GFX11-NEXT: global_store_b128 v[0:1], v[5:8], off dlc
1076 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1077 ; GFX11-NEXT: global_store_b128 v[0:1], v[9:12], off dlc
1078 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1079 ; GFX11-NEXT: global_store_b128 v[0:1], v[13:16], off dlc
1080 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1081 ; GFX11-NEXT: s_endpgm
; IR under test: one insertelement, then the result is split into four
; <2 x double> pieces and each piece is stored with a volatile store through an
; undef addrspace(1) pointer (the four 128-bit global stores checked above).
1083 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1084 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1085 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1086 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1087 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1088 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1089 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1090 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1091 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; Dynamic insertelement into an <8 x double> in an amdgpu_ps function where the
; vector %vec and the index %idx are inreg arguments and the inserted value
; %val is not.
; In the expected code the vector arrives in s[2:17], %idx in s18 and %val in
; v0/v1. The vector is copied to v[2:17] and the index is shifted left by one
; (presumably because each double spans two 32-bit registers). The two halves
; of %val are then written with indexed moves: two v_mov_b32 between
; s_set_gpr_idx_on and s_set_gpr_idx_off on gfx900, and two v_movreld_b32 with
; the shifted index in m0 on gfx1010 and gfx1100.
1095 define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) {
1096 ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s:
1097 ; GPRIDX: ; %bb.0: ; %entry
1098 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1099 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1100 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1101 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1102 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1103 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1104 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1105 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1106 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1107 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1108 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1109 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1110 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1111 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1112 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1113 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1114 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
1115 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
1116 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
1117 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
1118 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
1119 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
1120 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
1121 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
1122 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
1123 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
1124 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
1125 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
1126 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
1127 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
1128 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
1129 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
1130 ; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
1131 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
1132 ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
1133 ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
1134 ; GPRIDX-NEXT: s_set_gpr_idx_off
1135 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1136 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1137 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
1138 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1139 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
1140 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1141 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
1142 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1143 ; GPRIDX-NEXT: s_endpgm
1145 ; GFX10-LABEL: dyn_insertelement_v8f64_s_v_s:
1146 ; GFX10: ; %bb.0: ; %entry
1147 ; GFX10-NEXT: s_mov_b32 s1, s3
1148 ; GFX10-NEXT: s_mov_b32 s3, s5
1149 ; GFX10-NEXT: s_mov_b32 s5, s7
1150 ; GFX10-NEXT: s_mov_b32 s7, s9
1151 ; GFX10-NEXT: s_mov_b32 s9, s11
1152 ; GFX10-NEXT: s_mov_b32 s11, s13
1153 ; GFX10-NEXT: s_mov_b32 s13, s15
1154 ; GFX10-NEXT: s_mov_b32 s15, s17
1155 ; GFX10-NEXT: s_mov_b32 s0, s2
1156 ; GFX10-NEXT: s_mov_b32 s2, s4
1157 ; GFX10-NEXT: s_mov_b32 s4, s6
1158 ; GFX10-NEXT: s_mov_b32 s6, s8
1159 ; GFX10-NEXT: s_mov_b32 s8, s10
1160 ; GFX10-NEXT: s_mov_b32 s10, s12
1161 ; GFX10-NEXT: s_mov_b32 s12, s14
1162 ; GFX10-NEXT: s_mov_b32 s14, s16
1163 ; GFX10-NEXT: v_mov_b32_e32 v17, s15
1164 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
1165 ; GFX10-NEXT: s_lshl_b32 m0, s18, 1
1166 ; GFX10-NEXT: v_mov_b32_e32 v16, s14
1167 ; GFX10-NEXT: v_mov_b32_e32 v15, s13
1168 ; GFX10-NEXT: v_mov_b32_e32 v14, s12
1169 ; GFX10-NEXT: v_mov_b32_e32 v13, s11
1170 ; GFX10-NEXT: v_mov_b32_e32 v12, s10
1171 ; GFX10-NEXT: v_mov_b32_e32 v11, s9
1172 ; GFX10-NEXT: v_mov_b32_e32 v10, s8
1173 ; GFX10-NEXT: v_mov_b32_e32 v9, s7
1174 ; GFX10-NEXT: v_mov_b32_e32 v8, s6
1175 ; GFX10-NEXT: v_mov_b32_e32 v7, s5
1176 ; GFX10-NEXT: v_mov_b32_e32 v6, s4
1177 ; GFX10-NEXT: v_mov_b32_e32 v5, s3
1178 ; GFX10-NEXT: v_mov_b32_e32 v4, s2
1179 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
1180 ; GFX10-NEXT: v_movreld_b32_e32 v2, v0
1181 ; GFX10-NEXT: v_movreld_b32_e32 v3, v1
1182 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1183 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1184 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
1185 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1186 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
1187 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1188 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
1189 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1190 ; GFX10-NEXT: s_endpgm
1192 ; GFX11-LABEL: dyn_insertelement_v8f64_s_v_s:
1193 ; GFX11: ; %bb.0: ; %entry
1194 ; GFX11-NEXT: s_mov_b32 s1, s3
1195 ; GFX11-NEXT: s_mov_b32 s3, s5
1196 ; GFX11-NEXT: s_mov_b32 s5, s7
1197 ; GFX11-NEXT: s_mov_b32 s7, s9
1198 ; GFX11-NEXT: s_mov_b32 s9, s11
1199 ; GFX11-NEXT: s_mov_b32 s11, s13
1200 ; GFX11-NEXT: s_mov_b32 s13, s15
1201 ; GFX11-NEXT: s_mov_b32 s15, s17
1202 ; GFX11-NEXT: s_mov_b32 s0, s2
1203 ; GFX11-NEXT: s_mov_b32 s2, s4
1204 ; GFX11-NEXT: s_mov_b32 s4, s6
1205 ; GFX11-NEXT: s_mov_b32 s6, s8
1206 ; GFX11-NEXT: s_mov_b32 s8, s10
1207 ; GFX11-NEXT: s_mov_b32 s10, s12
1208 ; GFX11-NEXT: s_mov_b32 s12, s14
1209 ; GFX11-NEXT: s_mov_b32 s14, s16
1210 ; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
1211 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1212 ; GFX11-NEXT: s_lshl_b32 m0, s18, 1
1213 ; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
1214 ; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
1215 ; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
1216 ; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
1217 ; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
1218 ; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
1219 ; GFX11-NEXT: v_movreld_b32_e32 v2, v0
1220 ; GFX11-NEXT: v_movreld_b32_e32 v3, v1
1221 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc
1222 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1223 ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc
1224 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1225 ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc
1226 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1227 ; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
1228 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1229 ; GFX11-NEXT: s_endpgm
; IR under test: one insertelement, then the result is split into four
; <2 x double> pieces and each piece is stored with a volatile store through an
; undef addrspace(1) pointer (the four 128-bit global stores checked above).
1231 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1232 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1233 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1234 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1235 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1236 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1237 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1238 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1239 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; Dynamic insertelement into an <8 x double> in an amdgpu_ps function where the
; vector %vec is a plain argument and both the inserted value %val and the
; index %idx are inreg.
; In the expected code the vector is already in v[0:15], %val is in s[2:3] and
; %idx in s4, so no vector copy is emitted. The index is shifted left by one
; (presumably because each double spans two 32-bit registers) and the two
; halves of %val are written in place with indexed moves: two v_mov_b32 between
; s_set_gpr_idx_on and s_set_gpr_idx_off on gfx900, and two v_movreld_b32 with
; the shifted index in m0 on gfx1010 and gfx1100.
1243 define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) {
1244 ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s:
1245 ; GPRIDX: ; %bb.0: ; %entry
1246 ; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
1247 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
1248 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
1249 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
1250 ; GPRIDX-NEXT: s_set_gpr_idx_off
1251 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1252 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1253 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1254 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1255 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1256 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1257 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1258 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1259 ; GPRIDX-NEXT: s_endpgm
1261 ; GFX10-LABEL: dyn_insertelement_v8f64_v_s_s:
1262 ; GFX10: ; %bb.0: ; %entry
1263 ; GFX10-NEXT: s_lshl_b32 m0, s4, 1
1264 ; GFX10-NEXT: v_movreld_b32_e32 v0, s2
1265 ; GFX10-NEXT: v_movreld_b32_e32 v1, s3
1266 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1267 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1268 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1269 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1270 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1271 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1272 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1273 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1274 ; GFX10-NEXT: s_endpgm
1276 ; GFX11-LABEL: dyn_insertelement_v8f64_v_s_s:
1277 ; GFX11: ; %bb.0: ; %entry
1278 ; GFX11-NEXT: s_lshl_b32 m0, s4, 1
1279 ; GFX11-NEXT: v_movreld_b32_e32 v0, s2
1280 ; GFX11-NEXT: v_movreld_b32_e32 v1, s3
1281 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
1282 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1283 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
1284 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1285 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
1286 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1287 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
1288 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1289 ; GFX11-NEXT: s_endpgm
; IR under test: one insertelement, then the result is split into four
; <2 x double> pieces and each piece is stored with a volatile store through an
; undef addrspace(1) pointer (the four 128-bit global stores checked above).
1291 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1292 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1293 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1294 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1295 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1296 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1297 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1298 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1299 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1303 define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) {
1304 ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v:
1305 ; GPRIDX: ; %bb.0: ; %entry
1306 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1307 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1308 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1309 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1310 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1311 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1312 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1313 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1314 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1315 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1316 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1317 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1318 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1319 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1320 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1321 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1322 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s15
1323 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s14
1324 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s13
1325 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s12
1326 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s11
1327 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s10
1328 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s9
1329 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s8
1330 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s7
1331 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s6
1332 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s5
1333 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
1334 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
1335 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
1336 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
1337 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
1338 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
1339 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v2
1340 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2
1341 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2
1342 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2
1343 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2
1344 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2
1345 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v2
1346 ; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[12:13]
1347 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc
1348 ; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[12:13]
1349 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
1350 ; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[0:1]
1351 ; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[2:3]
1352 ; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[4:5]
1353 ; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[6:7]
1354 ; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[8:9]
1355 ; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[10:11]
1356 ; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1]
1357 ; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3]
1358 ; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5]
1359 ; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[6:7]
1360 ; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[8:9]
1361 ; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[10:11]
1362 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1363 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1364 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
1365 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1366 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
1367 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1368 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
1369 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1370 ; GPRIDX-NEXT: s_endpgm
1372 ; GFX10-LABEL: dyn_insertelement_v8f64_s_v_v:
1373 ; GFX10: ; %bb.0: ; %entry
1374 ; GFX10-NEXT: s_mov_b32 s1, s3
1375 ; GFX10-NEXT: s_mov_b32 s3, s5
1376 ; GFX10-NEXT: s_mov_b32 s5, s7
1377 ; GFX10-NEXT: s_mov_b32 s7, s9
1378 ; GFX10-NEXT: s_mov_b32 s9, s11
1379 ; GFX10-NEXT: s_mov_b32 s11, s13
1380 ; GFX10-NEXT: s_mov_b32 s13, s15
1381 ; GFX10-NEXT: s_mov_b32 s15, s17
1382 ; GFX10-NEXT: s_mov_b32 s0, s2
1383 ; GFX10-NEXT: s_mov_b32 s2, s4
1384 ; GFX10-NEXT: s_mov_b32 s4, s6
1385 ; GFX10-NEXT: s_mov_b32 s6, s8
1386 ; GFX10-NEXT: s_mov_b32 s8, s10
1387 ; GFX10-NEXT: s_mov_b32 s10, s12
1388 ; GFX10-NEXT: s_mov_b32 s12, s14
1389 ; GFX10-NEXT: s_mov_b32 s14, s16
1390 ; GFX10-NEXT: v_mov_b32_e32 v18, s15
1391 ; GFX10-NEXT: v_mov_b32_e32 v4, s1
1392 ; GFX10-NEXT: v_mov_b32_e32 v3, s0
1393 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
1394 ; GFX10-NEXT: v_mov_b32_e32 v17, s14
1395 ; GFX10-NEXT: v_mov_b32_e32 v16, s13
1396 ; GFX10-NEXT: v_mov_b32_e32 v15, s12
1397 ; GFX10-NEXT: v_mov_b32_e32 v14, s11
1398 ; GFX10-NEXT: v_mov_b32_e32 v13, s10
1399 ; GFX10-NEXT: v_mov_b32_e32 v12, s9
1400 ; GFX10-NEXT: v_mov_b32_e32 v11, s8
1401 ; GFX10-NEXT: v_mov_b32_e32 v10, s7
1402 ; GFX10-NEXT: v_mov_b32_e32 v9, s6
1403 ; GFX10-NEXT: v_mov_b32_e32 v8, s5
1404 ; GFX10-NEXT: v_mov_b32_e32 v7, s4
1405 ; GFX10-NEXT: v_mov_b32_e32 v6, s3
1406 ; GFX10-NEXT: v_mov_b32_e32 v5, s2
1407 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
1408 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
1409 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo
1410 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
1411 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v2
1412 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
1413 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
1414 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
1415 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
1416 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo
1417 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
1418 ; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1
1419 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
1420 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
1421 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
1422 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
1423 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v1, vcc_lo
1424 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2
1425 ; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1
1426 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0
1427 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0
1428 ; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v0, vcc_lo
1429 ; GFX10-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc_lo
1430 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1431 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1432 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
1433 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1434 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
1435 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1436 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
1437 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1438 ; GFX10-NEXT: s_endpgm
1440 ; GFX11-LABEL: dyn_insertelement_v8f64_s_v_v:
1441 ; GFX11: ; %bb.0: ; %entry
1442 ; GFX11-NEXT: s_mov_b32 s1, s3
1443 ; GFX11-NEXT: s_mov_b32 s3, s5
1444 ; GFX11-NEXT: s_mov_b32 s5, s7
1445 ; GFX11-NEXT: s_mov_b32 s7, s9
1446 ; GFX11-NEXT: s_mov_b32 s9, s11
1447 ; GFX11-NEXT: s_mov_b32 s11, s13
1448 ; GFX11-NEXT: s_mov_b32 s13, s15
1449 ; GFX11-NEXT: s_mov_b32 s15, s17
1450 ; GFX11-NEXT: s_mov_b32 s0, s2
1451 ; GFX11-NEXT: s_mov_b32 s2, s4
1452 ; GFX11-NEXT: s_mov_b32 s4, s6
1453 ; GFX11-NEXT: s_mov_b32 s6, s8
1454 ; GFX11-NEXT: s_mov_b32 s8, s10
1455 ; GFX11-NEXT: s_mov_b32 s10, s12
1456 ; GFX11-NEXT: s_mov_b32 s12, s14
1457 ; GFX11-NEXT: s_mov_b32 s14, s16
1458 ; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
1459 ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
1460 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
1461 ; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
1462 ; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
1463 ; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
1464 ; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
1465 ; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
1466 ; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
1467 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
1468 ; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
1469 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
1470 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v2
1471 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
1472 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
1473 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
1474 ; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
1475 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
1476 ; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1
1477 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
1478 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
1479 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
1480 ; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1
1481 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2
1482 ; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1
1483 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0
1484 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0
1485 ; GFX11-NEXT: v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1
1486 ; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off dlc
1487 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1488 ; GFX11-NEXT: global_store_b128 v[0:1], v[7:10], off dlc
1489 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1490 ; GFX11-NEXT: global_store_b128 v[0:1], v[11:14], off dlc
1491 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1492 ; GFX11-NEXT: global_store_b128 v[0:1], v[15:18], off dlc
1493 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1494 ; GFX11-NEXT: s_endpgm
1496 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1497 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1498 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1499 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1500 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1501 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1502 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1503 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1504 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; dyn_insertelement_v8f64_v_s_v
; Inserts the double %val at the runtime index %idx into <8 x double> %vec and
; volatile-stores the result as four <2 x double> pieces.
; Only %val is marked inreg. In the checks below it is read from s2/s3, the
; vector occupies v0..v15 and the index is in v16.
; Checked lowering for every prefix: the index is compared against each
; constant 0..7 with v_cmp_eq_u32, and each compare result drives a pair of
; v_cndmask_b32 that conditionally replace the two dwords of that element.
; The GPRIDX checks first copy s2/s3 into v17/v18; the GFX10 and GFX11 checks
; use s2/s3 directly as v_cndmask_b32_e64 operands.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1508 define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) {
1509 ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v:
1510 ; GPRIDX: ; %bb.0: ; %entry
1511 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s2
1512 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s3
1513 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
1514 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v17, vcc
1515 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v18, vcc
1516 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
1517 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v17, vcc
1518 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v18, vcc
1519 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
1520 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v17, vcc
1521 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v18, vcc
1522 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
1523 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v17, vcc
1524 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v18, vcc
1525 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
1526 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v17, vcc
1527 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v18, vcc
1528 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
1529 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v17, vcc
1530 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v18, vcc
1531 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
1532 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
1533 ; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v18, vcc
1534 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
1535 ; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v17, vcc
1536 ; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc
1537 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1538 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1539 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1540 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1541 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1542 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1543 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1544 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1545 ; GPRIDX-NEXT: s_endpgm
1547 ; GFX10-LABEL: dyn_insertelement_v8f64_v_s_v:
1548 ; GFX10: ; %bb.0: ; %entry
1549 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16
1550 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16
1551 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v16
1552 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
1553 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1554 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1555 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0
1556 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0
1557 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v16
1558 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1
1559 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo
1560 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo
1561 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1562 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0
1563 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0
1564 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v16
1565 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1
1566 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo
1567 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo
1568 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1569 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0
1570 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0
1571 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo
1572 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo
1573 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1574 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1575 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1576 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1577 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1578 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1579 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1580 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1581 ; GFX10-NEXT: s_endpgm
1583 ; GFX11-LABEL: dyn_insertelement_v8f64_v_s_v:
1584 ; GFX11: ; %bb.0: ; %entry
1585 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16
1586 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v16
1587 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v16
1588 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
1589 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1590 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
1591 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0
1592 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0
1593 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v16
1594 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1
1595 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo
1596 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo
1597 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
1598 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0
1599 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0
1600 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16
1601 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1
1602 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo
1603 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo
1604 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
1605 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0
1606 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0
1607 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo
1608 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo
1609 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
1610 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1611 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
1612 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1613 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
1614 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1615 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
1616 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1617 ; GFX11-NEXT: s_endpgm
; IR under test: one dynamic insertelement, then the result is split into
; element pairs (0,1) (2,3) (4,5) (6,7) and each pair is stored.
; The stores are volatile and target an undef addrspace(1) pointer; presumably
; they exist only to keep every result element live - TODO confirm.
1619 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1620 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1621 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1622 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1623 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1624 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1625 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1626 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1627 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; dyn_insertelement_v8f64_v_v_s
; Inserts the double %val at the runtime index %idx into <8 x double> %vec and
; volatile-stores the result as four <2 x double> pieces.
; Only %idx is marked inreg. In the checks below it is read from s2, the value
; is in v16/v17 and the vector occupies v0..v15.
; Checked lowering: the index is shifted left by one (s_lshl_b32 ..., 1) and
; used for two indexed 32-bit moves that write v16 and v17 relative to v0 and
; v1. The GPRIDX checks bracket plain v_mov_b32 with s_set_gpr_idx_on/off; the
; GFX10 and GFX11 checks put the shifted index in m0 and use v_movreld_b32.
; The doubling presumably reflects each double taking two consecutive 32-bit
; registers - TODO confirm.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1631 define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
1632 ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
1633 ; GPRIDX: ; %bb.0: ; %entry
1634 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
1635 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
1636 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
1637 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
1638 ; GPRIDX-NEXT: s_set_gpr_idx_off
1639 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1640 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1641 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1642 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1643 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1644 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1645 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1646 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1647 ; GPRIDX-NEXT: s_endpgm
1649 ; GFX10-LABEL: dyn_insertelement_v8f64_v_v_s:
1650 ; GFX10: ; %bb.0: ; %entry
1651 ; GFX10-NEXT: s_lshl_b32 m0, s2, 1
1652 ; GFX10-NEXT: v_movreld_b32_e32 v0, v16
1653 ; GFX10-NEXT: v_movreld_b32_e32 v1, v17
1654 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1655 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1656 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1657 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1658 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1659 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1660 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1661 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1662 ; GFX10-NEXT: s_endpgm
1664 ; GFX11-LABEL: dyn_insertelement_v8f64_v_v_s:
1665 ; GFX11: ; %bb.0: ; %entry
1666 ; GFX11-NEXT: s_lshl_b32 m0, s2, 1
1667 ; GFX11-NEXT: v_movreld_b32_e32 v0, v16
1668 ; GFX11-NEXT: v_movreld_b32_e32 v1, v17
1669 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
1670 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1671 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
1672 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1673 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
1674 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1675 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
1676 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1677 ; GFX11-NEXT: s_endpgm
; IR under test: one dynamic insertelement, then the result is split into
; element pairs (0,1) (2,3) (4,5) (6,7) and each pair is stored.
; The stores are volatile and target an undef addrspace(1) pointer; presumably
; they exist only to keep every result element live - TODO confirm.
1679 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1680 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1681 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1682 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1683 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1684 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1685 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1686 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1687 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; dyn_insertelement_v8f64_v_v_v
; Inserts the double %val at the runtime index %idx into <8 x double> %vec and
; volatile-stores the result as four <2 x double> pieces.
; No argument is marked inreg. In the checks below the vector occupies
; v0..v15, the value is in v16/v17 and the index is in v18.
; Checked lowering for every prefix: the index is compared against each
; constant 0..7 with v_cmp_eq_u32, and each compare result drives the selects
; that conditionally replace the two dwords of that element with v16/v17.
; In the GFX11 checks the selects keyed on vcc_lo appear as a single
; v_dual_cndmask_b32 pair; the ones keyed on s0/s1 stay as v_cndmask_b32_e64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1691 define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
1692 ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
1693 ; GPRIDX: ; %bb.0: ; %entry
1694 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
1695 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
1696 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
1697 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18
1698 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc
1699 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
1700 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18
1701 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc
1702 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
1703 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18
1704 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc
1705 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
1706 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v18
1707 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc
1708 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
1709 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v18
1710 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v16, vcc
1711 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
1712 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v18
1713 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
1714 ; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
1715 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v18
1716 ; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
1717 ; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
1718 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1719 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1720 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1721 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1722 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1723 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1724 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1725 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
1726 ; GPRIDX-NEXT: s_endpgm
1728 ; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v:
1729 ; GFX10: ; %bb.0: ; %entry
1730 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
1731 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
1732 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v18
1733 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
1734 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
1735 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18
1736 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
1737 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
1738 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v18
1739 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1
1740 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo
1741 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo
1742 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18
1743 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0
1744 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0
1745 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v18
1746 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1
1747 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo
1748 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo
1749 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18
1750 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
1751 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0
1752 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc_lo
1753 ; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc_lo
1754 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1755 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1756 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1757 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1758 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1759 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1760 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
1761 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1762 ; GFX10-NEXT: s_endpgm
1764 ; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v:
1765 ; GFX11: ; %bb.0: ; %entry
1766 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
1767 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
1768 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v18
1769 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v16 :: v_dual_cndmask_b32 v1, v1, v17
1770 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18
1771 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
1772 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
1773 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v18
1774 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1
1775 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v16 :: v_dual_cndmask_b32 v5, v5, v17
1776 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18
1777 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0
1778 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0
1779 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v18
1780 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1
1781 ; GFX11-NEXT: v_dual_cndmask_b32 v8, v8, v16 :: v_dual_cndmask_b32 v9, v9, v17
1782 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18
1783 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
1784 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0
1785 ; GFX11-NEXT: v_dual_cndmask_b32 v12, v12, v16 :: v_dual_cndmask_b32 v13, v13, v17
1786 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
1787 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1788 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
1789 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1790 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
1791 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1792 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
1793 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1794 ; GFX11-NEXT: s_endpgm
; IR under test: one dynamic insertelement, then the result is split into
; element pairs (0,1) (2,3) (4,5) (6,7) and each pair is stored.
; The stores are volatile and target an undef addrspace(1) pointer; presumably
; they exist only to keep every result element live - TODO confirm.
1796 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1797 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1798 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1799 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1800 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1801 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1802 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1803 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1804 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; dyn_insertelement_v3i32_s_s_s
; Inserts the i32 %val at the runtime index %idx into <3 x i32> %vec and
; returns the updated vector. All three arguments are marked inreg.
; In the checks below the vector arrives in s2..s4, the value in s5 and the
; index in s6; the result is produced in s0..s2.
; Checked lowering (identical for the GPRIDX and GFX10PLUS prefixes): the index
; is compared against 0, 1 and 2 with s_cmp_eq_u32, and each compare is
; followed by an s_cselect_b32 choosing between s5 and the original element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1808 define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1809 ; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
1810 ; GPRIDX: ; %bb.0: ; %entry
1811 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 0
1812 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s2
1813 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1
1814 ; GPRIDX-NEXT: s_cselect_b32 s1, s5, s3
1815 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2
1816 ; GPRIDX-NEXT: s_cselect_b32 s2, s5, s4
1817 ; GPRIDX-NEXT: ; return to shader part epilog
1819 ; GFX10PLUS-LABEL: dyn_insertelement_v3i32_s_s_s:
1820 ; GFX10PLUS: ; %bb.0: ; %entry
1821 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 0
1822 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s2
1823 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 1
1824 ; GFX10PLUS-NEXT: s_cselect_b32 s1, s5, s3
1825 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 2
1826 ; GFX10PLUS-NEXT: s_cselect_b32 s2, s5, s4
1827 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
1829 %insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
1830 ret <3 x i32> %insert
; dyn_insertelement_v3i32_v_v_s
; Inserts %val at the runtime index %idx into a 3-element vector and returns
; the updated vector. Only %idx is marked inreg.
; NOTE(review): the function name says v3i32, but the signature and the
; insertelement below operate on <3 x float> / float. Renaming would change the
; LABEL checks, so confirm intent before touching it.
; In the checks below the vector is in v0..v2, the value in v3 and the index in
; s2.
; Checked lowering (same shape for both prefixes): the index is compared
; against 0, 1 and 2 with v_cmp_eq_u32_e64 into vcc (vcc_lo for GFX10PLUS), and
; each compare is followed by a v_cndmask_b32 that selects v3 for that element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1833 define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
1834 ; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
1835 ; GPRIDX: ; %bb.0: ; %entry
1836 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
1837 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1838 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
1839 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1840 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
1841 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1842 ; GPRIDX-NEXT: ; return to shader part epilog
1844 ; GFX10PLUS-LABEL: dyn_insertelement_v3i32_v_v_s:
1845 ; GFX10PLUS: ; %bb.0: ; %entry
1846 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
1847 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1848 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
1849 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1850 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
1851 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1852 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
1854 %insert = insertelement <3 x float> %vec, float %val, i32 %idx
1855 ret <3 x float> %insert
; dyn_insertelement_v5i32_s_s_s
; Inserts the i32 %val at the runtime index %idx into <5 x i32> %vec and
; returns the updated vector. All three arguments are marked inreg.
; In the checks below the vector arrives in s2..s6, the value in s7 and the
; index in s8; the result is produced in s0..s4.
; Checked lowering (identical for the GPRIDX and GFX10PLUS prefixes): the index
; is compared against 0..4 with s_cmp_eq_u32, and each compare is followed by
; an s_cselect_b32 choosing between s7 and the original element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1858 define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1859 ; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
1860 ; GPRIDX: ; %bb.0: ; %entry
1861 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 0
1862 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s2
1863 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
1864 ; GPRIDX-NEXT: s_cselect_b32 s1, s7, s3
1865 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2
1866 ; GPRIDX-NEXT: s_cselect_b32 s2, s7, s4
1867 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3
1868 ; GPRIDX-NEXT: s_cselect_b32 s3, s7, s5
1869 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4
1870 ; GPRIDX-NEXT: s_cselect_b32 s4, s7, s6
1871 ; GPRIDX-NEXT: ; return to shader part epilog
1873 ; GFX10PLUS-LABEL: dyn_insertelement_v5i32_s_s_s:
1874 ; GFX10PLUS: ; %bb.0: ; %entry
1875 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 0
1876 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s2
1877 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1
1878 ; GFX10PLUS-NEXT: s_cselect_b32 s1, s7, s3
1879 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2
1880 ; GFX10PLUS-NEXT: s_cselect_b32 s2, s7, s4
1881 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3
1882 ; GFX10PLUS-NEXT: s_cselect_b32 s3, s7, s5
1883 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4
1884 ; GFX10PLUS-NEXT: s_cselect_b32 s4, s7, s6
1885 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
1887 %insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
1888 ret <5 x i32> %insert
; dyn_insertelement_v5i32_v_v_s
; Inserts %val at the runtime index %idx into a 5-element vector and returns
; the updated vector. Only %idx is marked inreg.
; NOTE(review): the function name says v5i32, but the signature and the
; insertelement below operate on <5 x float> / float. Renaming would change the
; LABEL checks, so confirm intent before touching it.
; In the checks below the vector is in v0..v4, the value in v5 and the index in
; s2.
; Checked lowering (same shape for both prefixes): the index is compared
; against 0..4 with v_cmp_eq_u32_e64 into vcc (vcc_lo for GFX10PLUS), and each
; compare is followed by a v_cndmask_b32 that selects v5 for that element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1891 define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
1892 ; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
1893 ; GPRIDX: ; %bb.0: ; %entry
1894 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
1895 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1896 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
1897 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1898 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
1899 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1900 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
1901 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1902 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
1903 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1904 ; GPRIDX-NEXT: ; return to shader part epilog
1906 ; GFX10PLUS-LABEL: dyn_insertelement_v5i32_v_v_s:
1907 ; GFX10PLUS: ; %bb.0: ; %entry
1908 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
1909 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1910 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
1911 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1912 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
1913 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
1914 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
1915 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
1916 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
1917 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1918 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
1920 %insert = insertelement <5 x float> %vec, float %val, i32 %idx
1921 ret <5 x float> %insert
; dyn_insertelement_v32i32_s_s_s
; Inserts the i32 %val at the runtime index %idx into <32 x i32> %vec and
; returns the updated vector. All three arguments are marked inreg.
; In the checks below the vector arrives in s2..s33, the value in s34 and the
; index in s35.
; Checked lowering: the 32 elements are copied down into s0..s31 with
; s_mov_b32, the index is copied into m0, and a single s_movreld_b32 s0, s34
; performs the indexed write.
; The two prefixes differ in where the m0 write is placed, and the GPRIDX
; checks have an s_nop 0 between the m0 write and s_movreld_b32 that the
; GFX10PLUS checks do not.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
1924 define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1925 ; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
1926 ; GPRIDX: ; %bb.0: ; %entry
1927 ; GPRIDX-NEXT: s_mov_b32 s0, s2
1928 ; GPRIDX-NEXT: s_mov_b32 s1, s3
1929 ; GPRIDX-NEXT: s_mov_b32 s2, s4
1930 ; GPRIDX-NEXT: s_mov_b32 s3, s5
1931 ; GPRIDX-NEXT: s_mov_b32 s4, s6
1932 ; GPRIDX-NEXT: s_mov_b32 s5, s7
1933 ; GPRIDX-NEXT: s_mov_b32 s6, s8
1934 ; GPRIDX-NEXT: s_mov_b32 s7, s9
1935 ; GPRIDX-NEXT: s_mov_b32 s8, s10
1936 ; GPRIDX-NEXT: s_mov_b32 s9, s11
1937 ; GPRIDX-NEXT: s_mov_b32 s10, s12
1938 ; GPRIDX-NEXT: s_mov_b32 s11, s13
1939 ; GPRIDX-NEXT: s_mov_b32 s12, s14
1940 ; GPRIDX-NEXT: s_mov_b32 s13, s15
1941 ; GPRIDX-NEXT: s_mov_b32 s14, s16
1942 ; GPRIDX-NEXT: s_mov_b32 s15, s17
1943 ; GPRIDX-NEXT: s_mov_b32 s16, s18
1944 ; GPRIDX-NEXT: s_mov_b32 s17, s19
1945 ; GPRIDX-NEXT: s_mov_b32 s18, s20
1946 ; GPRIDX-NEXT: s_mov_b32 s19, s21
1947 ; GPRIDX-NEXT: s_mov_b32 s20, s22
1948 ; GPRIDX-NEXT: s_mov_b32 s21, s23
1949 ; GPRIDX-NEXT: s_mov_b32 s22, s24
1950 ; GPRIDX-NEXT: s_mov_b32 s23, s25
1951 ; GPRIDX-NEXT: s_mov_b32 s24, s26
1952 ; GPRIDX-NEXT: s_mov_b32 s25, s27
1953 ; GPRIDX-NEXT: s_mov_b32 s26, s28
1954 ; GPRIDX-NEXT: s_mov_b32 s27, s29
1955 ; GPRIDX-NEXT: s_mov_b32 s28, s30
1956 ; GPRIDX-NEXT: s_mov_b32 s29, s31
1957 ; GPRIDX-NEXT: s_mov_b32 s31, s33
1958 ; GPRIDX-NEXT: s_mov_b32 s30, s32
1959 ; GPRIDX-NEXT: s_mov_b32 m0, s35
1960 ; GPRIDX-NEXT: s_nop 0
1961 ; GPRIDX-NEXT: s_movreld_b32 s0, s34
1962 ; GPRIDX-NEXT: ; return to shader part epilog
1964 ; GFX10PLUS-LABEL: dyn_insertelement_v32i32_s_s_s:
1965 ; GFX10PLUS: ; %bb.0: ; %entry
1966 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
1967 ; GFX10PLUS-NEXT: s_mov_b32 m0, s35
1968 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
1969 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1970 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1971 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
1972 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
1973 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
1974 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
1975 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
1976 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
1977 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
1978 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
1979 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
1980 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
1981 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
1982 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
1983 ; GFX10PLUS-NEXT: s_mov_b32 s16, s18
1984 ; GFX10PLUS-NEXT: s_mov_b32 s17, s19
1985 ; GFX10PLUS-NEXT: s_mov_b32 s18, s20
1986 ; GFX10PLUS-NEXT: s_mov_b32 s19, s21
1987 ; GFX10PLUS-NEXT: s_mov_b32 s20, s22
1988 ; GFX10PLUS-NEXT: s_mov_b32 s21, s23
1989 ; GFX10PLUS-NEXT: s_mov_b32 s22, s24
1990 ; GFX10PLUS-NEXT: s_mov_b32 s23, s25
1991 ; GFX10PLUS-NEXT: s_mov_b32 s24, s26
1992 ; GFX10PLUS-NEXT: s_mov_b32 s25, s27
1993 ; GFX10PLUS-NEXT: s_mov_b32 s26, s28
1994 ; GFX10PLUS-NEXT: s_mov_b32 s27, s29
1995 ; GFX10PLUS-NEXT: s_mov_b32 s28, s30
1996 ; GFX10PLUS-NEXT: s_mov_b32 s29, s31
1997 ; GFX10PLUS-NEXT: s_mov_b32 s31, s33
1998 ; GFX10PLUS-NEXT: s_mov_b32 s30, s32
1999 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s34
2000 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
2002 %insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
2003 ret <32 x i32> %insert
; dyn_insertelement_v32i32_v_v_s
; Inserts %val at the runtime index %idx into a 32-element vector and returns
; the updated vector. Only %idx is marked inreg.
; NOTE(review): the function name says v32i32, but the signature and the
; insertelement below operate on <32 x float> / float. Renaming would change
; the LABEL checks, so confirm intent before touching it.
; In the checks below the value is in v32 and the index in s2.
; Checked lowering: a single indexed move relative to v0. The GPRIDX checks
; bracket v_mov_b32 v0, v32 with s_set_gpr_idx_on s2 / s_set_gpr_idx_off; the
; GFX10PLUS checks copy s2 into m0 and use v_movreld_b32 v0, v32.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
2006 define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
2007 ; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
2008 ; GPRIDX: ; %bb.0: ; %entry
2009 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
2010 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v32
2011 ; GPRIDX-NEXT: s_set_gpr_idx_off
2012 ; GPRIDX-NEXT: ; return to shader part epilog
2014 ; GFX10PLUS-LABEL: dyn_insertelement_v32i32_v_v_s:
2015 ; GFX10PLUS: ; %bb.0: ; %entry
2016 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2017 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v32
2018 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single dynamic insertelement whose result is returned.
2020 %insert = insertelement <32 x float> %vec, float %val, i32 %idx
2021 ret <32 x float> %insert
; dyn_insertelement_v8f32_s_s_s_add_1
; Inserts the float %val into <8 x float> %vec at the runtime index %idx + 1
; and returns the updated vector. All three arguments are marked inreg.
; In the checks below the vector arrives in s2..s9, the value in s10 and the
; index in s11; the result is copied out to v0..v7.
; Checked lowering differs by prefix:
;  - GPRIDX checks: the add is explicit (s_add_i32 s11, s11, 1), followed by an
;    s_cmp_eq_u32 / s_cselect_b32 pair for each of the 8 elements.
;  - GFX10 and GFX11 checks: no add instruction appears. m0 is loaded with the
;    unmodified s11 and the indexed write is s_movreld_b32 s1, s10, i.e. it is
;    based at s1 rather than s0, so the +1 is carried by the base register.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them instead of editing by hand.
2024 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2025 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2026 ; GPRIDX: ; %bb.0: ; %entry
2027 ; GPRIDX-NEXT: s_add_i32 s11, s11, 1
2028 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
2029 ; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
2030 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
2031 ; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
2032 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
2033 ; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
2034 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
2035 ; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
2036 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
2037 ; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
2038 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
2039 ; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
2040 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
2041 ; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
2042 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
2043 ; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
2044 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
2045 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
2046 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2047 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2048 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2049 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2050 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2051 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2052 ; GPRIDX-NEXT: ; return to shader part epilog
2054 ; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2055 ; GFX10: ; %bb.0: ; %entry
2056 ; GFX10-NEXT: s_mov_b32 s1, s3
2057 ; GFX10-NEXT: s_mov_b32 m0, s11
2058 ; GFX10-NEXT: s_mov_b32 s0, s2
2059 ; GFX10-NEXT: s_mov_b32 s2, s4
2060 ; GFX10-NEXT: s_mov_b32 s3, s5
2061 ; GFX10-NEXT: s_mov_b32 s4, s6
2062 ; GFX10-NEXT: s_mov_b32 s5, s7
2063 ; GFX10-NEXT: s_mov_b32 s6, s8
2064 ; GFX10-NEXT: s_mov_b32 s7, s9
2065 ; GFX10-NEXT: s_movreld_b32 s1, s10
2066 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2067 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2068 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
2069 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
2070 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
2071 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
2072 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
2073 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
2074 ; GFX10-NEXT: ; return to shader part epilog
2076 ; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2077 ; GFX11: ; %bb.0: ; %entry
2078 ; GFX11-NEXT: s_mov_b32 s1, s3
2079 ; GFX11-NEXT: s_mov_b32 m0, s11
2080 ; GFX11-NEXT: s_mov_b32 s0, s2
2081 ; GFX11-NEXT: s_mov_b32 s2, s4
2082 ; GFX11-NEXT: s_mov_b32 s3, s5
2083 ; GFX11-NEXT: s_mov_b32 s4, s6
2084 ; GFX11-NEXT: s_mov_b32 s5, s7
2085 ; GFX11-NEXT: s_mov_b32 s6, s8
2086 ; GFX11-NEXT: s_mov_b32 s7, s9
2087 ; GFX11-NEXT: s_movreld_b32 s1, s10
2088 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2089 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2090 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2091 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2092 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: the insert index is computed as %idx + 1 before the dynamic
; insertelement; the updated vector is returned.
2094 %idx.add = add i32 %idx, 1
2095 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2096 ret <8 x float> %insert
; Dynamic insertelement into <8 x float> at index (%idx + 7). The vector, the
; value and the index are all passed inreg; the checks below reference them in
; s2-s9, s10 and s11 respectively.
; Expected code for each check prefix (see the llc run lines at the top of the
; file for the matching -mcpu):
;   GPRIDX       - s_add_i32 on the index, followed by one
;                  s_cmp_eq_u32 / s_cselect_b32 pair per element.
;   GFX10, GFX11 - no add instruction; m0 is loaded with the unmodified index
;                  and s_movreld_b32 uses s7 (the last element's register) as
;                  its base register.
; In every variant the result is finally copied from s0-s7 into v0-v7.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2099 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2100 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2101 ; GPRIDX: ; %bb.0: ; %entry
2102 ; GPRIDX-NEXT: s_add_i32 s11, s11, 7
2103 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
2104 ; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
2105 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
2106 ; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
2107 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
2108 ; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
2109 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
2110 ; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
2111 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
2112 ; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
2113 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
2114 ; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
2115 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
2116 ; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
2117 ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
2118 ; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
2119 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
2120 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
2121 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2122 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2123 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2124 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2125 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2126 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2127 ; GPRIDX-NEXT: ; return to shader part epilog
2129 ; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2130 ; GFX10: ; %bb.0: ; %entry
2131 ; GFX10-NEXT: s_mov_b32 s1, s3
2132 ; GFX10-NEXT: s_mov_b32 s3, s5
2133 ; GFX10-NEXT: s_mov_b32 s5, s7
2134 ; GFX10-NEXT: s_mov_b32 s7, s9
2135 ; GFX10-NEXT: s_mov_b32 m0, s11
2136 ; GFX10-NEXT: s_mov_b32 s0, s2
2137 ; GFX10-NEXT: s_mov_b32 s2, s4
2138 ; GFX10-NEXT: s_mov_b32 s4, s6
2139 ; GFX10-NEXT: s_mov_b32 s6, s8
2140 ; GFX10-NEXT: s_movreld_b32 s7, s10
2141 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2142 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2143 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
2144 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
2145 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
2146 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
2147 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
2148 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
2149 ; GFX10-NEXT: ; return to shader part epilog
2151 ; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2152 ; GFX11: ; %bb.0: ; %entry
2153 ; GFX11-NEXT: s_mov_b32 s1, s3
2154 ; GFX11-NEXT: s_mov_b32 s3, s5
2155 ; GFX11-NEXT: s_mov_b32 s5, s7
2156 ; GFX11-NEXT: s_mov_b32 s7, s9
2157 ; GFX11-NEXT: s_mov_b32 m0, s11
2158 ; GFX11-NEXT: s_mov_b32 s0, s2
2159 ; GFX11-NEXT: s_mov_b32 s2, s4
2160 ; GFX11-NEXT: s_mov_b32 s4, s6
2161 ; GFX11-NEXT: s_mov_b32 s6, s8
2162 ; GFX11-NEXT: s_movreld_b32 s7, s10
2163 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2164 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2165 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2166 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2167 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: offset the index by the constant 7, insert %val there and
; return the updated vector.
2169 %idx.add = add i32 %idx, 7
2170 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2171 ret <8 x float> %insert
; Dynamic insertelement into <8 x float> at index (%idx + 1) where none of the
; arguments is inreg; the checks below reference the vector in v0-v7, the
; value in v8 and the index in v9.
; Every check prefix expects the same shape: a vector add of 1 to the index,
; then one v_cmp_eq_u32 / v_cndmask_b32 pair per element that selects v8 for
; the matching lane and keeps the old element otherwise.
;   GPRIDX    - v_add_u32_e32, conditions held in vcc.
;   GFX10PLUS - v_add_nc_u32_e32, conditions held in vcc_lo.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2174 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
2175 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
2176 ; GPRIDX: ; %bb.0: ; %entry
2177 ; GPRIDX-NEXT: v_add_u32_e32 v9, 1, v9
2178 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
2179 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2180 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
2181 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
2182 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9
2183 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2184 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9
2185 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
2186 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9
2187 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
2188 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9
2189 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
2190 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9
2191 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
2192 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9
2193 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
2194 ; GPRIDX-NEXT: ; return to shader part epilog
2196 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
2197 ; GFX10PLUS: ; %bb.0: ; %entry
2198 ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v9, 1, v9
2199 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
2200 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2201 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
2202 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
2203 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9
2204 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo
2205 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9
2206 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
2207 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9
2208 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo
2209 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9
2210 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo
2211 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9
2212 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
2213 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9
2214 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
2215 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: offset the index by the constant 1, insert %val there and
; return the updated vector.
2217 %idx.add = add i32 %idx, 1
2218 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2219 ret <8 x float> %insert
; Dynamic insertelement into <8 x float> at index (%idx + 7) where none of the
; arguments is inreg; the checks below reference the vector in v0-v7, the
; value in v8 and the index in v9.
; Every check prefix expects the same shape: a vector add of 7 to the index,
; then one v_cmp_eq_u32 / v_cndmask_b32 pair per element that selects v8 for
; the matching lane and keeps the old element otherwise.
;   GPRIDX    - v_add_u32_e32, conditions held in vcc.
;   GFX10PLUS - v_add_nc_u32_e32, conditions held in vcc_lo.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2222 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
2223 ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
2224 ; GPRIDX: ; %bb.0: ; %entry
2225 ; GPRIDX-NEXT: v_add_u32_e32 v9, 7, v9
2226 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
2227 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
2228 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
2229 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
2230 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9
2231 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
2232 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9
2233 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
2234 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9
2235 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
2236 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9
2237 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
2238 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9
2239 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
2240 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9
2241 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
2242 ; GPRIDX-NEXT: ; return to shader part epilog
2244 ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
2245 ; GFX10PLUS: ; %bb.0: ; %entry
2246 ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v9, 7, v9
2247 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
2248 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2249 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
2250 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
2251 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9
2252 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo
2253 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9
2254 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
2255 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9
2256 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo
2257 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9
2258 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo
2259 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9
2260 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
2261 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9
2262 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
2263 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: offset the index by the constant 7, insert %val there and
; return the updated vector.
2265 %idx.add = add i32 %idx, 7
2266 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2267 ret <8 x float> %insert
; Dynamic insertelement into <8 x double> at index (%idx + 1). The vector, the
; value and the index are all passed inreg; the checks below reference them in
; s2-s17, s[18:19] and s20 respectively.
; All three check prefixes expect the same core sequence: the vector is moved
; down to s0-s15, m0 is loaded with the unmodified index (no add instruction),
; and s_movreld_b64 writes s[18:19] using s[2:3] as its base register pair.
;   GPRIDX - additionally expects an s_nop 0 between the m0 write and the
;            s_movreld_b64, and copies/stores the result four dwords at a time.
;   GFX10  - copies all sixteen dwords into v0-v15 first, then stores.
;   GFX11  - same as GFX10 but with v_dual_mov_b32 and global_store_b128.
; The function returns void; the result is observed through four volatile
; <2 x double> stores, each followed by a wait instruction in the expected
; output.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2270 define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
2271 ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2272 ; GPRIDX: ; %bb.0: ; %entry
2273 ; GPRIDX-NEXT: s_mov_b32 s0, s2
2274 ; GPRIDX-NEXT: s_mov_b32 s1, s3
2275 ; GPRIDX-NEXT: s_mov_b32 s2, s4
2276 ; GPRIDX-NEXT: s_mov_b32 s3, s5
2277 ; GPRIDX-NEXT: s_mov_b32 s4, s6
2278 ; GPRIDX-NEXT: s_mov_b32 s5, s7
2279 ; GPRIDX-NEXT: s_mov_b32 s6, s8
2280 ; GPRIDX-NEXT: s_mov_b32 s7, s9
2281 ; GPRIDX-NEXT: s_mov_b32 s8, s10
2282 ; GPRIDX-NEXT: s_mov_b32 s9, s11
2283 ; GPRIDX-NEXT: s_mov_b32 s10, s12
2284 ; GPRIDX-NEXT: s_mov_b32 s11, s13
2285 ; GPRIDX-NEXT: s_mov_b32 s12, s14
2286 ; GPRIDX-NEXT: s_mov_b32 s13, s15
2287 ; GPRIDX-NEXT: s_mov_b32 s14, s16
2288 ; GPRIDX-NEXT: s_mov_b32 s15, s17
2289 ; GPRIDX-NEXT: s_mov_b32 m0, s20
2290 ; GPRIDX-NEXT: s_nop 0
2291 ; GPRIDX-NEXT: s_movreld_b64 s[2:3], s[18:19]
2292 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
2293 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
2294 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2295 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2296 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2297 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2298 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s4
2299 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s5
2300 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s6
2301 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s7
2302 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2303 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2304 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s8
2305 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s9
2306 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s10
2307 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s11
2308 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2309 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2310 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s12
2311 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s13
2312 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s14
2313 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s15
2314 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2315 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2316 ; GPRIDX-NEXT: s_endpgm
2318 ; GFX10-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2319 ; GFX10: ; %bb.0: ; %entry
2320 ; GFX10-NEXT: s_mov_b32 s0, s2
2321 ; GFX10-NEXT: s_mov_b32 s1, s3
2322 ; GFX10-NEXT: s_mov_b32 s2, s4
2323 ; GFX10-NEXT: s_mov_b32 s3, s5
2324 ; GFX10-NEXT: s_mov_b32 m0, s20
2325 ; GFX10-NEXT: s_mov_b32 s4, s6
2326 ; GFX10-NEXT: s_mov_b32 s5, s7
2327 ; GFX10-NEXT: s_mov_b32 s6, s8
2328 ; GFX10-NEXT: s_mov_b32 s7, s9
2329 ; GFX10-NEXT: s_mov_b32 s8, s10
2330 ; GFX10-NEXT: s_mov_b32 s9, s11
2331 ; GFX10-NEXT: s_mov_b32 s10, s12
2332 ; GFX10-NEXT: s_mov_b32 s11, s13
2333 ; GFX10-NEXT: s_mov_b32 s12, s14
2334 ; GFX10-NEXT: s_mov_b32 s13, s15
2335 ; GFX10-NEXT: s_mov_b32 s14, s16
2336 ; GFX10-NEXT: s_mov_b32 s15, s17
2337 ; GFX10-NEXT: s_movreld_b64 s[2:3], s[18:19]
2338 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2339 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2340 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
2341 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
2342 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
2343 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
2344 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
2345 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
2346 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
2347 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
2348 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
2349 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
2350 ; GFX10-NEXT: v_mov_b32_e32 v12, s12
2351 ; GFX10-NEXT: v_mov_b32_e32 v13, s13
2352 ; GFX10-NEXT: v_mov_b32_e32 v14, s14
2353 ; GFX10-NEXT: v_mov_b32_e32 v15, s15
2354 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2355 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2356 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
2357 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2358 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
2359 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2360 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
2361 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2362 ; GFX10-NEXT: s_endpgm
2364 ; GFX11-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2365 ; GFX11: ; %bb.0: ; %entry
2366 ; GFX11-NEXT: s_mov_b32 s0, s2
2367 ; GFX11-NEXT: s_mov_b32 s1, s3
2368 ; GFX11-NEXT: s_mov_b32 s2, s4
2369 ; GFX11-NEXT: s_mov_b32 s3, s5
2370 ; GFX11-NEXT: s_mov_b32 m0, s20
2371 ; GFX11-NEXT: s_mov_b32 s4, s6
2372 ; GFX11-NEXT: s_mov_b32 s5, s7
2373 ; GFX11-NEXT: s_mov_b32 s6, s8
2374 ; GFX11-NEXT: s_mov_b32 s7, s9
2375 ; GFX11-NEXT: s_mov_b32 s8, s10
2376 ; GFX11-NEXT: s_mov_b32 s9, s11
2377 ; GFX11-NEXT: s_mov_b32 s10, s12
2378 ; GFX11-NEXT: s_mov_b32 s11, s13
2379 ; GFX11-NEXT: s_mov_b32 s12, s14
2380 ; GFX11-NEXT: s_mov_b32 s13, s15
2381 ; GFX11-NEXT: s_mov_b32 s14, s16
2382 ; GFX11-NEXT: s_mov_b32 s15, s17
2383 ; GFX11-NEXT: s_movreld_b64 s[2:3], s[18:19]
2384 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2385 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2386 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2387 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2388 ; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
2389 ; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
2390 ; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
2391 ; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
2392 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
2393 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2394 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
2395 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2396 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
2397 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2398 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
2399 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2400 ; GFX11-NEXT: s_endpgm
; IR under test: offset the index by the constant 1 and insert %val there.
2402 %idx.add = add i32 %idx, 1
2403 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
; Split the 8-element result into four 2-element pieces (elements 0-1, 2-3,
; 4-5 and 6-7) ...
2404 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2405 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2406 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2407 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
; ... and write each piece with a volatile store to an undef addrspace(1)
; pointer.
2408 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
2409 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
2410 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
2411 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; Dynamic insertelement into <8 x double> at index (%idx + 1) where none of
; the arguments is inreg; the checks below reference the vector in v0-v15, the
; value in v16/v17 and the index in v18.
; Every check prefix expects a vector add of 1 to the index and then, for each
; of the eight elements, a v_cmp_eq_u32 against the element number plus two
; v_cndmask_b32 instructions, one per 32-bit half of the double (sources v16
; and v17).
;   GPRIDX - strictly sequential, all conditions in vcc.
;   GFX10  - compares are interleaved and use vcc_lo, s0 and s1 as condition
;            registers.
;   GFX11  - like GFX10, with the final pair merged into one
;            v_dual_cndmask_b32 and global_store_b128 for the stores.
; The function returns void; the result is observed through four volatile
; <2 x double> stores, each followed by a wait instruction in the expected
; output.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2415 define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
2416 ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2417 ; GPRIDX: ; %bb.0: ; %entry
2418 ; GPRIDX-NEXT: v_add_u32_e32 v18, 1, v18
2419 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
2420 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
2421 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
2422 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18
2423 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc
2424 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
2425 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18
2426 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc
2427 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
2428 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18
2429 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc
2430 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
2431 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v18
2432 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc
2433 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
2434 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v18
2435 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v16, vcc
2436 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
2437 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v18
2438 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
2439 ; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
2440 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v18
2441 ; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
2442 ; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
2443 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2444 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2445 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
2446 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2447 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
2448 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2449 ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
2450 ; GPRIDX-NEXT: s_waitcnt vmcnt(0)
2451 ; GPRIDX-NEXT: s_endpgm
2453 ; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2454 ; GFX10: ; %bb.0: ; %entry
2455 ; GFX10-NEXT: v_add_nc_u32_e32 v18, 1, v18
2456 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
2457 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
2458 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v18
2459 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
2460 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
2461 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18
2462 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
2463 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
2464 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v18
2465 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1
2466 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo
2467 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo
2468 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18
2469 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0
2470 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0
2471 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v18
2472 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1
2473 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo
2474 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo
2475 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18
2476 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
2477 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0
2478 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc_lo
2479 ; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc_lo
2480 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
2481 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2482 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
2483 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2484 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
2485 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2486 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
2487 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2488 ; GFX10-NEXT: s_endpgm
2490 ; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2491 ; GFX11: ; %bb.0: ; %entry
2492 ; GFX11-NEXT: v_add_nc_u32_e32 v18, 1, v18
2493 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
2494 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
2495 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
2496 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
2497 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18
2498 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v18
2499 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
2500 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
2501 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo
2502 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v18
2503 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo
2504 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18
2505 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1
2506 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1
2507 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0
2508 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0
2509 ; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo
2510 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v18
2511 ; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo
2512 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18
2513 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
2514 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0
2515 ; GFX11-NEXT: v_dual_cndmask_b32 v13, v13, v17 :: v_dual_cndmask_b32 v12, v12, v16
2516 ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc
2517 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2518 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc
2519 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2520 ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc
2521 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2522 ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
2523 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2524 ; GFX11-NEXT: s_endpgm
; IR under test: offset the index by the constant 1 and insert %val there.
2526 %idx.add = add i32 %idx, 1
2527 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
; Split the 8-element result into four 2-element pieces (elements 0-1, 2-3,
; 4-5 and 6-7) ...
2528 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2529 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2530 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2531 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
; ... and write each piece with a volatile store to an undef addrspace(1)
; pointer.
2532 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
2533 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
2534 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
2535 store volatile <2 x double> %vec.3, ptr addrspace(1) undef
; Dynamic insertelement into a 9-element float vector. The vector and the
; index are passed inreg (referenced in s2-s10 and s11 by the checks below);
; the value is not inreg and arrives in v0.
; Every check prefix first expects the vector to be moved down to s0-s8, the
; value to be saved in v9, and the vector to be copied into v0-v8. The indexed
; write then differs:
;   GPRIDX       - s_set_gpr_idx_on s11, gpr_idx(DST) / v_mov_b32_e32 v0, v9 /
;                  s_set_gpr_idx_off.
;   GFX10, GFX11 - m0 is loaded from s11 and v_movreld_b32_e32 v0, v9 performs
;                  the write (GFX11 pairs the copies with v_dual_mov_b32).
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2539 define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_s(<9 x float> inreg %vec, float %val, i32 inreg %idx) {
2540 ; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_s:
2541 ; GPRIDX: ; %bb.0: ; %entry
2542 ; GPRIDX-NEXT: s_mov_b32 s0, s2
2543 ; GPRIDX-NEXT: s_mov_b32 s1, s3
2544 ; GPRIDX-NEXT: s_mov_b32 s2, s4
2545 ; GPRIDX-NEXT: s_mov_b32 s3, s5
2546 ; GPRIDX-NEXT: s_mov_b32 s4, s6
2547 ; GPRIDX-NEXT: s_mov_b32 s5, s7
2548 ; GPRIDX-NEXT: s_mov_b32 s6, s8
2549 ; GPRIDX-NEXT: s_mov_b32 s7, s9
2550 ; GPRIDX-NEXT: s_mov_b32 s8, s10
2551 ; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
2552 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
2553 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
2554 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2555 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2556 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2557 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2558 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2559 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2560 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
2561 ; GPRIDX-NEXT: s_set_gpr_idx_on s11, gpr_idx(DST)
2562 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
2563 ; GPRIDX-NEXT: s_set_gpr_idx_off
2564 ; GPRIDX-NEXT: ; return to shader part epilog
2566 ; GFX10-LABEL: dyn_insertelement_v9f32_s_v_s:
2567 ; GFX10: ; %bb.0: ; %entry
2568 ; GFX10-NEXT: s_mov_b32 s0, s2
2569 ; GFX10-NEXT: s_mov_b32 s1, s3
2570 ; GFX10-NEXT: s_mov_b32 s2, s4
2571 ; GFX10-NEXT: s_mov_b32 s3, s5
2572 ; GFX10-NEXT: s_mov_b32 s4, s6
2573 ; GFX10-NEXT: s_mov_b32 s5, s7
2574 ; GFX10-NEXT: s_mov_b32 s6, s8
2575 ; GFX10-NEXT: s_mov_b32 s7, s9
2576 ; GFX10-NEXT: s_mov_b32 s8, s10
2577 ; GFX10-NEXT: v_mov_b32_e32 v9, v0
2578 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2579 ; GFX10-NEXT: s_mov_b32 m0, s11
2580 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2581 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
2582 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
2583 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
2584 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
2585 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
2586 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
2587 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
2588 ; GFX10-NEXT: v_movreld_b32_e32 v0, v9
2589 ; GFX10-NEXT: ; return to shader part epilog
2591 ; GFX11-LABEL: dyn_insertelement_v9f32_s_v_s:
2592 ; GFX11: ; %bb.0: ; %entry
2593 ; GFX11-NEXT: s_mov_b32 s0, s2
2594 ; GFX11-NEXT: s_mov_b32 s1, s3
2595 ; GFX11-NEXT: s_mov_b32 s2, s4
2596 ; GFX11-NEXT: s_mov_b32 s3, s5
2597 ; GFX11-NEXT: s_mov_b32 s4, s6
2598 ; GFX11-NEXT: s_mov_b32 s5, s7
2599 ; GFX11-NEXT: s_mov_b32 s6, s8
2600 ; GFX11-NEXT: s_mov_b32 s7, s9
2601 ; GFX11-NEXT: s_mov_b32 s8, s10
2602 ; GFX11-NEXT: v_dual_mov_b32 v9, v0 :: v_dual_mov_b32 v0, s0
2603 ; GFX11-NEXT: s_mov_b32 m0, s11
2604 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
2605 ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
2606 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
2607 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
2608 ; GFX11-NEXT: v_movreld_b32_e32 v0, v9
2609 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: insert %val at the dynamic index and return the vector.
2611 %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2612 ret <9 x float> %insert
; Dynamic insertelement into a 9-element float vector where only the vector is
; passed inreg (referenced in s2-s10 by the checks below); the value and the
; index are not inreg and arrive in v0 and v1.
; Every check prefix expects nine v_cmp_eq_u32 / v_cndmask_b32 pairs, one per
; element number 0-8. Results for elements 0 and 1 are built in v10 and v9 and
; moved into v0 and v1 at the end, since v0/v1 are still needed as inputs.
;   GPRIDX       - each vector element is first materialized in a VGPR with
;                  v_mov_b32 before being used by v_cndmask_b32.
;   GFX10, GFX11 - v_cndmask_b32 takes the SGPR element directly as an
;                  operand (GFX11 folds the last select into a
;                  v_dual_mov_b32 / v_dual_cndmask_b32 pair).
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2615 define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_v(<9 x float> inreg %vec, float %val, i32 %idx) {
2616 ; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_v:
2617 ; GPRIDX: ; %bb.0: ; %entry
2618 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2619 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2620 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2621 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc
2622 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
2623 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2624 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc
2625 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
2626 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2627 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
2628 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
2629 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2630 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
2631 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
2632 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2633 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
2634 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
2635 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
2636 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
2637 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
2638 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
2639 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
2640 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
2641 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
2642 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc
2643 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
2644 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc
2645 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
2646 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
2647 ; GPRIDX-NEXT: ; return to shader part epilog
2649 ; GFX10-LABEL: dyn_insertelement_v9f32_s_v_v:
2650 ; GFX10: ; %bb.0: ; %entry
2651 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2652 ; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2653 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
2654 ; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
2655 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
2656 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2657 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
2658 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2659 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
2660 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2661 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
2662 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2663 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
2664 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2665 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
2666 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2667 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
2668 ; GFX10-NEXT: v_mov_b32_e32 v1, v9
2669 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2670 ; GFX10-NEXT: v_mov_b32_e32 v0, v10
2671 ; GFX10-NEXT: ; return to shader part epilog
2673 ; GFX11-LABEL: dyn_insertelement_v9f32_s_v_v:
2674 ; GFX11: ; %bb.0: ; %entry
2675 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2676 ; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2677 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
2678 ; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
2679 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
2680 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2681 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
2682 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2683 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
2684 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2685 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
2686 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2687 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
2688 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2689 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
2690 ; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2691 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
2692 ; GFX11-NEXT: v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, s10, v0
2693 ; GFX11-NEXT: v_mov_b32_e32 v0, v10
2694 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: insert %val at the dynamic index and return the vector.
2696 %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2697 ret <9 x float> %insert
; Dynamic insertelement into a 9-element float vector where only the index is
; passed inreg (referenced in s2 by the checks below); the vector and the
; value are not inreg and arrive in v0-v8 and v9.
; No copies are expected, only the indexed write itself:
;   GPRIDX    - s_set_gpr_idx_on s2, gpr_idx(DST) / v_mov_b32_e32 v0, v9 /
;               s_set_gpr_idx_off.
;   GFX10PLUS - m0 is loaded from s2, then v_movreld_b32_e32 v0, v9.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2700 define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_s(<9 x float> %vec, float %val, i32 inreg %idx) {
2701 ; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_s:
2702 ; GPRIDX: ; %bb.0: ; %entry
2703 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
2704 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
2705 ; GPRIDX-NEXT: s_set_gpr_idx_off
2706 ; GPRIDX-NEXT: ; return to shader part epilog
2708 ; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_s:
2709 ; GFX10PLUS: ; %bb.0: ; %entry
2710 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2711 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v9
2712 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: insert %val at the dynamic index and return the vector.
2714 %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2715 ret <9 x float> %insert
; Dynamic insertelement into a 9-element float vector where none of the
; arguments is inreg; the checks below reference the vector in v0-v8, the
; value in v9 and the index in v10.
; Every check prefix expects nine v_cmp_eq_u32 / v_cndmask_b32 pairs, one per
; element number 0-8, each selecting v9 when the index matches and keeping the
; old element otherwise.
;   GPRIDX    - conditions held in vcc.
;   GFX10PLUS - conditions held in vcc_lo.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them instead of editing them by hand.
2718 define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_v(<9 x float> %vec, float %val, i32 %idx) {
2719 ; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_v:
2720 ; GPRIDX: ; %bb.0: ; %entry
2721 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
2722 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
2723 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v10
2724 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
2725 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v10
2726 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc
2727 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v10
2728 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
2729 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v10
2730 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
2731 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v10
2732 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
2733 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v10
2734 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
2735 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v10
2736 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
2737 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v10
2738 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
2739 ; GPRIDX-NEXT: ; return to shader part epilog
2741 ; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_v:
2742 ; GFX10PLUS: ; %bb.0: ; %entry
2743 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v10
2744 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
2745 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
2746 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2747 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v10
2748 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo
2749 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v10
2750 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo
2751 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v10
2752 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc_lo
2753 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v10
2754 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc_lo
2755 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v10
2756 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo
2757 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v10
2758 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc_lo
2759 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v10
2760 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo
2761 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: insert %val at the dynamic index and return the vector.
2763 %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2764 ret <9 x float> %insert
; Dynamic insertelement into <10 x float> where %vec and %idx are `inreg` and
; %val is not. The expected code first moves the incoming value from v0 to v10,
; copies the vector s2-s11 -> s0-s9 -> v0-v9, and then performs one indexed
; write of v10: s_set_gpr_idx_on with s12 in the GPRIDX checks, and m0 = s12
; followed by v_movreld_b32 in the GFX10 and GFX11 checks.
2767 define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_s(<10 x float> inreg %vec, float %val, i32 inreg %idx) {
2768 ; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_s:
2769 ; GPRIDX: ; %bb.0: ; %entry
2770 ; GPRIDX-NEXT: s_mov_b32 s0, s2
2771 ; GPRIDX-NEXT: s_mov_b32 s1, s3
2772 ; GPRIDX-NEXT: s_mov_b32 s2, s4
2773 ; GPRIDX-NEXT: s_mov_b32 s3, s5
2774 ; GPRIDX-NEXT: s_mov_b32 s4, s6
2775 ; GPRIDX-NEXT: s_mov_b32 s5, s7
2776 ; GPRIDX-NEXT: s_mov_b32 s6, s8
2777 ; GPRIDX-NEXT: s_mov_b32 s7, s9
2778 ; GPRIDX-NEXT: s_mov_b32 s8, s10
2779 ; GPRIDX-NEXT: s_mov_b32 s9, s11
2780 ; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
2781 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
2782 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
2783 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2784 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2785 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2786 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2787 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2788 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2789 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
2790 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
2791 ; GPRIDX-NEXT: s_set_gpr_idx_on s12, gpr_idx(DST)
2792 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
2793 ; GPRIDX-NEXT: s_set_gpr_idx_off
2794 ; GPRIDX-NEXT: ; return to shader part epilog
2796 ; GFX10-LABEL: dyn_insertelement_v10f32_s_v_s:
2797 ; GFX10: ; %bb.0: ; %entry
2798 ; GFX10-NEXT: s_mov_b32 s0, s2
2799 ; GFX10-NEXT: s_mov_b32 s1, s3
2800 ; GFX10-NEXT: s_mov_b32 s2, s4
2801 ; GFX10-NEXT: s_mov_b32 s3, s5
2802 ; GFX10-NEXT: s_mov_b32 s4, s6
2803 ; GFX10-NEXT: s_mov_b32 s5, s7
2804 ; GFX10-NEXT: s_mov_b32 s6, s8
2805 ; GFX10-NEXT: s_mov_b32 s7, s9
2806 ; GFX10-NEXT: s_mov_b32 s8, s10
2807 ; GFX10-NEXT: s_mov_b32 s9, s11
2808 ; GFX10-NEXT: v_mov_b32_e32 v10, v0
2809 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2810 ; GFX10-NEXT: s_mov_b32 m0, s12
2811 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2812 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
2813 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
2814 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
2815 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
2816 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
2817 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
2818 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
2819 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
2820 ; GFX10-NEXT: v_movreld_b32_e32 v0, v10
2821 ; GFX10-NEXT: ; return to shader part epilog
2823 ; GFX11-LABEL: dyn_insertelement_v10f32_s_v_s:
2824 ; GFX11: ; %bb.0: ; %entry
2825 ; GFX11-NEXT: s_mov_b32 s0, s2
2826 ; GFX11-NEXT: s_mov_b32 s1, s3
2827 ; GFX11-NEXT: s_mov_b32 s2, s4
2828 ; GFX11-NEXT: s_mov_b32 s3, s5
2829 ; GFX11-NEXT: s_mov_b32 s4, s6
2830 ; GFX11-NEXT: s_mov_b32 s5, s7
2831 ; GFX11-NEXT: s_mov_b32 s6, s8
2832 ; GFX11-NEXT: s_mov_b32 s7, s9
2833 ; GFX11-NEXT: s_mov_b32 s8, s10
2834 ; GFX11-NEXT: s_mov_b32 s9, s11
2835 ; GFX11-NEXT: v_mov_b32_e32 v10, v0
2836 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
2837 ; GFX11-NEXT: s_mov_b32 m0, s12
2838 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
2839 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
2840 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
2841 ; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
2842 ; GFX11-NEXT: v_movreld_b32_e32 v0, v10
2843 ; GFX11-NEXT: ; return to shader part epilog
2845 %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2846 ret <10 x float> %insert
; Dynamic insertelement into <10 x float> where only %vec is `inreg`; %val and
; %idx are not. The expected code is a per-element v_cmp_eq_u32 / v_cndmask_b32
; chain over indices 0-9 that reads v0 as the inserted value and v1 as the
; compared index. Results for elements 0 and 1 are produced in v10/v11 and
; moved into v0/v1 at the end, since v0/v1 are still read by the chain. The
; GPRIDX checks first copy each s register into a v register, while the GFX10
; and GFX11 checks use s2-s11 directly as the v_cndmask_b32 source.
2849 define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_v(<10 x float> inreg %vec, float %val, i32 %idx) {
2850 ; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_v:
2851 ; GPRIDX: ; %bb.0: ; %entry
2852 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
2853 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2854 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
2855 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc
2856 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
2857 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
2858 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc
2859 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
2860 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
2861 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
2862 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
2863 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
2864 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
2865 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
2866 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
2867 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
2868 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
2869 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
2870 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
2871 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
2872 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
2873 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
2874 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
2875 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
2876 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
2877 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
2878 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
2879 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc
2880 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
2881 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc
2882 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
2883 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
2884 ; GPRIDX-NEXT: ; return to shader part epilog
2886 ; GFX10-LABEL: dyn_insertelement_v10f32_s_v_v:
2887 ; GFX10: ; %bb.0: ; %entry
2888 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2889 ; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2890 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
2891 ; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
2892 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
2893 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2894 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
2895 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2896 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
2897 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2898 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
2899 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2900 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
2901 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2902 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
2903 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2904 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
2905 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2906 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
2907 ; GFX10-NEXT: v_mov_b32_e32 v1, v11
2908 ; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
2909 ; GFX10-NEXT: v_mov_b32_e32 v0, v10
2910 ; GFX10-NEXT: ; return to shader part epilog
2912 ; GFX11-LABEL: dyn_insertelement_v10f32_s_v_v:
2913 ; GFX11: ; %bb.0: ; %entry
2914 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2915 ; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2916 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
2917 ; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
2918 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
2919 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2920 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
2921 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2922 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
2923 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2924 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
2925 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2926 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
2927 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2928 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
2929 ; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2930 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
2931 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2932 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
2933 ; GFX11-NEXT: v_mov_b32_e32 v1, v11
2934 ; GFX11-NEXT: v_dual_cndmask_b32 v9, s11, v0 :: v_dual_mov_b32 v0, v10
2935 ; GFX11-NEXT: ; return to shader part epilog
2937 %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2938 ret <10 x float> %insert
; Dynamic insertelement into <10 x float> where only %idx is `inreg`. The
; expected code is a single indexed move of v10 with v0 named as destination:
; bracketed by s_set_gpr_idx_on s2 / s_set_gpr_idx_off in the GPRIDX checks,
; and done as m0 = s2 plus v_movreld_b32 in the shared GFX10PLUS checks.
2941 define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_s(<10 x float> %vec, float %val, i32 inreg %idx) {
2942 ; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_s:
2943 ; GPRIDX: ; %bb.0: ; %entry
2944 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
2945 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
2946 ; GPRIDX-NEXT: s_set_gpr_idx_off
2947 ; GPRIDX-NEXT: ; return to shader part epilog
2949 ; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_s:
2950 ; GFX10PLUS: ; %bb.0: ; %entry
2951 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2952 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v10
2953 ; GFX10PLUS-NEXT: ; return to shader part epilog
2955 %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2956 ret <10 x float> %insert
; Dynamic insertelement into <10 x float> with no `inreg` arguments. The
; expected code is a v_cmp_eq_u32 / v_cndmask_b32 pair per element (indices
; 0-9), updating v0-v9 in place with v10 as the inserted value and v11 as the
; compared index. The GPRIDX checks use vcc; the GFX10PLUS checks use vcc_lo.
2959 define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_v(<10 x float> %vec, float %val, i32 %idx) {
2960 ; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_v:
2961 ; GPRIDX: ; %bb.0: ; %entry
2962 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11
2963 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
2964 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v11
2965 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
2966 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v11
2967 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
2968 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v11
2969 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
2970 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v11
2971 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
2972 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v11
2973 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
2974 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v11
2975 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
2976 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v11
2977 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc
2978 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v11
2979 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
2980 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v11
2981 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
2982 ; GPRIDX-NEXT: ; return to shader part epilog
2984 ; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_v:
2985 ; GFX10PLUS: ; %bb.0: ; %entry
2986 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v11
2987 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2988 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
2989 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc_lo
2990 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v11
2991 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
2992 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v11
2993 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc_lo
2994 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v11
2995 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
2996 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v11
2997 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc_lo
2998 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v11
2999 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo
3000 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v11
3001 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc_lo
3002 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v11
3003 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo
3004 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v11
3005 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc_lo
3006 ; GFX10PLUS-NEXT: ; return to shader part epilog
3008 %insert = insertelement <10 x float> %vec, float %val, i32 %idx
3009 ret <10 x float> %insert
; Dynamic insertelement into <11 x float> where %vec and %idx are `inreg` and
; %val is not. The expected code moves the incoming value from v0 to v11,
; copies the vector s2-s12 -> s0-s10 -> v0-v10, and then performs one indexed
; write of v11: s_set_gpr_idx_on with s13 in the GPRIDX checks, and m0 = s13
; followed by v_movreld_b32 in the GFX10 and GFX11 checks.
3012 define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_s(<11 x float> inreg %vec, float %val, i32 inreg %idx) {
3013 ; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_s:
3014 ; GPRIDX: ; %bb.0: ; %entry
3015 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3016 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3017 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3018 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3019 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3020 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3021 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3022 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3023 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3024 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3025 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3026 ; GPRIDX-NEXT: v_mov_b32_e32 v11, v0
3027 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
3028 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
3029 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3030 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3031 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3032 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3033 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3034 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3035 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3036 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3037 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3038 ; GPRIDX-NEXT: s_set_gpr_idx_on s13, gpr_idx(DST)
3039 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v11
3040 ; GPRIDX-NEXT: s_set_gpr_idx_off
3041 ; GPRIDX-NEXT: ; return to shader part epilog
3043 ; GFX10-LABEL: dyn_insertelement_v11f32_s_v_s:
3044 ; GFX10: ; %bb.0: ; %entry
3045 ; GFX10-NEXT: s_mov_b32 s0, s2
3046 ; GFX10-NEXT: s_mov_b32 s1, s3
3047 ; GFX10-NEXT: s_mov_b32 s2, s4
3048 ; GFX10-NEXT: s_mov_b32 s3, s5
3049 ; GFX10-NEXT: s_mov_b32 s4, s6
3050 ; GFX10-NEXT: s_mov_b32 s5, s7
3051 ; GFX10-NEXT: s_mov_b32 s6, s8
3052 ; GFX10-NEXT: s_mov_b32 s7, s9
3053 ; GFX10-NEXT: s_mov_b32 s8, s10
3054 ; GFX10-NEXT: s_mov_b32 s9, s11
3055 ; GFX10-NEXT: s_mov_b32 s10, s12
3056 ; GFX10-NEXT: v_mov_b32_e32 v11, v0
3057 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
3058 ; GFX10-NEXT: s_mov_b32 m0, s13
3059 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
3060 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
3061 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
3062 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
3063 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
3064 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
3065 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
3066 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
3067 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
3068 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
3069 ; GFX10-NEXT: v_movreld_b32_e32 v0, v11
3070 ; GFX10-NEXT: ; return to shader part epilog
3072 ; GFX11-LABEL: dyn_insertelement_v11f32_s_v_s:
3073 ; GFX11: ; %bb.0: ; %entry
3074 ; GFX11-NEXT: s_mov_b32 s0, s2
3075 ; GFX11-NEXT: s_mov_b32 s1, s3
3076 ; GFX11-NEXT: s_mov_b32 s2, s4
3077 ; GFX11-NEXT: s_mov_b32 s3, s5
3078 ; GFX11-NEXT: s_mov_b32 s4, s6
3079 ; GFX11-NEXT: s_mov_b32 s5, s7
3080 ; GFX11-NEXT: s_mov_b32 s6, s8
3081 ; GFX11-NEXT: s_mov_b32 s7, s9
3082 ; GFX11-NEXT: s_mov_b32 s8, s10
3083 ; GFX11-NEXT: s_mov_b32 s9, s11
3084 ; GFX11-NEXT: s_mov_b32 s10, s12
3085 ; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, s0
3086 ; GFX11-NEXT: s_mov_b32 m0, s13
3087 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
3088 ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
3089 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
3090 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
3091 ; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v10, s10
3092 ; GFX11-NEXT: v_movreld_b32_e32 v0, v11
3093 ; GFX11-NEXT: ; return to shader part epilog
3095 %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3096 ret <11 x float> %insert
; Dynamic insertelement into <11 x float> where only %vec is `inreg`; %val and
; %idx are not. The expected code is a per-element v_cmp_eq_u32 / v_cndmask_b32
; chain over indices 0-10 that reads v0 as the inserted value and v1 as the
; compared index. Results for elements 0 and 1 are produced in v12/v11 and
; moved into v0/v1 at the end, since v0/v1 are still read by the chain. The
; GPRIDX checks first copy each s register into a v register, while the GFX10
; and GFX11 checks use s2-s12 directly as the v_cndmask_b32 source.
3099 define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_v(<11 x float> inreg %vec, float %val, i32 %idx) {
3100 ; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_v:
3101 ; GPRIDX: ; %bb.0: ; %entry
3102 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3103 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3104 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3105 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc
3106 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
3107 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3108 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc
3109 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
3110 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3111 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
3112 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
3113 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3114 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
3115 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
3116 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3117 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
3118 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
3119 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3120 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
3121 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
3122 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3123 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
3124 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
3125 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3126 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
3127 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
3128 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
3129 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc
3130 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
3131 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
3132 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc
3133 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1
3134 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc
3135 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
3136 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
3137 ; GPRIDX-NEXT: ; return to shader part epilog
3139 ; GFX10-LABEL: dyn_insertelement_v11f32_s_v_v:
3140 ; GFX10: ; %bb.0: ; %entry
3141 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3142 ; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3143 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
3144 ; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
3145 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
3146 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3147 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
3148 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3149 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
3150 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3151 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
3152 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3153 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
3154 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3155 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
3156 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3157 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
3158 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3159 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
3160 ; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3161 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
3162 ; GFX10-NEXT: v_mov_b32_e32 v1, v11
3163 ; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3164 ; GFX10-NEXT: v_mov_b32_e32 v0, v12
3165 ; GFX10-NEXT: ; return to shader part epilog
3167 ; GFX11-LABEL: dyn_insertelement_v11f32_s_v_v:
3168 ; GFX11: ; %bb.0: ; %entry
3169 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3170 ; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3171 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
3172 ; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
3173 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
3174 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3175 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
3176 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3177 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
3178 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3179 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
3180 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3181 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
3182 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3183 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
3184 ; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3185 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
3186 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3187 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
3188 ; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3189 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
3190 ; GFX11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, s12, v0
3191 ; GFX11-NEXT: v_mov_b32_e32 v0, v12
3192 ; GFX11-NEXT: ; return to shader part epilog
3194 %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3195 ret <11 x float> %insert
; Dynamic insertelement into <11 x float> where only %idx is `inreg`. The
; expected code is a single indexed move of v11 with v0 named as destination:
; bracketed by s_set_gpr_idx_on s2 / s_set_gpr_idx_off in the GPRIDX checks,
; and done as m0 = s2 plus v_movreld_b32 in the shared GFX10PLUS checks.
3198 define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_s(<11 x float> %vec, float %val, i32 inreg %idx) {
3199 ; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_s:
3200 ; GPRIDX: ; %bb.0: ; %entry
3201 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
3202 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v11
3203 ; GPRIDX-NEXT: s_set_gpr_idx_off
3204 ; GPRIDX-NEXT: ; return to shader part epilog
3206 ; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_s:
3207 ; GFX10PLUS: ; %bb.0: ; %entry
3208 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3209 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v11
3210 ; GFX10PLUS-NEXT: ; return to shader part epilog
3212 %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3213 ret <11 x float> %insert
; Dynamic insertelement into <11 x float> with no `inreg` arguments. The
; expected code is a v_cmp_eq_u32 / v_cndmask_b32 pair per element (indices
; 0-10), updating v0-v10 in place with v11 as the inserted value and v12 as
; the compared index. The GPRIDX checks use vcc; the GFX10PLUS checks use
; vcc_lo.
3216 define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_v(<11 x float> %vec, float %val, i32 %idx) {
3217 ; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_v:
3218 ; GPRIDX: ; %bb.0: ; %entry
3219 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
3220 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
3221 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
3222 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
3223 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
3224 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc
3225 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
3226 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
3227 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
3228 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc
3229 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12
3230 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
3231 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v12
3232 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc
3233 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v12
3234 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
3235 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v12
3236 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc
3237 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v12
3238 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
3239 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v12
3240 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
3241 ; GPRIDX-NEXT: ; return to shader part epilog
3243 ; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_v:
3244 ; GFX10PLUS: ; %bb.0: ; %entry
3245 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
3246 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3247 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
3248 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
3249 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
3250 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc_lo
3251 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
3252 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
3253 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
3254 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc_lo
3255 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
3256 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
3257 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v12
3258 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc_lo
3259 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v12
3260 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo
3261 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v12
3262 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc_lo
3263 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v12
3264 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo
3265 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v12
3266 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc_lo
3267 ; GFX10PLUS-NEXT: ; return to shader part epilog
3269 %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3270 ret <11 x float> %insert
; Dynamic insertelement into <12 x float> where %vec and %idx are `inreg` and
; %val is not. The expected code moves the incoming value from v0 to v12,
; copies the vector s2-s13 -> s0-s11 -> v0-v11, and then performs one indexed
; write of v12: s_set_gpr_idx_on with s14 in the GPRIDX checks, and m0 = s14
; followed by v_movreld_b32 in the GFX10 and GFX11 checks.
3273 define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_s(<12 x float> inreg %vec, float %val, i32 inreg %idx) {
3274 ; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_s:
3275 ; GPRIDX: ; %bb.0: ; %entry
3276 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3277 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3278 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3279 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3280 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3281 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3282 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3283 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3284 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3285 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3286 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3287 ; GPRIDX-NEXT: s_mov_b32 s11, s13
3288 ; GPRIDX-NEXT: v_mov_b32_e32 v12, v0
3289 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
3290 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
3291 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3292 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3293 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3294 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3295 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3296 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3297 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3298 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3299 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3300 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
3301 ; GPRIDX-NEXT: s_set_gpr_idx_on s14, gpr_idx(DST)
3302 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
3303 ; GPRIDX-NEXT: s_set_gpr_idx_off
3304 ; GPRIDX-NEXT: ; return to shader part epilog
3306 ; GFX10-LABEL: dyn_insertelement_v12f32_s_v_s:
3307 ; GFX10: ; %bb.0: ; %entry
3308 ; GFX10-NEXT: s_mov_b32 s0, s2
3309 ; GFX10-NEXT: s_mov_b32 s1, s3
3310 ; GFX10-NEXT: s_mov_b32 s2, s4
3311 ; GFX10-NEXT: s_mov_b32 s3, s5
3312 ; GFX10-NEXT: s_mov_b32 s4, s6
3313 ; GFX10-NEXT: s_mov_b32 s5, s7
3314 ; GFX10-NEXT: s_mov_b32 s6, s8
3315 ; GFX10-NEXT: s_mov_b32 s7, s9
3316 ; GFX10-NEXT: s_mov_b32 s8, s10
3317 ; GFX10-NEXT: s_mov_b32 s9, s11
3318 ; GFX10-NEXT: s_mov_b32 s10, s12
3319 ; GFX10-NEXT: s_mov_b32 s11, s13
3320 ; GFX10-NEXT: v_mov_b32_e32 v12, v0
3321 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
3322 ; GFX10-NEXT: s_mov_b32 m0, s14
3323 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
3324 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
3325 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
3326 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
3327 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
3328 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
3329 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
3330 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
3331 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
3332 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
3333 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
3334 ; GFX10-NEXT: v_movreld_b32_e32 v0, v12
3335 ; GFX10-NEXT: ; return to shader part epilog
3337 ; GFX11-LABEL: dyn_insertelement_v12f32_s_v_s:
3338 ; GFX11: ; %bb.0: ; %entry
3339 ; GFX11-NEXT: s_mov_b32 s0, s2
3340 ; GFX11-NEXT: s_mov_b32 s1, s3
3341 ; GFX11-NEXT: s_mov_b32 s2, s4
3342 ; GFX11-NEXT: s_mov_b32 s3, s5
3343 ; GFX11-NEXT: s_mov_b32 s4, s6
3344 ; GFX11-NEXT: s_mov_b32 s5, s7
3345 ; GFX11-NEXT: s_mov_b32 s6, s8
3346 ; GFX11-NEXT: s_mov_b32 s7, s9
3347 ; GFX11-NEXT: s_mov_b32 s8, s10
3348 ; GFX11-NEXT: s_mov_b32 s9, s11
3349 ; GFX11-NEXT: s_mov_b32 s10, s12
3350 ; GFX11-NEXT: s_mov_b32 s11, s13
3351 ; GFX11-NEXT: v_mov_b32_e32 v12, v0
3352 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
3353 ; GFX11-NEXT: s_mov_b32 m0, s14
3354 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
3355 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
3356 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
3357 ; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
3358 ; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
3359 ; GFX11-NEXT: v_movreld_b32_e32 v0, v12
3360 ; GFX11-NEXT: ; return to shader part epilog
3362 %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3363 ret <12 x float> %insert
; Dynamic insertelement into <12 x float> where only %vec is `inreg`; %val and
; %idx are not. The expected code is a per-element v_cmp_eq_u32 / v_cndmask_b32
; chain over indices 0-11 that reads v0 as the inserted value and v1 as the
; compared index. Results for elements 0 and 1 are produced in v12/v13 and
; moved into v0/v1 at the end, since v0/v1 are still read by the chain. The
; GPRIDX checks first copy each s register into a v register, while the GFX10
; and GFX11 checks use s2-s13 directly as the v_cndmask_b32 source.
3366 define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_v(<12 x float> inreg %vec, float %val, i32 %idx) {
3367 ; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_v:
3368 ; GPRIDX: ; %bb.0: ; %entry
3369 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3370 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3371 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3372 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc
3373 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
3374 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3375 ; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v3, v0, vcc
3376 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
3377 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3378 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
3379 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
3380 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3381 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
3382 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
3383 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3384 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
3385 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
3386 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3387 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
3388 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
3389 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3390 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
3391 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
3392 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3393 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
3394 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
3395 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
3396 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc
3397 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
3398 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
3399 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc
3400 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1
3401 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
3402 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc
3403 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v1
3404 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v15, v0, vcc
3405 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
3406 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v13
3407 ; GPRIDX-NEXT: ; return to shader part epilog
3409 ; GFX10-LABEL: dyn_insertelement_v12f32_s_v_v:
3410 ; GFX10: ; %bb.0: ; %entry
3411 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3412 ; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3413 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
3414 ; GFX10-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo
3415 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
3416 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3417 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
3418 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3419 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
3420 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3421 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
3422 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3423 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
3424 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3425 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
3426 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3427 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
3428 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3429 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
3430 ; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3431 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
3432 ; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3433 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
3434 ; GFX10-NEXT: v_mov_b32_e32 v1, v13
3435 ; GFX10-NEXT: v_cndmask_b32_e32 v11, s13, v0, vcc_lo
3436 ; GFX10-NEXT: v_mov_b32_e32 v0, v12
3437 ; GFX10-NEXT: ; return to shader part epilog
3439 ; GFX11-LABEL: dyn_insertelement_v12f32_s_v_v:
3440 ; GFX11: ; %bb.0: ; %entry
3441 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3442 ; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3443 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
3444 ; GFX11-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo
3445 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
3446 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3447 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
3448 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3449 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
3450 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3451 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
3452 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3453 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
3454 ; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3455 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
3456 ; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3457 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
3458 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3459 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
3460 ; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3461 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
3462 ; GFX11-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3463 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
3464 ; GFX11-NEXT: v_mov_b32_e32 v1, v13
3465 ; GFX11-NEXT: v_dual_cndmask_b32 v11, s13, v0 :: v_dual_mov_b32 v0, v12
3466 ; GFX11-NEXT: ; return to shader part epilog
3468 %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3469 ret <12 x float> %insert
; Dynamic insertelement into <12 x float> where only %idx is `inreg`. The
; expected code is a single indexed move of v12 with v0 named as destination:
; bracketed by s_set_gpr_idx_on s2 / s_set_gpr_idx_off in the GPRIDX checks,
; and done as m0 = s2 plus v_movreld_b32 in the shared GFX10PLUS checks.
3472 define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_s(<12 x float> %vec, float %val, i32 inreg %idx) {
3473 ; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_s:
3474 ; GPRIDX: ; %bb.0: ; %entry
3475 ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
3476 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
3477 ; GPRIDX-NEXT: s_set_gpr_idx_off
3478 ; GPRIDX-NEXT: ; return to shader part epilog
3480 ; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_s:
3481 ; GFX10PLUS: ; %bb.0: ; %entry
3482 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2
3483 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v12
3484 ; GFX10PLUS-NEXT: ; return to shader part epilog
3486 %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3487 ret <12 x float> %insert
; Dynamic insertelement into <12 x float> where vector, value and index are
; all plain (non-inreg) arguments. The expected code compares the index in v13
; against each constant 0..11 and uses v_cndmask to select the value in v12
; into the matching element v0..v11. The gfx900 and gfx1010/gfx1100 sequences
; differ only in using vcc versus vcc_lo.
3490 define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_v(<12 x float> %vec, float %val, i32 %idx) {
3491 ; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_v:
3492 ; GPRIDX: ; %bb.0: ; %entry
3493 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
3494 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
3495 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v13
3496 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
3497 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v13
3498 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
3499 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v13
3500 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc
3501 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v13
3502 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
3503 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v13
3504 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc
3505 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v13
3506 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc
3507 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v13
3508 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc
3509 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v13
3510 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
3511 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v13
3512 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
3513 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v13
3514 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
3515 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v13
3516 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc
3517 ; GPRIDX-NEXT: ; return to shader part epilog
3519 ; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_v:
3520 ; GFX10PLUS: ; %bb.0: ; %entry
3521 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v13
3522 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3523 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
3524 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc_lo
3525 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v13
3526 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc_lo
3527 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v13
3528 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc_lo
3529 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v13
3530 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
3531 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v13
3532 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc_lo
3533 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v13
3534 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc_lo
3535 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v13
3536 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc_lo
3537 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v13
3538 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc_lo
3539 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v13
3540 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo
3541 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v13
3542 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc_lo
3543 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v13
3544 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc_lo
3545 ; GFX10PLUS-NEXT: ; return to shader part epilog
3547 %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3548 ret <12 x float> %insert
; Dynamic insertelement into <16 x i32> where vector, value and index are all
; inreg arguments. The expected code copies the incoming vector from s2..s17
; down to s0..s15, loads the index from s19 into m0, and writes the value in
; s18 with s_movreld_b32 relative to s0. The gfx900 sequence additionally has
; an s_nop 0 between the m0 write and the movrel.
3551 define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
3552 ; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:
3553 ; GPRIDX: ; %bb.0: ; %entry
3554 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3555 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3556 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3557 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3558 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3559 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3560 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3561 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3562 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3563 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3564 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3565 ; GPRIDX-NEXT: s_mov_b32 s11, s13
3566 ; GPRIDX-NEXT: s_mov_b32 s12, s14
3567 ; GPRIDX-NEXT: s_mov_b32 s13, s15
3568 ; GPRIDX-NEXT: s_mov_b32 s14, s16
3569 ; GPRIDX-NEXT: s_mov_b32 s15, s17
3570 ; GPRIDX-NEXT: s_mov_b32 m0, s19
3571 ; GPRIDX-NEXT: s_nop 0
3572 ; GPRIDX-NEXT: s_movreld_b32 s0, s18
3573 ; GPRIDX-NEXT: ; return to shader part epilog
3575 ; GFX10PLUS-LABEL: dyn_insertelement_v16i32_s_s_s:
3576 ; GFX10PLUS: ; %bb.0: ; %entry
3577 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3578 ; GFX10PLUS-NEXT: s_mov_b32 m0, s19
3579 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3580 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3581 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3582 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3583 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3584 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3585 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3586 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3587 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3588 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3589 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3590 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3591 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3592 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3593 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
3594 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s18
3595 ; GFX10PLUS-NEXT: ; return to shader part epilog
3597 %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
3598 ret <16 x i32> %insert
; Dynamic insertelement into <16 x float> where vector, value and index are
; all inreg arguments. The expected code copies the vector from s2..s17 down
; to s0..s15, loads the index from s19 into m0, writes the value in s18 with
; s_movreld_b32 relative to s0, and then copies s0..s15 into v0..v15.
; The gfx1100 checks pair those final copies into v_dual_mov_b32 instructions;
; the gfx900 checks have an s_nop 0 between the m0 write and the movrel.
3601 define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
3602 ; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s:
3603 ; GPRIDX: ; %bb.0: ; %entry
3604 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3605 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3606 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3607 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3608 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3609 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3610 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3611 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3612 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3613 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3614 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3615 ; GPRIDX-NEXT: s_mov_b32 s11, s13
3616 ; GPRIDX-NEXT: s_mov_b32 s12, s14
3617 ; GPRIDX-NEXT: s_mov_b32 s13, s15
3618 ; GPRIDX-NEXT: s_mov_b32 s14, s16
3619 ; GPRIDX-NEXT: s_mov_b32 s15, s17
3620 ; GPRIDX-NEXT: s_mov_b32 m0, s19
3621 ; GPRIDX-NEXT: s_nop 0
3622 ; GPRIDX-NEXT: s_movreld_b32 s0, s18
3623 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
3624 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
3625 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3626 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3627 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3628 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3629 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3630 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3631 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3632 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3633 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3634 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
3635 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s12
3636 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s13
3637 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s14
3638 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s15
3639 ; GPRIDX-NEXT: ; return to shader part epilog
3641 ; GFX10-LABEL: dyn_insertelement_v16f32_s_s_s:
3642 ; GFX10: ; %bb.0: ; %entry
3643 ; GFX10-NEXT: s_mov_b32 s0, s2
3644 ; GFX10-NEXT: s_mov_b32 m0, s19
3645 ; GFX10-NEXT: s_mov_b32 s1, s3
3646 ; GFX10-NEXT: s_mov_b32 s2, s4
3647 ; GFX10-NEXT: s_mov_b32 s3, s5
3648 ; GFX10-NEXT: s_mov_b32 s4, s6
3649 ; GFX10-NEXT: s_mov_b32 s5, s7
3650 ; GFX10-NEXT: s_mov_b32 s6, s8
3651 ; GFX10-NEXT: s_mov_b32 s7, s9
3652 ; GFX10-NEXT: s_mov_b32 s8, s10
3653 ; GFX10-NEXT: s_mov_b32 s9, s11
3654 ; GFX10-NEXT: s_mov_b32 s10, s12
3655 ; GFX10-NEXT: s_mov_b32 s11, s13
3656 ; GFX10-NEXT: s_mov_b32 s12, s14
3657 ; GFX10-NEXT: s_mov_b32 s13, s15
3658 ; GFX10-NEXT: s_mov_b32 s14, s16
3659 ; GFX10-NEXT: s_mov_b32 s15, s17
3660 ; GFX10-NEXT: s_movreld_b32 s0, s18
3661 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
3662 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
3663 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
3664 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
3665 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
3666 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
3667 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
3668 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
3669 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
3670 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
3671 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
3672 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
3673 ; GFX10-NEXT: v_mov_b32_e32 v12, s12
3674 ; GFX10-NEXT: v_mov_b32_e32 v13, s13
3675 ; GFX10-NEXT: v_mov_b32_e32 v14, s14
3676 ; GFX10-NEXT: v_mov_b32_e32 v15, s15
3677 ; GFX10-NEXT: ; return to shader part epilog
3679 ; GFX11-LABEL: dyn_insertelement_v16f32_s_s_s:
3680 ; GFX11: ; %bb.0: ; %entry
3681 ; GFX11-NEXT: s_mov_b32 s0, s2
3682 ; GFX11-NEXT: s_mov_b32 m0, s19
3683 ; GFX11-NEXT: s_mov_b32 s1, s3
3684 ; GFX11-NEXT: s_mov_b32 s2, s4
3685 ; GFX11-NEXT: s_mov_b32 s3, s5
3686 ; GFX11-NEXT: s_mov_b32 s4, s6
3687 ; GFX11-NEXT: s_mov_b32 s5, s7
3688 ; GFX11-NEXT: s_mov_b32 s6, s8
3689 ; GFX11-NEXT: s_mov_b32 s7, s9
3690 ; GFX11-NEXT: s_mov_b32 s8, s10
3691 ; GFX11-NEXT: s_mov_b32 s9, s11
3692 ; GFX11-NEXT: s_mov_b32 s10, s12
3693 ; GFX11-NEXT: s_mov_b32 s11, s13
3694 ; GFX11-NEXT: s_mov_b32 s12, s14
3695 ; GFX11-NEXT: s_mov_b32 s13, s15
3696 ; GFX11-NEXT: s_mov_b32 s14, s16
3697 ; GFX11-NEXT: s_mov_b32 s15, s17
3698 ; GFX11-NEXT: s_movreld_b32 s0, s18
3699 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3700 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
3701 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
3702 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
3703 ; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
3704 ; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
3705 ; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
3706 ; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
3707 ; GFX11-NEXT: ; return to shader part epilog
3709 %insert = insertelement <16 x float> %vec, float %val, i32 %idx
3710 ret <16 x float> %insert
; Dynamic insertelement into <32 x float> where vector, value and index are
; all inreg arguments. The expected code copies the vector from s2..s33 down
; to s0..s31, loads the index from s35 into m0, writes the value in s34 with
; s_movreld_b32 relative to s0, and then copies s0..s31 into v0..v31.
; Note that the checks expect the s31 copy before the s30 copy; that ordering
; is part of the recorded output and must be kept as-is.
3713 define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
3714 ; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s:
3715 ; GPRIDX: ; %bb.0: ; %entry
3716 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3717 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3718 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3719 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3720 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3721 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3722 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3723 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3724 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3725 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3726 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3727 ; GPRIDX-NEXT: s_mov_b32 s11, s13
3728 ; GPRIDX-NEXT: s_mov_b32 s12, s14
3729 ; GPRIDX-NEXT: s_mov_b32 s13, s15
3730 ; GPRIDX-NEXT: s_mov_b32 s14, s16
3731 ; GPRIDX-NEXT: s_mov_b32 s15, s17
3732 ; GPRIDX-NEXT: s_mov_b32 s16, s18
3733 ; GPRIDX-NEXT: s_mov_b32 s17, s19
3734 ; GPRIDX-NEXT: s_mov_b32 s18, s20
3735 ; GPRIDX-NEXT: s_mov_b32 s19, s21
3736 ; GPRIDX-NEXT: s_mov_b32 s20, s22
3737 ; GPRIDX-NEXT: s_mov_b32 s21, s23
3738 ; GPRIDX-NEXT: s_mov_b32 s22, s24
3739 ; GPRIDX-NEXT: s_mov_b32 s23, s25
3740 ; GPRIDX-NEXT: s_mov_b32 s24, s26
3741 ; GPRIDX-NEXT: s_mov_b32 s25, s27
3742 ; GPRIDX-NEXT: s_mov_b32 s26, s28
3743 ; GPRIDX-NEXT: s_mov_b32 s27, s29
3744 ; GPRIDX-NEXT: s_mov_b32 s28, s30
3745 ; GPRIDX-NEXT: s_mov_b32 s29, s31
3746 ; GPRIDX-NEXT: s_mov_b32 s31, s33
3747 ; GPRIDX-NEXT: s_mov_b32 s30, s32
3748 ; GPRIDX-NEXT: s_mov_b32 m0, s35
3749 ; GPRIDX-NEXT: s_nop 0
3750 ; GPRIDX-NEXT: s_movreld_b32 s0, s34
3751 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
3752 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
3753 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
3754 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
3755 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
3756 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
3757 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
3758 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
3759 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
3760 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
3761 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
3762 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
3763 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s12
3764 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s13
3765 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s14
3766 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s15
3767 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s16
3768 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s17
3769 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s18
3770 ; GPRIDX-NEXT: v_mov_b32_e32 v19, s19
3771 ; GPRIDX-NEXT: v_mov_b32_e32 v20, s20
3772 ; GPRIDX-NEXT: v_mov_b32_e32 v21, s21
3773 ; GPRIDX-NEXT: v_mov_b32_e32 v22, s22
3774 ; GPRIDX-NEXT: v_mov_b32_e32 v23, s23
3775 ; GPRIDX-NEXT: v_mov_b32_e32 v24, s24
3776 ; GPRIDX-NEXT: v_mov_b32_e32 v25, s25
3777 ; GPRIDX-NEXT: v_mov_b32_e32 v26, s26
3778 ; GPRIDX-NEXT: v_mov_b32_e32 v27, s27
3779 ; GPRIDX-NEXT: v_mov_b32_e32 v28, s28
3780 ; GPRIDX-NEXT: v_mov_b32_e32 v29, s29
3781 ; GPRIDX-NEXT: v_mov_b32_e32 v30, s30
3782 ; GPRIDX-NEXT: v_mov_b32_e32 v31, s31
3783 ; GPRIDX-NEXT: ; return to shader part epilog
3785 ; GFX10-LABEL: dyn_insertelement_v32f32_s_s_s:
3786 ; GFX10: ; %bb.0: ; %entry
3787 ; GFX10-NEXT: s_mov_b32 s0, s2
3788 ; GFX10-NEXT: s_mov_b32 m0, s35
3789 ; GFX10-NEXT: s_mov_b32 s1, s3
3790 ; GFX10-NEXT: s_mov_b32 s2, s4
3791 ; GFX10-NEXT: s_mov_b32 s3, s5
3792 ; GFX10-NEXT: s_mov_b32 s4, s6
3793 ; GFX10-NEXT: s_mov_b32 s5, s7
3794 ; GFX10-NEXT: s_mov_b32 s6, s8
3795 ; GFX10-NEXT: s_mov_b32 s7, s9
3796 ; GFX10-NEXT: s_mov_b32 s8, s10
3797 ; GFX10-NEXT: s_mov_b32 s9, s11
3798 ; GFX10-NEXT: s_mov_b32 s10, s12
3799 ; GFX10-NEXT: s_mov_b32 s11, s13
3800 ; GFX10-NEXT: s_mov_b32 s12, s14
3801 ; GFX10-NEXT: s_mov_b32 s13, s15
3802 ; GFX10-NEXT: s_mov_b32 s14, s16
3803 ; GFX10-NEXT: s_mov_b32 s15, s17
3804 ; GFX10-NEXT: s_mov_b32 s16, s18
3805 ; GFX10-NEXT: s_mov_b32 s17, s19
3806 ; GFX10-NEXT: s_mov_b32 s18, s20
3807 ; GFX10-NEXT: s_mov_b32 s19, s21
3808 ; GFX10-NEXT: s_mov_b32 s20, s22
3809 ; GFX10-NEXT: s_mov_b32 s21, s23
3810 ; GFX10-NEXT: s_mov_b32 s22, s24
3811 ; GFX10-NEXT: s_mov_b32 s23, s25
3812 ; GFX10-NEXT: s_mov_b32 s24, s26
3813 ; GFX10-NEXT: s_mov_b32 s25, s27
3814 ; GFX10-NEXT: s_mov_b32 s26, s28
3815 ; GFX10-NEXT: s_mov_b32 s27, s29
3816 ; GFX10-NEXT: s_mov_b32 s28, s30
3817 ; GFX10-NEXT: s_mov_b32 s29, s31
3818 ; GFX10-NEXT: s_mov_b32 s31, s33
3819 ; GFX10-NEXT: s_mov_b32 s30, s32
3820 ; GFX10-NEXT: s_movreld_b32 s0, s34
3821 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
3822 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
3823 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
3824 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
3825 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
3826 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
3827 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
3828 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
3829 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
3830 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
3831 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
3832 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
3833 ; GFX10-NEXT: v_mov_b32_e32 v12, s12
3834 ; GFX10-NEXT: v_mov_b32_e32 v13, s13
3835 ; GFX10-NEXT: v_mov_b32_e32 v14, s14
3836 ; GFX10-NEXT: v_mov_b32_e32 v15, s15
3837 ; GFX10-NEXT: v_mov_b32_e32 v16, s16
3838 ; GFX10-NEXT: v_mov_b32_e32 v17, s17
3839 ; GFX10-NEXT: v_mov_b32_e32 v18, s18
3840 ; GFX10-NEXT: v_mov_b32_e32 v19, s19
3841 ; GFX10-NEXT: v_mov_b32_e32 v20, s20
3842 ; GFX10-NEXT: v_mov_b32_e32 v21, s21
3843 ; GFX10-NEXT: v_mov_b32_e32 v22, s22
3844 ; GFX10-NEXT: v_mov_b32_e32 v23, s23
3845 ; GFX10-NEXT: v_mov_b32_e32 v24, s24
3846 ; GFX10-NEXT: v_mov_b32_e32 v25, s25
3847 ; GFX10-NEXT: v_mov_b32_e32 v26, s26
3848 ; GFX10-NEXT: v_mov_b32_e32 v27, s27
3849 ; GFX10-NEXT: v_mov_b32_e32 v28, s28
3850 ; GFX10-NEXT: v_mov_b32_e32 v29, s29
3851 ; GFX10-NEXT: v_mov_b32_e32 v30, s30
3852 ; GFX10-NEXT: v_mov_b32_e32 v31, s31
3853 ; GFX10-NEXT: ; return to shader part epilog
3855 ; GFX11-LABEL: dyn_insertelement_v32f32_s_s_s:
3856 ; GFX11: ; %bb.0: ; %entry
3857 ; GFX11-NEXT: s_mov_b32 s0, s2
3858 ; GFX11-NEXT: s_mov_b32 m0, s35
3859 ; GFX11-NEXT: s_mov_b32 s1, s3
3860 ; GFX11-NEXT: s_mov_b32 s2, s4
3861 ; GFX11-NEXT: s_mov_b32 s3, s5
3862 ; GFX11-NEXT: s_mov_b32 s4, s6
3863 ; GFX11-NEXT: s_mov_b32 s5, s7
3864 ; GFX11-NEXT: s_mov_b32 s6, s8
3865 ; GFX11-NEXT: s_mov_b32 s7, s9
3866 ; GFX11-NEXT: s_mov_b32 s8, s10
3867 ; GFX11-NEXT: s_mov_b32 s9, s11
3868 ; GFX11-NEXT: s_mov_b32 s10, s12
3869 ; GFX11-NEXT: s_mov_b32 s11, s13
3870 ; GFX11-NEXT: s_mov_b32 s12, s14
3871 ; GFX11-NEXT: s_mov_b32 s13, s15
3872 ; GFX11-NEXT: s_mov_b32 s14, s16
3873 ; GFX11-NEXT: s_mov_b32 s15, s17
3874 ; GFX11-NEXT: s_mov_b32 s16, s18
3875 ; GFX11-NEXT: s_mov_b32 s17, s19
3876 ; GFX11-NEXT: s_mov_b32 s18, s20
3877 ; GFX11-NEXT: s_mov_b32 s19, s21
3878 ; GFX11-NEXT: s_mov_b32 s20, s22
3879 ; GFX11-NEXT: s_mov_b32 s21, s23
3880 ; GFX11-NEXT: s_mov_b32 s22, s24
3881 ; GFX11-NEXT: s_mov_b32 s23, s25
3882 ; GFX11-NEXT: s_mov_b32 s24, s26
3883 ; GFX11-NEXT: s_mov_b32 s25, s27
3884 ; GFX11-NEXT: s_mov_b32 s26, s28
3885 ; GFX11-NEXT: s_mov_b32 s27, s29
3886 ; GFX11-NEXT: s_mov_b32 s28, s30
3887 ; GFX11-NEXT: s_mov_b32 s29, s31
3888 ; GFX11-NEXT: s_mov_b32 s31, s33
3889 ; GFX11-NEXT: s_mov_b32 s30, s32
3890 ; GFX11-NEXT: s_movreld_b32 s0, s34
3891 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3892 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
3893 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
3894 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
3895 ; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
3896 ; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
3897 ; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
3898 ; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
3899 ; GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
3900 ; GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
3901 ; GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
3902 ; GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
3903 ; GFX11-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25
3904 ; GFX11-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27
3905 ; GFX11-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29
3906 ; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
3907 ; GFX11-NEXT: ; return to shader part epilog
3909 %insert = insertelement <32 x float> %vec, float %val, i32 %idx
3910 ret <32 x float> %insert
; Dynamic insertelement into <16 x i64> where vector, value and index are all
; inreg arguments. The expected code copies the vector from s2..s33 down to
; s0..s31, loads the index from s36 into m0, and writes the 64-bit value in
; s[34:35] with a single s_movreld_b64 relative to s[0:1]. The gfx900 checks
; have an s_nop 0 between the m0 write and the movrel.
3913 define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
3914 ; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s:
3915 ; GPRIDX: ; %bb.0: ; %entry
3916 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3917 ; GPRIDX-NEXT: s_mov_b32 s1, s3
3918 ; GPRIDX-NEXT: s_mov_b32 s2, s4
3919 ; GPRIDX-NEXT: s_mov_b32 s3, s5
3920 ; GPRIDX-NEXT: s_mov_b32 s4, s6
3921 ; GPRIDX-NEXT: s_mov_b32 s5, s7
3922 ; GPRIDX-NEXT: s_mov_b32 s6, s8
3923 ; GPRIDX-NEXT: s_mov_b32 s7, s9
3924 ; GPRIDX-NEXT: s_mov_b32 s8, s10
3925 ; GPRIDX-NEXT: s_mov_b32 s9, s11
3926 ; GPRIDX-NEXT: s_mov_b32 s10, s12
3927 ; GPRIDX-NEXT: s_mov_b32 s11, s13
3928 ; GPRIDX-NEXT: s_mov_b32 s12, s14
3929 ; GPRIDX-NEXT: s_mov_b32 s13, s15
3930 ; GPRIDX-NEXT: s_mov_b32 s14, s16
3931 ; GPRIDX-NEXT: s_mov_b32 s15, s17
3932 ; GPRIDX-NEXT: s_mov_b32 s16, s18
3933 ; GPRIDX-NEXT: s_mov_b32 s17, s19
3934 ; GPRIDX-NEXT: s_mov_b32 s18, s20
3935 ; GPRIDX-NEXT: s_mov_b32 s19, s21
3936 ; GPRIDX-NEXT: s_mov_b32 s20, s22
3937 ; GPRIDX-NEXT: s_mov_b32 s21, s23
3938 ; GPRIDX-NEXT: s_mov_b32 s22, s24
3939 ; GPRIDX-NEXT: s_mov_b32 s23, s25
3940 ; GPRIDX-NEXT: s_mov_b32 s24, s26
3941 ; GPRIDX-NEXT: s_mov_b32 s25, s27
3942 ; GPRIDX-NEXT: s_mov_b32 s26, s28
3943 ; GPRIDX-NEXT: s_mov_b32 s27, s29
3944 ; GPRIDX-NEXT: s_mov_b32 s28, s30
3945 ; GPRIDX-NEXT: s_mov_b32 s29, s31
3946 ; GPRIDX-NEXT: s_mov_b32 s31, s33
3947 ; GPRIDX-NEXT: s_mov_b32 s30, s32
3948 ; GPRIDX-NEXT: s_mov_b32 m0, s36
3949 ; GPRIDX-NEXT: s_nop 0
3950 ; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35]
3951 ; GPRIDX-NEXT: ; return to shader part epilog
3953 ; GFX10PLUS-LABEL: dyn_insertelement_v16i64_s_s_s:
3954 ; GFX10PLUS: ; %bb.0: ; %entry
3955 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
3956 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
3957 ; GFX10PLUS-NEXT: s_mov_b32 m0, s36
3958 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
3959 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
3960 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
3961 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
3962 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
3963 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
3964 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
3965 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
3966 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
3967 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
3968 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
3969 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
3970 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
3971 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
3972 ; GFX10PLUS-NEXT: s_mov_b32 s16, s18
3973 ; GFX10PLUS-NEXT: s_mov_b32 s17, s19
3974 ; GFX10PLUS-NEXT: s_mov_b32 s18, s20
3975 ; GFX10PLUS-NEXT: s_mov_b32 s19, s21
3976 ; GFX10PLUS-NEXT: s_mov_b32 s20, s22
3977 ; GFX10PLUS-NEXT: s_mov_b32 s21, s23
3978 ; GFX10PLUS-NEXT: s_mov_b32 s22, s24
3979 ; GFX10PLUS-NEXT: s_mov_b32 s23, s25
3980 ; GFX10PLUS-NEXT: s_mov_b32 s24, s26
3981 ; GFX10PLUS-NEXT: s_mov_b32 s25, s27
3982 ; GFX10PLUS-NEXT: s_mov_b32 s26, s28
3983 ; GFX10PLUS-NEXT: s_mov_b32 s27, s29
3984 ; GFX10PLUS-NEXT: s_mov_b32 s28, s30
3985 ; GFX10PLUS-NEXT: s_mov_b32 s29, s31
3986 ; GFX10PLUS-NEXT: s_mov_b32 s31, s33
3987 ; GFX10PLUS-NEXT: s_mov_b32 s30, s32
3988 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[34:35]
3989 ; GFX10PLUS-NEXT: ; return to shader part epilog
3991 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
3992 ret <16 x i64> %insert
; Dynamic insertelement into <16 x double> where vector, value and index are
; all inreg arguments. The expected code matches the <16 x i64> variant of
; this test shape: copy s2..s33 down to s0..s31, load the index from s36 into
; m0, and write s[34:35] with s_movreld_b64 relative to s[0:1]. Unlike the
; 32-bit float cases in this file, the checks here contain no copies of the
; result into VGPRs.
3995 define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
3996 ; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s:
3997 ; GPRIDX: ; %bb.0: ; %entry
3998 ; GPRIDX-NEXT: s_mov_b32 s0, s2
3999 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4000 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4001 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4002 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4003 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4004 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4005 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4006 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4007 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4008 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4009 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4010 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4011 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4012 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4013 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4014 ; GPRIDX-NEXT: s_mov_b32 s16, s18
4015 ; GPRIDX-NEXT: s_mov_b32 s17, s19
4016 ; GPRIDX-NEXT: s_mov_b32 s18, s20
4017 ; GPRIDX-NEXT: s_mov_b32 s19, s21
4018 ; GPRIDX-NEXT: s_mov_b32 s20, s22
4019 ; GPRIDX-NEXT: s_mov_b32 s21, s23
4020 ; GPRIDX-NEXT: s_mov_b32 s22, s24
4021 ; GPRIDX-NEXT: s_mov_b32 s23, s25
4022 ; GPRIDX-NEXT: s_mov_b32 s24, s26
4023 ; GPRIDX-NEXT: s_mov_b32 s25, s27
4024 ; GPRIDX-NEXT: s_mov_b32 s26, s28
4025 ; GPRIDX-NEXT: s_mov_b32 s27, s29
4026 ; GPRIDX-NEXT: s_mov_b32 s28, s30
4027 ; GPRIDX-NEXT: s_mov_b32 s29, s31
4028 ; GPRIDX-NEXT: s_mov_b32 s31, s33
4029 ; GPRIDX-NEXT: s_mov_b32 s30, s32
4030 ; GPRIDX-NEXT: s_mov_b32 m0, s36
4031 ; GPRIDX-NEXT: s_nop 0
4032 ; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35]
4033 ; GPRIDX-NEXT: ; return to shader part epilog
4035 ; GFX10PLUS-LABEL: dyn_insertelement_v16f64_s_s_s:
4036 ; GFX10PLUS: ; %bb.0: ; %entry
4037 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
4038 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
4039 ; GFX10PLUS-NEXT: s_mov_b32 m0, s36
4040 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
4041 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
4042 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
4043 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
4044 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
4045 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
4046 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
4047 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
4048 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
4049 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
4050 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
4051 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
4052 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16
4053 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17
4054 ; GFX10PLUS-NEXT: s_mov_b32 s16, s18
4055 ; GFX10PLUS-NEXT: s_mov_b32 s17, s19
4056 ; GFX10PLUS-NEXT: s_mov_b32 s18, s20
4057 ; GFX10PLUS-NEXT: s_mov_b32 s19, s21
4058 ; GFX10PLUS-NEXT: s_mov_b32 s20, s22
4059 ; GFX10PLUS-NEXT: s_mov_b32 s21, s23
4060 ; GFX10PLUS-NEXT: s_mov_b32 s22, s24
4061 ; GFX10PLUS-NEXT: s_mov_b32 s23, s25
4062 ; GFX10PLUS-NEXT: s_mov_b32 s24, s26
4063 ; GFX10PLUS-NEXT: s_mov_b32 s25, s27
4064 ; GFX10PLUS-NEXT: s_mov_b32 s26, s28
4065 ; GFX10PLUS-NEXT: s_mov_b32 s27, s29
4066 ; GFX10PLUS-NEXT: s_mov_b32 s28, s30
4067 ; GFX10PLUS-NEXT: s_mov_b32 s29, s31
4068 ; GFX10PLUS-NEXT: s_mov_b32 s31, s33
4069 ; GFX10PLUS-NEXT: s_mov_b32 s30, s32
4070 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[34:35]
4071 ; GFX10PLUS-NEXT: ; return to shader part epilog
4073 %insert = insertelement <16 x double> %vec, double %val, i32 %idx
4074 ret <16 x double> %insert
; Dynamic insertelement into <16 x i32> with an inreg vector, a plain
; (non-inreg) value and an inreg index. The expected code shifts the vector
; from s2..s17 down to s0..s15, copies it into v1..v16, performs an indexed
; write of v0 relative to v1 using the index in s18 (s_set_gpr_idx_on/off on
; gfx900, m0 plus v_movreld_b32 on gfx1010/gfx1100), and finally moves the
; result back to s0..s15 with v_readfirstlane_b32.
4077 define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) {
4078 ; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s:
4079 ; GPRIDX: ; %bb.0: ; %entry
4080 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4081 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4082 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4083 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4084 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4085 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4086 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4087 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4088 ; GPRIDX-NEXT: s_mov_b32 s0, s2
4089 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4090 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4091 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4092 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4093 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4094 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4095 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4096 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s15
4097 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s14
4098 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s13
4099 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s12
4100 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s11
4101 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s10
4102 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s9
4103 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s8
4104 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s7
4105 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s6
4106 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s5
4107 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s4
4108 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s3
4109 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s2
4110 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s1
4111 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s0
4112 ; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST)
4113 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v0
4114 ; GPRIDX-NEXT: s_set_gpr_idx_off
4115 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v1
4116 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v2
4117 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v3
4118 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v4
4119 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5
4120 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v6
4121 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7
4122 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v8
4123 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9
4124 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v10
4125 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11
4126 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v12
4127 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v13
4128 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v14
4129 ; GPRIDX-NEXT: v_readfirstlane_b32 s14, v15
4130 ; GPRIDX-NEXT: v_readfirstlane_b32 s15, v16
4131 ; GPRIDX-NEXT: ; return to shader part epilog
4133 ; GFX10-LABEL: dyn_insertelement_v16i32_s_v_s:
4134 ; GFX10: ; %bb.0: ; %entry
4135 ; GFX10-NEXT: s_mov_b32 s1, s3
4136 ; GFX10-NEXT: s_mov_b32 s3, s5
4137 ; GFX10-NEXT: s_mov_b32 s5, s7
4138 ; GFX10-NEXT: s_mov_b32 s7, s9
4139 ; GFX10-NEXT: s_mov_b32 s9, s11
4140 ; GFX10-NEXT: s_mov_b32 s11, s13
4141 ; GFX10-NEXT: s_mov_b32 s13, s15
4142 ; GFX10-NEXT: s_mov_b32 s15, s17
4143 ; GFX10-NEXT: s_mov_b32 s0, s2
4144 ; GFX10-NEXT: s_mov_b32 s2, s4
4145 ; GFX10-NEXT: s_mov_b32 s4, s6
4146 ; GFX10-NEXT: s_mov_b32 s6, s8
4147 ; GFX10-NEXT: s_mov_b32 s8, s10
4148 ; GFX10-NEXT: s_mov_b32 s10, s12
4149 ; GFX10-NEXT: s_mov_b32 s12, s14
4150 ; GFX10-NEXT: s_mov_b32 s14, s16
4151 ; GFX10-NEXT: v_mov_b32_e32 v16, s15
4152 ; GFX10-NEXT: v_mov_b32_e32 v1, s0
4153 ; GFX10-NEXT: s_mov_b32 m0, s18
4154 ; GFX10-NEXT: v_mov_b32_e32 v15, s14
4155 ; GFX10-NEXT: v_mov_b32_e32 v14, s13
4156 ; GFX10-NEXT: v_mov_b32_e32 v13, s12
4157 ; GFX10-NEXT: v_mov_b32_e32 v12, s11
4158 ; GFX10-NEXT: v_mov_b32_e32 v11, s10
4159 ; GFX10-NEXT: v_mov_b32_e32 v10, s9
4160 ; GFX10-NEXT: v_mov_b32_e32 v9, s8
4161 ; GFX10-NEXT: v_mov_b32_e32 v8, s7
4162 ; GFX10-NEXT: v_mov_b32_e32 v7, s6
4163 ; GFX10-NEXT: v_mov_b32_e32 v6, s5
4164 ; GFX10-NEXT: v_mov_b32_e32 v5, s4
4165 ; GFX10-NEXT: v_mov_b32_e32 v4, s3
4166 ; GFX10-NEXT: v_mov_b32_e32 v3, s2
4167 ; GFX10-NEXT: v_mov_b32_e32 v2, s1
4168 ; GFX10-NEXT: v_movreld_b32_e32 v1, v0
4169 ; GFX10-NEXT: v_readfirstlane_b32 s0, v1
4170 ; GFX10-NEXT: v_readfirstlane_b32 s1, v2
4171 ; GFX10-NEXT: v_readfirstlane_b32 s2, v3
4172 ; GFX10-NEXT: v_readfirstlane_b32 s3, v4
4173 ; GFX10-NEXT: v_readfirstlane_b32 s4, v5
4174 ; GFX10-NEXT: v_readfirstlane_b32 s5, v6
4175 ; GFX10-NEXT: v_readfirstlane_b32 s6, v7
4176 ; GFX10-NEXT: v_readfirstlane_b32 s7, v8
4177 ; GFX10-NEXT: v_readfirstlane_b32 s8, v9
4178 ; GFX10-NEXT: v_readfirstlane_b32 s9, v10
4179 ; GFX10-NEXT: v_readfirstlane_b32 s10, v11
4180 ; GFX10-NEXT: v_readfirstlane_b32 s11, v12
4181 ; GFX10-NEXT: v_readfirstlane_b32 s12, v13
4182 ; GFX10-NEXT: v_readfirstlane_b32 s13, v14
4183 ; GFX10-NEXT: v_readfirstlane_b32 s14, v15
4184 ; GFX10-NEXT: v_readfirstlane_b32 s15, v16
4185 ; GFX10-NEXT: ; return to shader part epilog
4187 ; GFX11-LABEL: dyn_insertelement_v16i32_s_v_s:
4188 ; GFX11: ; %bb.0: ; %entry
4189 ; GFX11-NEXT: s_mov_b32 s1, s3
4190 ; GFX11-NEXT: s_mov_b32 s3, s5
4191 ; GFX11-NEXT: s_mov_b32 s5, s7
4192 ; GFX11-NEXT: s_mov_b32 s7, s9
4193 ; GFX11-NEXT: s_mov_b32 s9, s11
4194 ; GFX11-NEXT: s_mov_b32 s11, s13
4195 ; GFX11-NEXT: s_mov_b32 s13, s15
4196 ; GFX11-NEXT: s_mov_b32 s15, s17
4197 ; GFX11-NEXT: s_mov_b32 s0, s2
4198 ; GFX11-NEXT: s_mov_b32 s2, s4
4199 ; GFX11-NEXT: s_mov_b32 s4, s6
4200 ; GFX11-NEXT: s_mov_b32 s6, s8
4201 ; GFX11-NEXT: s_mov_b32 s8, s10
4202 ; GFX11-NEXT: s_mov_b32 s10, s12
4203 ; GFX11-NEXT: s_mov_b32 s12, s14
4204 ; GFX11-NEXT: s_mov_b32 s14, s16
4205 ; GFX11-NEXT: v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14
4206 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
4207 ; GFX11-NEXT: s_mov_b32 m0, s18
4208 ; GFX11-NEXT: v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12
4209 ; GFX11-NEXT: v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10
4210 ; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8
4211 ; GFX11-NEXT: v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6
4212 ; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4
4213 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
4214 ; GFX11-NEXT: v_movreld_b32_e32 v1, v0
4215 ; GFX11-NEXT: v_readfirstlane_b32 s0, v1
4216 ; GFX11-NEXT: v_readfirstlane_b32 s1, v2
4217 ; GFX11-NEXT: v_readfirstlane_b32 s2, v3
4218 ; GFX11-NEXT: v_readfirstlane_b32 s3, v4
4219 ; GFX11-NEXT: v_readfirstlane_b32 s4, v5
4220 ; GFX11-NEXT: v_readfirstlane_b32 s5, v6
4221 ; GFX11-NEXT: v_readfirstlane_b32 s6, v7
4222 ; GFX11-NEXT: v_readfirstlane_b32 s7, v8
4223 ; GFX11-NEXT: v_readfirstlane_b32 s8, v9
4224 ; GFX11-NEXT: v_readfirstlane_b32 s9, v10
4225 ; GFX11-NEXT: v_readfirstlane_b32 s10, v11
4226 ; GFX11-NEXT: v_readfirstlane_b32 s11, v12
4227 ; GFX11-NEXT: v_readfirstlane_b32 s12, v13
4228 ; GFX11-NEXT: v_readfirstlane_b32 s13, v14
4229 ; GFX11-NEXT: v_readfirstlane_b32 s14, v15
4230 ; GFX11-NEXT: v_readfirstlane_b32 s15, v16
4231 ; GFX11-NEXT: ; return to shader part epilog
4233 %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
4234 ret <16 x i32> %insert
; Dynamic insertelement into a <16 x float> vector: %vec and %idx are passed
; inreg, %val is not. In the expected code below the vector is copied from
; s2-s17 down to s0-s15 and then into v0-v15, %val is saved from v0 to v16, and
; a single indexed move based at v0 writes it using the index in s18:
;   * gfx900 run line:             s_set_gpr_idx_on/off around v_mov_b32
;   * gfx1010 / gfx1100 run lines: m0 = s18, then v_movreld_b32
; No v_readfirstlane follows the indexed move in any of the three expectations.
; The check lines are autogenerated (see the NOTE on the first line of the file).
4237 define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) {
4238 ; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s:
4239 ; GPRIDX: ; %bb.0: ; %entry
4240 ; GPRIDX-NEXT: s_mov_b32 s0, s2
4241 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4242 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4243 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4244 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4245 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4246 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4247 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4248 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4249 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4250 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4251 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4252 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4253 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4254 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4255 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4256 ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
4257 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
4258 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
4259 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
4260 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
4261 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
4262 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
4263 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
4264 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
4265 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
4266 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
4267 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
4268 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
4269 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s12
4270 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s13
4271 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s14
4272 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s15
4273 ; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST)
4274 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
4275 ; GPRIDX-NEXT: s_set_gpr_idx_off
4276 ; GPRIDX-NEXT: ; return to shader part epilog
4278 ; GFX10-LABEL: dyn_insertelement_v16f32_s_v_s:
4279 ; GFX10: ; %bb.0: ; %entry
4280 ; GFX10-NEXT: s_mov_b32 s0, s2
4281 ; GFX10-NEXT: s_mov_b32 s1, s3
4282 ; GFX10-NEXT: s_mov_b32 s2, s4
4283 ; GFX10-NEXT: s_mov_b32 s3, s5
4284 ; GFX10-NEXT: s_mov_b32 s4, s6
4285 ; GFX10-NEXT: s_mov_b32 s5, s7
4286 ; GFX10-NEXT: s_mov_b32 s6, s8
4287 ; GFX10-NEXT: s_mov_b32 s7, s9
4288 ; GFX10-NEXT: s_mov_b32 s8, s10
4289 ; GFX10-NEXT: s_mov_b32 s9, s11
4290 ; GFX10-NEXT: s_mov_b32 s10, s12
4291 ; GFX10-NEXT: s_mov_b32 s11, s13
4292 ; GFX10-NEXT: s_mov_b32 s12, s14
4293 ; GFX10-NEXT: s_mov_b32 s13, s15
4294 ; GFX10-NEXT: s_mov_b32 s14, s16
4295 ; GFX10-NEXT: s_mov_b32 s15, s17
4296 ; GFX10-NEXT: v_mov_b32_e32 v16, v0
4297 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
4298 ; GFX10-NEXT: s_mov_b32 m0, s18
4299 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
4300 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
4301 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
4302 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
4303 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
4304 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
4305 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
4306 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
4307 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
4308 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
4309 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
4310 ; GFX10-NEXT: v_mov_b32_e32 v12, s12
4311 ; GFX10-NEXT: v_mov_b32_e32 v13, s13
4312 ; GFX10-NEXT: v_mov_b32_e32 v14, s14
4313 ; GFX10-NEXT: v_mov_b32_e32 v15, s15
4314 ; GFX10-NEXT: v_movreld_b32_e32 v0, v16
4315 ; GFX10-NEXT: ; return to shader part epilog
4317 ; GFX11-LABEL: dyn_insertelement_v16f32_s_v_s:
4318 ; GFX11: ; %bb.0: ; %entry
4319 ; GFX11-NEXT: s_mov_b32 s0, s2
4320 ; GFX11-NEXT: s_mov_b32 s1, s3
4321 ; GFX11-NEXT: s_mov_b32 s2, s4
4322 ; GFX11-NEXT: s_mov_b32 s3, s5
4323 ; GFX11-NEXT: s_mov_b32 s4, s6
4324 ; GFX11-NEXT: s_mov_b32 s5, s7
4325 ; GFX11-NEXT: s_mov_b32 s6, s8
4326 ; GFX11-NEXT: s_mov_b32 s7, s9
4327 ; GFX11-NEXT: s_mov_b32 s8, s10
4328 ; GFX11-NEXT: s_mov_b32 s9, s11
4329 ; GFX11-NEXT: s_mov_b32 s10, s12
4330 ; GFX11-NEXT: s_mov_b32 s11, s13
4331 ; GFX11-NEXT: s_mov_b32 s12, s14
4332 ; GFX11-NEXT: s_mov_b32 s13, s15
4333 ; GFX11-NEXT: s_mov_b32 s14, s16
4334 ; GFX11-NEXT: s_mov_b32 s15, s17
4335 ; GFX11-NEXT: v_mov_b32_e32 v16, v0
4336 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
4337 ; GFX11-NEXT: s_mov_b32 m0, s18
4338 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
4339 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
4340 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
4341 ; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
4342 ; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
4343 ; GFX11-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12
4344 ; GFX11-NEXT: v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14
4345 ; GFX11-NEXT: v_movreld_b32_e32 v0, v16
4346 ; GFX11-NEXT: ; return to shader part epilog
4348 %insert = insertelement <16 x float> %vec, float %val, i32 %idx
4349 ret <16 x float> %insert
; Dynamic insertelement into a <32 x float> vector: %vec and %idx are passed
; inreg, %val is not. In the expected code below the vector is copied from
; s2-s33 down to s0-s31 and then into v0-v31, %val is saved from v0 to v32, and
; a single indexed move based at v0 writes it using the index in s34:
;   * gfx900 run line:             s_set_gpr_idx_on/off around v_mov_b32
;   * gfx1010 / gfx1100 run lines: m0 = s34, then v_movreld_b32
; No v_readfirstlane follows the indexed move in any of the three expectations.
; The check lines are autogenerated (see the NOTE on the first line of the file).
4352 define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) {
4353 ; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s:
4354 ; GPRIDX: ; %bb.0: ; %entry
4355 ; GPRIDX-NEXT: s_mov_b32 s0, s2
4356 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4357 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4358 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4359 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4360 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4361 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4362 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4363 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4364 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4365 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4366 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4367 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4368 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4369 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4370 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4371 ; GPRIDX-NEXT: s_mov_b32 s16, s18
4372 ; GPRIDX-NEXT: s_mov_b32 s17, s19
4373 ; GPRIDX-NEXT: s_mov_b32 s18, s20
4374 ; GPRIDX-NEXT: s_mov_b32 s19, s21
4375 ; GPRIDX-NEXT: s_mov_b32 s20, s22
4376 ; GPRIDX-NEXT: s_mov_b32 s21, s23
4377 ; GPRIDX-NEXT: s_mov_b32 s22, s24
4378 ; GPRIDX-NEXT: s_mov_b32 s23, s25
4379 ; GPRIDX-NEXT: s_mov_b32 s24, s26
4380 ; GPRIDX-NEXT: s_mov_b32 s25, s27
4381 ; GPRIDX-NEXT: s_mov_b32 s26, s28
4382 ; GPRIDX-NEXT: s_mov_b32 s27, s29
4383 ; GPRIDX-NEXT: s_mov_b32 s28, s30
4384 ; GPRIDX-NEXT: s_mov_b32 s29, s31
4385 ; GPRIDX-NEXT: s_mov_b32 s31, s33
4386 ; GPRIDX-NEXT: s_mov_b32 s30, s32
4387 ; GPRIDX-NEXT: v_mov_b32_e32 v32, v0
4388 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
4389 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
4390 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
4391 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
4392 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
4393 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
4394 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
4395 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
4396 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
4397 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
4398 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
4399 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
4400 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s12
4401 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s13
4402 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s14
4403 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s15
4404 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s16
4405 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s17
4406 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s18
4407 ; GPRIDX-NEXT: v_mov_b32_e32 v19, s19
4408 ; GPRIDX-NEXT: v_mov_b32_e32 v20, s20
4409 ; GPRIDX-NEXT: v_mov_b32_e32 v21, s21
4410 ; GPRIDX-NEXT: v_mov_b32_e32 v22, s22
4411 ; GPRIDX-NEXT: v_mov_b32_e32 v23, s23
4412 ; GPRIDX-NEXT: v_mov_b32_e32 v24, s24
4413 ; GPRIDX-NEXT: v_mov_b32_e32 v25, s25
4414 ; GPRIDX-NEXT: v_mov_b32_e32 v26, s26
4415 ; GPRIDX-NEXT: v_mov_b32_e32 v27, s27
4416 ; GPRIDX-NEXT: v_mov_b32_e32 v28, s28
4417 ; GPRIDX-NEXT: v_mov_b32_e32 v29, s29
4418 ; GPRIDX-NEXT: v_mov_b32_e32 v30, s30
4419 ; GPRIDX-NEXT: v_mov_b32_e32 v31, s31
4420 ; GPRIDX-NEXT: s_set_gpr_idx_on s34, gpr_idx(DST)
4421 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v32
4422 ; GPRIDX-NEXT: s_set_gpr_idx_off
4423 ; GPRIDX-NEXT: ; return to shader part epilog
4425 ; GFX10-LABEL: dyn_insertelement_v32f32_s_v_s:
4426 ; GFX10: ; %bb.0: ; %entry
4427 ; GFX10-NEXT: s_mov_b32 s0, s2
4428 ; GFX10-NEXT: s_mov_b32 s1, s3
4429 ; GFX10-NEXT: s_mov_b32 s2, s4
4430 ; GFX10-NEXT: s_mov_b32 s3, s5
4431 ; GFX10-NEXT: s_mov_b32 s4, s6
4432 ; GFX10-NEXT: s_mov_b32 s5, s7
4433 ; GFX10-NEXT: s_mov_b32 s6, s8
4434 ; GFX10-NEXT: s_mov_b32 s7, s9
4435 ; GFX10-NEXT: s_mov_b32 s8, s10
4436 ; GFX10-NEXT: s_mov_b32 s9, s11
4437 ; GFX10-NEXT: s_mov_b32 s10, s12
4438 ; GFX10-NEXT: s_mov_b32 s11, s13
4439 ; GFX10-NEXT: s_mov_b32 s12, s14
4440 ; GFX10-NEXT: s_mov_b32 s13, s15
4441 ; GFX10-NEXT: s_mov_b32 s14, s16
4442 ; GFX10-NEXT: s_mov_b32 s15, s17
4443 ; GFX10-NEXT: s_mov_b32 s16, s18
4444 ; GFX10-NEXT: s_mov_b32 s17, s19
4445 ; GFX10-NEXT: s_mov_b32 s18, s20
4446 ; GFX10-NEXT: s_mov_b32 s19, s21
4447 ; GFX10-NEXT: s_mov_b32 s20, s22
4448 ; GFX10-NEXT: s_mov_b32 s21, s23
4449 ; GFX10-NEXT: s_mov_b32 s22, s24
4450 ; GFX10-NEXT: s_mov_b32 s23, s25
4451 ; GFX10-NEXT: s_mov_b32 s24, s26
4452 ; GFX10-NEXT: s_mov_b32 s25, s27
4453 ; GFX10-NEXT: s_mov_b32 s26, s28
4454 ; GFX10-NEXT: s_mov_b32 s27, s29
4455 ; GFX10-NEXT: s_mov_b32 s28, s30
4456 ; GFX10-NEXT: s_mov_b32 s29, s31
4457 ; GFX10-NEXT: s_mov_b32 s31, s33
4458 ; GFX10-NEXT: s_mov_b32 s30, s32
4459 ; GFX10-NEXT: v_mov_b32_e32 v32, v0
4460 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
4461 ; GFX10-NEXT: s_mov_b32 m0, s34
4462 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
4463 ; GFX10-NEXT: v_mov_b32_e32 v2, s2
4464 ; GFX10-NEXT: v_mov_b32_e32 v3, s3
4465 ; GFX10-NEXT: v_mov_b32_e32 v4, s4
4466 ; GFX10-NEXT: v_mov_b32_e32 v5, s5
4467 ; GFX10-NEXT: v_mov_b32_e32 v6, s6
4468 ; GFX10-NEXT: v_mov_b32_e32 v7, s7
4469 ; GFX10-NEXT: v_mov_b32_e32 v8, s8
4470 ; GFX10-NEXT: v_mov_b32_e32 v9, s9
4471 ; GFX10-NEXT: v_mov_b32_e32 v10, s10
4472 ; GFX10-NEXT: v_mov_b32_e32 v11, s11
4473 ; GFX10-NEXT: v_mov_b32_e32 v12, s12
4474 ; GFX10-NEXT: v_mov_b32_e32 v13, s13
4475 ; GFX10-NEXT: v_mov_b32_e32 v14, s14
4476 ; GFX10-NEXT: v_mov_b32_e32 v15, s15
4477 ; GFX10-NEXT: v_mov_b32_e32 v16, s16
4478 ; GFX10-NEXT: v_mov_b32_e32 v17, s17
4479 ; GFX10-NEXT: v_mov_b32_e32 v18, s18
4480 ; GFX10-NEXT: v_mov_b32_e32 v19, s19
4481 ; GFX10-NEXT: v_mov_b32_e32 v20, s20
4482 ; GFX10-NEXT: v_mov_b32_e32 v21, s21
4483 ; GFX10-NEXT: v_mov_b32_e32 v22, s22
4484 ; GFX10-NEXT: v_mov_b32_e32 v23, s23
4485 ; GFX10-NEXT: v_mov_b32_e32 v24, s24
4486 ; GFX10-NEXT: v_mov_b32_e32 v25, s25
4487 ; GFX10-NEXT: v_mov_b32_e32 v26, s26
4488 ; GFX10-NEXT: v_mov_b32_e32 v27, s27
4489 ; GFX10-NEXT: v_mov_b32_e32 v28, s28
4490 ; GFX10-NEXT: v_mov_b32_e32 v29, s29
4491 ; GFX10-NEXT: v_mov_b32_e32 v30, s30
4492 ; GFX10-NEXT: v_mov_b32_e32 v31, s31
4493 ; GFX10-NEXT: v_movreld_b32_e32 v0, v32
4494 ; GFX10-NEXT: ; return to shader part epilog
4496 ; GFX11-LABEL: dyn_insertelement_v32f32_s_v_s:
4497 ; GFX11: ; %bb.0: ; %entry
4498 ; GFX11-NEXT: s_mov_b32 s0, s2
4499 ; GFX11-NEXT: s_mov_b32 s1, s3
4500 ; GFX11-NEXT: s_mov_b32 s2, s4
4501 ; GFX11-NEXT: s_mov_b32 s3, s5
4502 ; GFX11-NEXT: s_mov_b32 s4, s6
4503 ; GFX11-NEXT: s_mov_b32 s5, s7
4504 ; GFX11-NEXT: s_mov_b32 s6, s8
4505 ; GFX11-NEXT: s_mov_b32 s7, s9
4506 ; GFX11-NEXT: s_mov_b32 s8, s10
4507 ; GFX11-NEXT: s_mov_b32 s9, s11
4508 ; GFX11-NEXT: s_mov_b32 s10, s12
4509 ; GFX11-NEXT: s_mov_b32 s11, s13
4510 ; GFX11-NEXT: s_mov_b32 s12, s14
4511 ; GFX11-NEXT: s_mov_b32 s13, s15
4512 ; GFX11-NEXT: s_mov_b32 s14, s16
4513 ; GFX11-NEXT: s_mov_b32 s15, s17
4514 ; GFX11-NEXT: s_mov_b32 s16, s18
4515 ; GFX11-NEXT: s_mov_b32 s17, s19
4516 ; GFX11-NEXT: s_mov_b32 s18, s20
4517 ; GFX11-NEXT: s_mov_b32 s19, s21
4518 ; GFX11-NEXT: s_mov_b32 s20, s22
4519 ; GFX11-NEXT: s_mov_b32 s21, s23
4520 ; GFX11-NEXT: s_mov_b32 s22, s24
4521 ; GFX11-NEXT: s_mov_b32 s23, s25
4522 ; GFX11-NEXT: s_mov_b32 s24, s26
4523 ; GFX11-NEXT: s_mov_b32 s25, s27
4524 ; GFX11-NEXT: s_mov_b32 s26, s28
4525 ; GFX11-NEXT: s_mov_b32 s27, s29
4526 ; GFX11-NEXT: s_mov_b32 s28, s30
4527 ; GFX11-NEXT: s_mov_b32 s29, s31
4528 ; GFX11-NEXT: s_mov_b32 s31, s33
4529 ; GFX11-NEXT: s_mov_b32 s30, s32
4530 ; GFX11-NEXT: v_mov_b32_e32 v32, v0
4531 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
4532 ; GFX11-NEXT: s_mov_b32 m0, s34
4533 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
4534 ; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
4535 ; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
4536 ; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
4537 ; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
4538 ; GFX11-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12
4539 ; GFX11-NEXT: v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14
4540 ; GFX11-NEXT: v_dual_mov_b32 v17, s17 :: v_dual_mov_b32 v16, s16
4541 ; GFX11-NEXT: v_dual_mov_b32 v19, s19 :: v_dual_mov_b32 v18, s18
4542 ; GFX11-NEXT: v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20
4543 ; GFX11-NEXT: v_dual_mov_b32 v23, s23 :: v_dual_mov_b32 v22, s22
4544 ; GFX11-NEXT: v_dual_mov_b32 v25, s25 :: v_dual_mov_b32 v24, s24
4545 ; GFX11-NEXT: v_dual_mov_b32 v27, s27 :: v_dual_mov_b32 v26, s26
4546 ; GFX11-NEXT: v_dual_mov_b32 v29, s29 :: v_dual_mov_b32 v28, s28
4547 ; GFX11-NEXT: v_dual_mov_b32 v31, s31 :: v_dual_mov_b32 v30, s30
4548 ; GFX11-NEXT: v_movreld_b32_e32 v0, v32
4549 ; GFX11-NEXT: ; return to shader part epilog
4551 %insert = insertelement <32 x float> %vec, float %val, i32 %idx
4552 ret <32 x float> %insert
; Dynamic insertelement into a <16 x i64> vector: %vec and %idx are passed
; inreg, %val is not. In the expected code below the vector is copied from
; s2-s33 down to s0-s31 and then into v2-v33, while %val stays in v0/v1. The
; index in s34 is shifted left by one (s_lshl_b32 ..., s34, 1) and two indexed
; moves, based at v2 and v3, write v0 and v1 respectively:
;   * gfx900 run line:             s_set_gpr_idx_on/off around two v_mov_b32
;   * gfx1010 / gfx1100 run lines: m0 = s34 << 1, then two v_movreld_b32
; Afterwards v2-v33 are read back into s0-s31 with v_readfirstlane_b32.
; The check lines are autogenerated (see the NOTE on the first line of the file).
4555 define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) {
4556 ; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s:
4557 ; GPRIDX: ; %bb.0: ; %entry
4558 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4559 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4560 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4561 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4562 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4563 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4564 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4565 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4566 ; GPRIDX-NEXT: s_mov_b32 s17, s19
4567 ; GPRIDX-NEXT: s_mov_b32 s19, s21
4568 ; GPRIDX-NEXT: s_mov_b32 s21, s23
4569 ; GPRIDX-NEXT: s_mov_b32 s23, s25
4570 ; GPRIDX-NEXT: s_mov_b32 s25, s27
4571 ; GPRIDX-NEXT: s_mov_b32 s27, s29
4572 ; GPRIDX-NEXT: s_mov_b32 s29, s31
4573 ; GPRIDX-NEXT: s_mov_b32 s31, s33
4574 ; GPRIDX-NEXT: s_mov_b32 s0, s2
4575 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4576 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4577 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4578 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4579 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4580 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4581 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4582 ; GPRIDX-NEXT: s_mov_b32 s16, s18
4583 ; GPRIDX-NEXT: s_mov_b32 s18, s20
4584 ; GPRIDX-NEXT: s_mov_b32 s20, s22
4585 ; GPRIDX-NEXT: s_mov_b32 s22, s24
4586 ; GPRIDX-NEXT: s_mov_b32 s24, s26
4587 ; GPRIDX-NEXT: s_mov_b32 s26, s28
4588 ; GPRIDX-NEXT: s_mov_b32 s28, s30
4589 ; GPRIDX-NEXT: s_mov_b32 s30, s32
4590 ; GPRIDX-NEXT: v_mov_b32_e32 v33, s31
4591 ; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1
4592 ; GPRIDX-NEXT: v_mov_b32_e32 v32, s30
4593 ; GPRIDX-NEXT: v_mov_b32_e32 v31, s29
4594 ; GPRIDX-NEXT: v_mov_b32_e32 v30, s28
4595 ; GPRIDX-NEXT: v_mov_b32_e32 v29, s27
4596 ; GPRIDX-NEXT: v_mov_b32_e32 v28, s26
4597 ; GPRIDX-NEXT: v_mov_b32_e32 v27, s25
4598 ; GPRIDX-NEXT: v_mov_b32_e32 v26, s24
4599 ; GPRIDX-NEXT: v_mov_b32_e32 v25, s23
4600 ; GPRIDX-NEXT: v_mov_b32_e32 v24, s22
4601 ; GPRIDX-NEXT: v_mov_b32_e32 v23, s21
4602 ; GPRIDX-NEXT: v_mov_b32_e32 v22, s20
4603 ; GPRIDX-NEXT: v_mov_b32_e32 v21, s19
4604 ; GPRIDX-NEXT: v_mov_b32_e32 v20, s18
4605 ; GPRIDX-NEXT: v_mov_b32_e32 v19, s17
4606 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s16
4607 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
4608 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
4609 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
4610 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
4611 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
4612 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
4613 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
4614 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
4615 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
4616 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
4617 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
4618 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
4619 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
4620 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
4621 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
4622 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
4623 ; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
4624 ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
4625 ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
4626 ; GPRIDX-NEXT: s_set_gpr_idx_off
4627 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
4628 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
4629 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4
4630 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5
4631 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6
4632 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7
4633 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
4634 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9
4635 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10
4636 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11
4637 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12
4638 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13
4639 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14
4640 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15
4641 ; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16
4642 ; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17
4643 ; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18
4644 ; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19
4645 ; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20
4646 ; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21
4647 ; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22
4648 ; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23
4649 ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24
4650 ; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25
4651 ; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26
4652 ; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27
4653 ; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28
4654 ; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29
4655 ; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30
4656 ; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31
4657 ; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32
4658 ; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33
4659 ; GPRIDX-NEXT: ; return to shader part epilog
4661 ; GFX10-LABEL: dyn_insertelement_v16i64_s_v_s:
4662 ; GFX10: ; %bb.0: ; %entry
4663 ; GFX10-NEXT: s_mov_b32 s1, s3
4664 ; GFX10-NEXT: s_mov_b32 s3, s5
4665 ; GFX10-NEXT: s_mov_b32 s5, s7
4666 ; GFX10-NEXT: s_mov_b32 s7, s9
4667 ; GFX10-NEXT: s_mov_b32 s9, s11
4668 ; GFX10-NEXT: s_mov_b32 s11, s13
4669 ; GFX10-NEXT: s_mov_b32 s13, s15
4670 ; GFX10-NEXT: s_mov_b32 s15, s17
4671 ; GFX10-NEXT: s_mov_b32 s17, s19
4672 ; GFX10-NEXT: s_mov_b32 s19, s21
4673 ; GFX10-NEXT: s_mov_b32 s21, s23
4674 ; GFX10-NEXT: s_mov_b32 s23, s25
4675 ; GFX10-NEXT: s_mov_b32 s25, s27
4676 ; GFX10-NEXT: s_mov_b32 s27, s29
4677 ; GFX10-NEXT: s_mov_b32 s29, s31
4678 ; GFX10-NEXT: s_mov_b32 s31, s33
4679 ; GFX10-NEXT: s_mov_b32 s0, s2
4680 ; GFX10-NEXT: s_mov_b32 s2, s4
4681 ; GFX10-NEXT: s_mov_b32 s4, s6
4682 ; GFX10-NEXT: s_mov_b32 s6, s8
4683 ; GFX10-NEXT: s_mov_b32 s8, s10
4684 ; GFX10-NEXT: s_mov_b32 s10, s12
4685 ; GFX10-NEXT: s_mov_b32 s12, s14
4686 ; GFX10-NEXT: s_mov_b32 s14, s16
4687 ; GFX10-NEXT: s_mov_b32 s16, s18
4688 ; GFX10-NEXT: s_mov_b32 s18, s20
4689 ; GFX10-NEXT: s_mov_b32 s20, s22
4690 ; GFX10-NEXT: s_mov_b32 s22, s24
4691 ; GFX10-NEXT: s_mov_b32 s24, s26
4692 ; GFX10-NEXT: s_mov_b32 s26, s28
4693 ; GFX10-NEXT: s_mov_b32 s28, s30
4694 ; GFX10-NEXT: s_mov_b32 s30, s32
4695 ; GFX10-NEXT: v_mov_b32_e32 v33, s31
4696 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
4697 ; GFX10-NEXT: s_lshl_b32 m0, s34, 1
4698 ; GFX10-NEXT: v_mov_b32_e32 v32, s30
4699 ; GFX10-NEXT: v_mov_b32_e32 v31, s29
4700 ; GFX10-NEXT: v_mov_b32_e32 v30, s28
4701 ; GFX10-NEXT: v_mov_b32_e32 v29, s27
4702 ; GFX10-NEXT: v_mov_b32_e32 v28, s26
4703 ; GFX10-NEXT: v_mov_b32_e32 v27, s25
4704 ; GFX10-NEXT: v_mov_b32_e32 v26, s24
4705 ; GFX10-NEXT: v_mov_b32_e32 v25, s23
4706 ; GFX10-NEXT: v_mov_b32_e32 v24, s22
4707 ; GFX10-NEXT: v_mov_b32_e32 v23, s21
4708 ; GFX10-NEXT: v_mov_b32_e32 v22, s20
4709 ; GFX10-NEXT: v_mov_b32_e32 v21, s19
4710 ; GFX10-NEXT: v_mov_b32_e32 v20, s18
4711 ; GFX10-NEXT: v_mov_b32_e32 v19, s17
4712 ; GFX10-NEXT: v_mov_b32_e32 v18, s16
4713 ; GFX10-NEXT: v_mov_b32_e32 v17, s15
4714 ; GFX10-NEXT: v_mov_b32_e32 v16, s14
4715 ; GFX10-NEXT: v_mov_b32_e32 v15, s13
4716 ; GFX10-NEXT: v_mov_b32_e32 v14, s12
4717 ; GFX10-NEXT: v_mov_b32_e32 v13, s11
4718 ; GFX10-NEXT: v_mov_b32_e32 v12, s10
4719 ; GFX10-NEXT: v_mov_b32_e32 v11, s9
4720 ; GFX10-NEXT: v_mov_b32_e32 v10, s8
4721 ; GFX10-NEXT: v_mov_b32_e32 v9, s7
4722 ; GFX10-NEXT: v_mov_b32_e32 v8, s6
4723 ; GFX10-NEXT: v_mov_b32_e32 v7, s5
4724 ; GFX10-NEXT: v_mov_b32_e32 v6, s4
4725 ; GFX10-NEXT: v_mov_b32_e32 v5, s3
4726 ; GFX10-NEXT: v_mov_b32_e32 v4, s2
4727 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
4728 ; GFX10-NEXT: v_movreld_b32_e32 v2, v0
4729 ; GFX10-NEXT: v_movreld_b32_e32 v3, v1
4730 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
4731 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
4732 ; GFX10-NEXT: v_readfirstlane_b32 s2, v4
4733 ; GFX10-NEXT: v_readfirstlane_b32 s3, v5
4734 ; GFX10-NEXT: v_readfirstlane_b32 s4, v6
4735 ; GFX10-NEXT: v_readfirstlane_b32 s5, v7
4736 ; GFX10-NEXT: v_readfirstlane_b32 s6, v8
4737 ; GFX10-NEXT: v_readfirstlane_b32 s7, v9
4738 ; GFX10-NEXT: v_readfirstlane_b32 s8, v10
4739 ; GFX10-NEXT: v_readfirstlane_b32 s9, v11
4740 ; GFX10-NEXT: v_readfirstlane_b32 s10, v12
4741 ; GFX10-NEXT: v_readfirstlane_b32 s11, v13
4742 ; GFX10-NEXT: v_readfirstlane_b32 s12, v14
4743 ; GFX10-NEXT: v_readfirstlane_b32 s13, v15
4744 ; GFX10-NEXT: v_readfirstlane_b32 s14, v16
4745 ; GFX10-NEXT: v_readfirstlane_b32 s15, v17
4746 ; GFX10-NEXT: v_readfirstlane_b32 s16, v18
4747 ; GFX10-NEXT: v_readfirstlane_b32 s17, v19
4748 ; GFX10-NEXT: v_readfirstlane_b32 s18, v20
4749 ; GFX10-NEXT: v_readfirstlane_b32 s19, v21
4750 ; GFX10-NEXT: v_readfirstlane_b32 s20, v22
4751 ; GFX10-NEXT: v_readfirstlane_b32 s21, v23
4752 ; GFX10-NEXT: v_readfirstlane_b32 s22, v24
4753 ; GFX10-NEXT: v_readfirstlane_b32 s23, v25
4754 ; GFX10-NEXT: v_readfirstlane_b32 s24, v26
4755 ; GFX10-NEXT: v_readfirstlane_b32 s25, v27
4756 ; GFX10-NEXT: v_readfirstlane_b32 s26, v28
4757 ; GFX10-NEXT: v_readfirstlane_b32 s27, v29
4758 ; GFX10-NEXT: v_readfirstlane_b32 s28, v30
4759 ; GFX10-NEXT: v_readfirstlane_b32 s29, v31
4760 ; GFX10-NEXT: v_readfirstlane_b32 s30, v32
4761 ; GFX10-NEXT: v_readfirstlane_b32 s31, v33
4762 ; GFX10-NEXT: ; return to shader part epilog
4764 ; GFX11-LABEL: dyn_insertelement_v16i64_s_v_s:
4765 ; GFX11: ; %bb.0: ; %entry
4766 ; GFX11-NEXT: s_mov_b32 s1, s3
4767 ; GFX11-NEXT: s_mov_b32 s3, s5
4768 ; GFX11-NEXT: s_mov_b32 s5, s7
4769 ; GFX11-NEXT: s_mov_b32 s7, s9
4770 ; GFX11-NEXT: s_mov_b32 s9, s11
4771 ; GFX11-NEXT: s_mov_b32 s11, s13
4772 ; GFX11-NEXT: s_mov_b32 s13, s15
4773 ; GFX11-NEXT: s_mov_b32 s15, s17
4774 ; GFX11-NEXT: s_mov_b32 s17, s19
4775 ; GFX11-NEXT: s_mov_b32 s19, s21
4776 ; GFX11-NEXT: s_mov_b32 s21, s23
4777 ; GFX11-NEXT: s_mov_b32 s23, s25
4778 ; GFX11-NEXT: s_mov_b32 s25, s27
4779 ; GFX11-NEXT: s_mov_b32 s27, s29
4780 ; GFX11-NEXT: s_mov_b32 s29, s31
4781 ; GFX11-NEXT: s_mov_b32 s31, s33
4782 ; GFX11-NEXT: s_mov_b32 s0, s2
4783 ; GFX11-NEXT: s_mov_b32 s2, s4
4784 ; GFX11-NEXT: s_mov_b32 s4, s6
4785 ; GFX11-NEXT: s_mov_b32 s6, s8
4786 ; GFX11-NEXT: s_mov_b32 s8, s10
4787 ; GFX11-NEXT: s_mov_b32 s10, s12
4788 ; GFX11-NEXT: s_mov_b32 s12, s14
4789 ; GFX11-NEXT: s_mov_b32 s14, s16
4790 ; GFX11-NEXT: s_mov_b32 s16, s18
4791 ; GFX11-NEXT: s_mov_b32 s18, s20
4792 ; GFX11-NEXT: s_mov_b32 s20, s22
4793 ; GFX11-NEXT: s_mov_b32 s22, s24
4794 ; GFX11-NEXT: s_mov_b32 s24, s26
4795 ; GFX11-NEXT: s_mov_b32 s26, s28
4796 ; GFX11-NEXT: s_mov_b32 s28, s30
4797 ; GFX11-NEXT: s_mov_b32 s30, s32
4798 ; GFX11-NEXT: v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30
4799 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
4800 ; GFX11-NEXT: s_lshl_b32 m0, s34, 1
4801 ; GFX11-NEXT: v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28
4802 ; GFX11-NEXT: v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26
4803 ; GFX11-NEXT: v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24
4804 ; GFX11-NEXT: v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22
4805 ; GFX11-NEXT: v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20
4806 ; GFX11-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
4807 ; GFX11-NEXT: v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16
4808 ; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
4809 ; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
4810 ; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
4811 ; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
4812 ; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
4813 ; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
4814 ; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
4815 ; GFX11-NEXT: v_movreld_b32_e32 v2, v0
4816 ; GFX11-NEXT: v_movreld_b32_e32 v3, v1
4817 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
4818 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
4819 ; GFX11-NEXT: v_readfirstlane_b32 s2, v4
4820 ; GFX11-NEXT: v_readfirstlane_b32 s3, v5
4821 ; GFX11-NEXT: v_readfirstlane_b32 s4, v6
4822 ; GFX11-NEXT: v_readfirstlane_b32 s5, v7
4823 ; GFX11-NEXT: v_readfirstlane_b32 s6, v8
4824 ; GFX11-NEXT: v_readfirstlane_b32 s7, v9
4825 ; GFX11-NEXT: v_readfirstlane_b32 s8, v10
4826 ; GFX11-NEXT: v_readfirstlane_b32 s9, v11
4827 ; GFX11-NEXT: v_readfirstlane_b32 s10, v12
4828 ; GFX11-NEXT: v_readfirstlane_b32 s11, v13
4829 ; GFX11-NEXT: v_readfirstlane_b32 s12, v14
4830 ; GFX11-NEXT: v_readfirstlane_b32 s13, v15
4831 ; GFX11-NEXT: v_readfirstlane_b32 s14, v16
4832 ; GFX11-NEXT: v_readfirstlane_b32 s15, v17
4833 ; GFX11-NEXT: v_readfirstlane_b32 s16, v18
4834 ; GFX11-NEXT: v_readfirstlane_b32 s17, v19
4835 ; GFX11-NEXT: v_readfirstlane_b32 s18, v20
4836 ; GFX11-NEXT: v_readfirstlane_b32 s19, v21
4837 ; GFX11-NEXT: v_readfirstlane_b32 s20, v22
4838 ; GFX11-NEXT: v_readfirstlane_b32 s21, v23
4839 ; GFX11-NEXT: v_readfirstlane_b32 s22, v24
4840 ; GFX11-NEXT: v_readfirstlane_b32 s23, v25
4841 ; GFX11-NEXT: v_readfirstlane_b32 s24, v26
4842 ; GFX11-NEXT: v_readfirstlane_b32 s25, v27
4843 ; GFX11-NEXT: v_readfirstlane_b32 s26, v28
4844 ; GFX11-NEXT: v_readfirstlane_b32 s27, v29
4845 ; GFX11-NEXT: v_readfirstlane_b32 s28, v30
4846 ; GFX11-NEXT: v_readfirstlane_b32 s29, v31
4847 ; GFX11-NEXT: v_readfirstlane_b32 s30, v32
4848 ; GFX11-NEXT: v_readfirstlane_b32 s31, v33
4849 ; GFX11-NEXT: ; return to shader part epilog
4851 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
4852 ret <16 x i64> %insert
4855 define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) {
4856 ; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s:
4857 ; GPRIDX: ; %bb.0: ; %entry
4858 ; GPRIDX-NEXT: s_mov_b32 s1, s3
4859 ; GPRIDX-NEXT: s_mov_b32 s3, s5
4860 ; GPRIDX-NEXT: s_mov_b32 s5, s7
4861 ; GPRIDX-NEXT: s_mov_b32 s7, s9
4862 ; GPRIDX-NEXT: s_mov_b32 s9, s11
4863 ; GPRIDX-NEXT: s_mov_b32 s11, s13
4864 ; GPRIDX-NEXT: s_mov_b32 s13, s15
4865 ; GPRIDX-NEXT: s_mov_b32 s15, s17
4866 ; GPRIDX-NEXT: s_mov_b32 s17, s19
4867 ; GPRIDX-NEXT: s_mov_b32 s19, s21
4868 ; GPRIDX-NEXT: s_mov_b32 s21, s23
4869 ; GPRIDX-NEXT: s_mov_b32 s23, s25
4870 ; GPRIDX-NEXT: s_mov_b32 s25, s27
4871 ; GPRIDX-NEXT: s_mov_b32 s27, s29
4872 ; GPRIDX-NEXT: s_mov_b32 s29, s31
4873 ; GPRIDX-NEXT: s_mov_b32 s31, s33
4874 ; GPRIDX-NEXT: s_mov_b32 s0, s2
4875 ; GPRIDX-NEXT: s_mov_b32 s2, s4
4876 ; GPRIDX-NEXT: s_mov_b32 s4, s6
4877 ; GPRIDX-NEXT: s_mov_b32 s6, s8
4878 ; GPRIDX-NEXT: s_mov_b32 s8, s10
4879 ; GPRIDX-NEXT: s_mov_b32 s10, s12
4880 ; GPRIDX-NEXT: s_mov_b32 s12, s14
4881 ; GPRIDX-NEXT: s_mov_b32 s14, s16
4882 ; GPRIDX-NEXT: s_mov_b32 s16, s18
4883 ; GPRIDX-NEXT: s_mov_b32 s18, s20
4884 ; GPRIDX-NEXT: s_mov_b32 s20, s22
4885 ; GPRIDX-NEXT: s_mov_b32 s22, s24
4886 ; GPRIDX-NEXT: s_mov_b32 s24, s26
4887 ; GPRIDX-NEXT: s_mov_b32 s26, s28
4888 ; GPRIDX-NEXT: s_mov_b32 s28, s30
4889 ; GPRIDX-NEXT: s_mov_b32 s30, s32
4890 ; GPRIDX-NEXT: v_mov_b32_e32 v33, s31
4891 ; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1
4892 ; GPRIDX-NEXT: v_mov_b32_e32 v32, s30
4893 ; GPRIDX-NEXT: v_mov_b32_e32 v31, s29
4894 ; GPRIDX-NEXT: v_mov_b32_e32 v30, s28
4895 ; GPRIDX-NEXT: v_mov_b32_e32 v29, s27
4896 ; GPRIDX-NEXT: v_mov_b32_e32 v28, s26
4897 ; GPRIDX-NEXT: v_mov_b32_e32 v27, s25
4898 ; GPRIDX-NEXT: v_mov_b32_e32 v26, s24
4899 ; GPRIDX-NEXT: v_mov_b32_e32 v25, s23
4900 ; GPRIDX-NEXT: v_mov_b32_e32 v24, s22
4901 ; GPRIDX-NEXT: v_mov_b32_e32 v23, s21
4902 ; GPRIDX-NEXT: v_mov_b32_e32 v22, s20
4903 ; GPRIDX-NEXT: v_mov_b32_e32 v21, s19
4904 ; GPRIDX-NEXT: v_mov_b32_e32 v20, s18
4905 ; GPRIDX-NEXT: v_mov_b32_e32 v19, s17
4906 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s16
4907 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
4908 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
4909 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
4910 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
4911 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
4912 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
4913 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
4914 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
4915 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
4916 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
4917 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
4918 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
4919 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
4920 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
4921 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
4922 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
4923 ; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
4924 ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
4925 ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
4926 ; GPRIDX-NEXT: s_set_gpr_idx_off
4927 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
4928 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
4929 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4
4930 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5
4931 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6
4932 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7
4933 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
4934 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9
4935 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10
4936 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11
4937 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12
4938 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13
4939 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14
4940 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15
4941 ; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16
4942 ; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17
4943 ; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18
4944 ; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19
4945 ; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20
4946 ; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21
4947 ; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22
4948 ; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23
4949 ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24
4950 ; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25
4951 ; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26
4952 ; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27
4953 ; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28
4954 ; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29
4955 ; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30
4956 ; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31
4957 ; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32
4958 ; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33
4959 ; GPRIDX-NEXT: ; return to shader part epilog
4961 ; GFX10-LABEL: dyn_insertelement_v16f64_s_v_s:
4962 ; GFX10: ; %bb.0: ; %entry
4963 ; GFX10-NEXT: s_mov_b32 s1, s3
4964 ; GFX10-NEXT: s_mov_b32 s3, s5
4965 ; GFX10-NEXT: s_mov_b32 s5, s7
4966 ; GFX10-NEXT: s_mov_b32 s7, s9
4967 ; GFX10-NEXT: s_mov_b32 s9, s11
4968 ; GFX10-NEXT: s_mov_b32 s11, s13
4969 ; GFX10-NEXT: s_mov_b32 s13, s15
4970 ; GFX10-NEXT: s_mov_b32 s15, s17
4971 ; GFX10-NEXT: s_mov_b32 s17, s19
4972 ; GFX10-NEXT: s_mov_b32 s19, s21
4973 ; GFX10-NEXT: s_mov_b32 s21, s23
4974 ; GFX10-NEXT: s_mov_b32 s23, s25
4975 ; GFX10-NEXT: s_mov_b32 s25, s27
4976 ; GFX10-NEXT: s_mov_b32 s27, s29
4977 ; GFX10-NEXT: s_mov_b32 s29, s31
4978 ; GFX10-NEXT: s_mov_b32 s31, s33
4979 ; GFX10-NEXT: s_mov_b32 s0, s2
4980 ; GFX10-NEXT: s_mov_b32 s2, s4
4981 ; GFX10-NEXT: s_mov_b32 s4, s6
4982 ; GFX10-NEXT: s_mov_b32 s6, s8
4983 ; GFX10-NEXT: s_mov_b32 s8, s10
4984 ; GFX10-NEXT: s_mov_b32 s10, s12
4985 ; GFX10-NEXT: s_mov_b32 s12, s14
4986 ; GFX10-NEXT: s_mov_b32 s14, s16
4987 ; GFX10-NEXT: s_mov_b32 s16, s18
4988 ; GFX10-NEXT: s_mov_b32 s18, s20
4989 ; GFX10-NEXT: s_mov_b32 s20, s22
4990 ; GFX10-NEXT: s_mov_b32 s22, s24
4991 ; GFX10-NEXT: s_mov_b32 s24, s26
4992 ; GFX10-NEXT: s_mov_b32 s26, s28
4993 ; GFX10-NEXT: s_mov_b32 s28, s30
4994 ; GFX10-NEXT: s_mov_b32 s30, s32
4995 ; GFX10-NEXT: v_mov_b32_e32 v33, s31
4996 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
4997 ; GFX10-NEXT: s_lshl_b32 m0, s34, 1
4998 ; GFX10-NEXT: v_mov_b32_e32 v32, s30
4999 ; GFX10-NEXT: v_mov_b32_e32 v31, s29
5000 ; GFX10-NEXT: v_mov_b32_e32 v30, s28
5001 ; GFX10-NEXT: v_mov_b32_e32 v29, s27
5002 ; GFX10-NEXT: v_mov_b32_e32 v28, s26
5003 ; GFX10-NEXT: v_mov_b32_e32 v27, s25
5004 ; GFX10-NEXT: v_mov_b32_e32 v26, s24
5005 ; GFX10-NEXT: v_mov_b32_e32 v25, s23
5006 ; GFX10-NEXT: v_mov_b32_e32 v24, s22
5007 ; GFX10-NEXT: v_mov_b32_e32 v23, s21
5008 ; GFX10-NEXT: v_mov_b32_e32 v22, s20
5009 ; GFX10-NEXT: v_mov_b32_e32 v21, s19
5010 ; GFX10-NEXT: v_mov_b32_e32 v20, s18
5011 ; GFX10-NEXT: v_mov_b32_e32 v19, s17
5012 ; GFX10-NEXT: v_mov_b32_e32 v18, s16
5013 ; GFX10-NEXT: v_mov_b32_e32 v17, s15
5014 ; GFX10-NEXT: v_mov_b32_e32 v16, s14
5015 ; GFX10-NEXT: v_mov_b32_e32 v15, s13
5016 ; GFX10-NEXT: v_mov_b32_e32 v14, s12
5017 ; GFX10-NEXT: v_mov_b32_e32 v13, s11
5018 ; GFX10-NEXT: v_mov_b32_e32 v12, s10
5019 ; GFX10-NEXT: v_mov_b32_e32 v11, s9
5020 ; GFX10-NEXT: v_mov_b32_e32 v10, s8
5021 ; GFX10-NEXT: v_mov_b32_e32 v9, s7
5022 ; GFX10-NEXT: v_mov_b32_e32 v8, s6
5023 ; GFX10-NEXT: v_mov_b32_e32 v7, s5
5024 ; GFX10-NEXT: v_mov_b32_e32 v6, s4
5025 ; GFX10-NEXT: v_mov_b32_e32 v5, s3
5026 ; GFX10-NEXT: v_mov_b32_e32 v4, s2
5027 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
5028 ; GFX10-NEXT: v_movreld_b32_e32 v2, v0
5029 ; GFX10-NEXT: v_movreld_b32_e32 v3, v1
5030 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
5031 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
5032 ; GFX10-NEXT: v_readfirstlane_b32 s2, v4
5033 ; GFX10-NEXT: v_readfirstlane_b32 s3, v5
5034 ; GFX10-NEXT: v_readfirstlane_b32 s4, v6
5035 ; GFX10-NEXT: v_readfirstlane_b32 s5, v7
5036 ; GFX10-NEXT: v_readfirstlane_b32 s6, v8
5037 ; GFX10-NEXT: v_readfirstlane_b32 s7, v9
5038 ; GFX10-NEXT: v_readfirstlane_b32 s8, v10
5039 ; GFX10-NEXT: v_readfirstlane_b32 s9, v11
5040 ; GFX10-NEXT: v_readfirstlane_b32 s10, v12
5041 ; GFX10-NEXT: v_readfirstlane_b32 s11, v13
5042 ; GFX10-NEXT: v_readfirstlane_b32 s12, v14
5043 ; GFX10-NEXT: v_readfirstlane_b32 s13, v15
5044 ; GFX10-NEXT: v_readfirstlane_b32 s14, v16
5045 ; GFX10-NEXT: v_readfirstlane_b32 s15, v17
5046 ; GFX10-NEXT: v_readfirstlane_b32 s16, v18
5047 ; GFX10-NEXT: v_readfirstlane_b32 s17, v19
5048 ; GFX10-NEXT: v_readfirstlane_b32 s18, v20
5049 ; GFX10-NEXT: v_readfirstlane_b32 s19, v21
5050 ; GFX10-NEXT: v_readfirstlane_b32 s20, v22
5051 ; GFX10-NEXT: v_readfirstlane_b32 s21, v23
5052 ; GFX10-NEXT: v_readfirstlane_b32 s22, v24
5053 ; GFX10-NEXT: v_readfirstlane_b32 s23, v25
5054 ; GFX10-NEXT: v_readfirstlane_b32 s24, v26
5055 ; GFX10-NEXT: v_readfirstlane_b32 s25, v27
5056 ; GFX10-NEXT: v_readfirstlane_b32 s26, v28
5057 ; GFX10-NEXT: v_readfirstlane_b32 s27, v29
5058 ; GFX10-NEXT: v_readfirstlane_b32 s28, v30
5059 ; GFX10-NEXT: v_readfirstlane_b32 s29, v31
5060 ; GFX10-NEXT: v_readfirstlane_b32 s30, v32
5061 ; GFX10-NEXT: v_readfirstlane_b32 s31, v33
5062 ; GFX10-NEXT: ; return to shader part epilog
5064 ; GFX11-LABEL: dyn_insertelement_v16f64_s_v_s:
5065 ; GFX11: ; %bb.0: ; %entry
5066 ; GFX11-NEXT: s_mov_b32 s1, s3
5067 ; GFX11-NEXT: s_mov_b32 s3, s5
5068 ; GFX11-NEXT: s_mov_b32 s5, s7
5069 ; GFX11-NEXT: s_mov_b32 s7, s9
5070 ; GFX11-NEXT: s_mov_b32 s9, s11
5071 ; GFX11-NEXT: s_mov_b32 s11, s13
5072 ; GFX11-NEXT: s_mov_b32 s13, s15
5073 ; GFX11-NEXT: s_mov_b32 s15, s17
5074 ; GFX11-NEXT: s_mov_b32 s17, s19
5075 ; GFX11-NEXT: s_mov_b32 s19, s21
5076 ; GFX11-NEXT: s_mov_b32 s21, s23
5077 ; GFX11-NEXT: s_mov_b32 s23, s25
5078 ; GFX11-NEXT: s_mov_b32 s25, s27
5079 ; GFX11-NEXT: s_mov_b32 s27, s29
5080 ; GFX11-NEXT: s_mov_b32 s29, s31
5081 ; GFX11-NEXT: s_mov_b32 s31, s33
5082 ; GFX11-NEXT: s_mov_b32 s0, s2
5083 ; GFX11-NEXT: s_mov_b32 s2, s4
5084 ; GFX11-NEXT: s_mov_b32 s4, s6
5085 ; GFX11-NEXT: s_mov_b32 s6, s8
5086 ; GFX11-NEXT: s_mov_b32 s8, s10
5087 ; GFX11-NEXT: s_mov_b32 s10, s12
5088 ; GFX11-NEXT: s_mov_b32 s12, s14
5089 ; GFX11-NEXT: s_mov_b32 s14, s16
5090 ; GFX11-NEXT: s_mov_b32 s16, s18
5091 ; GFX11-NEXT: s_mov_b32 s18, s20
5092 ; GFX11-NEXT: s_mov_b32 s20, s22
5093 ; GFX11-NEXT: s_mov_b32 s22, s24
5094 ; GFX11-NEXT: s_mov_b32 s24, s26
5095 ; GFX11-NEXT: s_mov_b32 s26, s28
5096 ; GFX11-NEXT: s_mov_b32 s28, s30
5097 ; GFX11-NEXT: s_mov_b32 s30, s32
5098 ; GFX11-NEXT: v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30
5099 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
5100 ; GFX11-NEXT: s_lshl_b32 m0, s34, 1
5101 ; GFX11-NEXT: v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28
5102 ; GFX11-NEXT: v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26
5103 ; GFX11-NEXT: v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24
5104 ; GFX11-NEXT: v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22
5105 ; GFX11-NEXT: v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20
5106 ; GFX11-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
5107 ; GFX11-NEXT: v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16
5108 ; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
5109 ; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
5110 ; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
5111 ; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
5112 ; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
5113 ; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
5114 ; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
5115 ; GFX11-NEXT: v_movreld_b32_e32 v2, v0
5116 ; GFX11-NEXT: v_movreld_b32_e32 v3, v1
5117 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
5118 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
5119 ; GFX11-NEXT: v_readfirstlane_b32 s2, v4
5120 ; GFX11-NEXT: v_readfirstlane_b32 s3, v5
5121 ; GFX11-NEXT: v_readfirstlane_b32 s4, v6
5122 ; GFX11-NEXT: v_readfirstlane_b32 s5, v7
5123 ; GFX11-NEXT: v_readfirstlane_b32 s6, v8
5124 ; GFX11-NEXT: v_readfirstlane_b32 s7, v9
5125 ; GFX11-NEXT: v_readfirstlane_b32 s8, v10
5126 ; GFX11-NEXT: v_readfirstlane_b32 s9, v11
5127 ; GFX11-NEXT: v_readfirstlane_b32 s10, v12
5128 ; GFX11-NEXT: v_readfirstlane_b32 s11, v13
5129 ; GFX11-NEXT: v_readfirstlane_b32 s12, v14
5130 ; GFX11-NEXT: v_readfirstlane_b32 s13, v15
5131 ; GFX11-NEXT: v_readfirstlane_b32 s14, v16
5132 ; GFX11-NEXT: v_readfirstlane_b32 s15, v17
5133 ; GFX11-NEXT: v_readfirstlane_b32 s16, v18
5134 ; GFX11-NEXT: v_readfirstlane_b32 s17, v19
5135 ; GFX11-NEXT: v_readfirstlane_b32 s18, v20
5136 ; GFX11-NEXT: v_readfirstlane_b32 s19, v21
5137 ; GFX11-NEXT: v_readfirstlane_b32 s20, v22
5138 ; GFX11-NEXT: v_readfirstlane_b32 s21, v23
5139 ; GFX11-NEXT: v_readfirstlane_b32 s22, v24
5140 ; GFX11-NEXT: v_readfirstlane_b32 s23, v25
5141 ; GFX11-NEXT: v_readfirstlane_b32 s24, v26
5142 ; GFX11-NEXT: v_readfirstlane_b32 s25, v27
5143 ; GFX11-NEXT: v_readfirstlane_b32 s26, v28
5144 ; GFX11-NEXT: v_readfirstlane_b32 s27, v29
5145 ; GFX11-NEXT: v_readfirstlane_b32 s28, v30
5146 ; GFX11-NEXT: v_readfirstlane_b32 s29, v31
5147 ; GFX11-NEXT: v_readfirstlane_b32 s30, v32
5148 ; GFX11-NEXT: v_readfirstlane_b32 s31, v33
5149 ; GFX11-NEXT: ; return to shader part epilog
5151 %insert = insertelement <16 x double> %vec, double %val, i32 %idx
5152 ret <16 x double> %insert
; Dynamic insertelement into <7 x i32> where the vector, the inserted value
; and the index are all `inreg` arguments (the checks below show them in
; s2-s8, s9 and s10 respectively).
; Both the GPRIDX and the GFX10PLUS check groups expect the same expansion:
; seven s_cmp_eq_u32 / s_cselect_b32 pairs, one per element, each of which
; keeps the original element unless the index equals that element's position.
; The results are written to s0-s6.
5155 define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
5156 ; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s:
5157 ; GPRIDX: ; %bb.0: ; %entry
5158 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0
5159 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2
5160 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
5161 ; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3
5162 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
5163 ; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4
5164 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
5165 ; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5
5166 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
5167 ; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6
5168 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
5169 ; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7
5170 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
5171 ; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8
5172 ; GPRIDX-NEXT: ; return to shader part epilog
5174 ; GFX10PLUS-LABEL: dyn_insertelement_v7i32_s_s_s:
5175 ; GFX10PLUS: ; %bb.0: ; %entry
5176 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 0
5177 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s9, s2
5178 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 1
5179 ; GFX10PLUS-NEXT: s_cselect_b32 s1, s9, s3
5180 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 2
5181 ; GFX10PLUS-NEXT: s_cselect_b32 s2, s9, s4
5182 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 3
5183 ; GFX10PLUS-NEXT: s_cselect_b32 s3, s9, s5
5184 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 4
5185 ; GFX10PLUS-NEXT: s_cselect_b32 s4, s9, s6
5186 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 5
5187 ; GFX10PLUS-NEXT: s_cselect_b32 s5, s9, s7
5188 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 6
5189 ; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8
5190 ; GFX10PLUS-NEXT: ; return to shader part epilog
5192 %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx
5193 ret <7 x i32> %insert
; Dynamic insertelement into a vector of seven addrspace(3) pointers, with the
; vector, the inserted pointer and the index all passed `inreg`.
; The expected code treats each pointer as a single 32-bit scalar register:
; both check groups expect seven s_cmp_eq_u32 / s_cselect_b32 pairs that
; compare the index (s10) against 0..6 and pick either the new value (s9) or
; the original element (s2-s8), writing the results to s0-s6.
5196 define amdgpu_ps <7 x ptr addrspace(3)> @dyn_insertelement_v7p3i8_s_s_s(<7 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) {
5197 ; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s:
5198 ; GPRIDX: ; %bb.0: ; %entry
5199 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0
5200 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2
5201 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
5202 ; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3
5203 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
5204 ; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4
5205 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
5206 ; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5
5207 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
5208 ; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6
5209 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
5210 ; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7
5211 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
5212 ; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8
5213 ; GPRIDX-NEXT: ; return to shader part epilog
5215 ; GFX10PLUS-LABEL: dyn_insertelement_v7p3i8_s_s_s:
5216 ; GFX10PLUS: ; %bb.0: ; %entry
5217 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 0
5218 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s9, s2
5219 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 1
5220 ; GFX10PLUS-NEXT: s_cselect_b32 s1, s9, s3
5221 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 2
5222 ; GFX10PLUS-NEXT: s_cselect_b32 s2, s9, s4
5223 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 3
5224 ; GFX10PLUS-NEXT: s_cselect_b32 s3, s9, s5
5225 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 4
5226 ; GFX10PLUS-NEXT: s_cselect_b32 s4, s9, s6
5227 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 5
5228 ; GFX10PLUS-NEXT: s_cselect_b32 s5, s9, s7
5229 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 6
5230 ; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8
5231 ; GFX10PLUS-NEXT: ; return to shader part epilog
5233 %insert = insertelement <7 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
5234 ret <7 x ptr addrspace(3)> %insert
; Dynamic insertelement into <7 x float> with an `inreg` vector (s2-s8 in the
; checks), a non-inreg value (v0) and an `inreg` index (s9).
; Expected code is a per-element v_cmp_eq_u32 / v_cndmask_b32 chain.
; The GPRIDX checks first copy every scalar element into a vector register
; with v_mov_b32 and select between two vector registers; the GFX10PLUS checks
; use the scalar element directly as an operand of v_cndmask_b32.
; Element 0 is selected into v7 and only moved to v0 at the very end, because
; v0 still holds the inserted value while the other selects run.
5237 define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) {
5238 ; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s:
5239 ; GPRIDX: ; %bb.0: ; %entry
5240 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
5241 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 0
5242 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
5243 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v1, v0, vcc
5244 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 1
5245 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
5246 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
5247 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 2
5248 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
5249 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
5250 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 3
5251 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
5252 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc
5253 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 4
5254 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
5255 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
5256 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 5
5257 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
5258 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc
5259 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 6
5260 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
5261 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v7
5262 ; GPRIDX-NEXT: ; return to shader part epilog
5264 ; GFX10PLUS-LABEL: dyn_insertelement_v7f32_s_v_s:
5265 ; GFX10PLUS: ; %bb.0: ; %entry
5266 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0
5267 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, s2, v0, vcc_lo
5268 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1
5269 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s3, v0, vcc_lo
5270 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2
5271 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5272 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3
5273 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5274 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4
5275 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5276 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5
5277 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5278 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6
5279 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
5280 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v7
5281 ; GFX10PLUS-NEXT: ; return to shader part epilog
5283 %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5284 ret <7 x float> %insert
; Dynamic insertelement into <7 x float> with an `inreg` vector (s2-s8 in the
; checks) and a non-inreg value (v0) and index (v1).
; Expected code is a per-element v_cmp_eq_u32 / v_cndmask_b32 chain keyed on
; v1. Elements 0 and 1 are selected into temporaries (v8 and v7) and moved
; into v0/v1 only at the end, since v0 and v1 are still read as inputs by the
; remaining selects and compares.
; The GPRIDX checks copy each scalar element to a vector register first.
; The GFX10 and GFX11 checks are separate groups because the GFX11 output
; fuses the final v1 move and the last select into one v_dual_mov_b32 /
; v_dual_cndmask_b32 instruction.
5287 define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) {
5288 ; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v:
5289 ; GPRIDX: ; %bb.0: ; %entry
5290 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
5291 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
5292 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
5293 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc
5294 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
5295 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
5296 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc
5297 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
5298 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
5299 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
5300 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
5301 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
5302 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
5303 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
5304 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
5305 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
5306 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
5307 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
5308 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v9, v0, vcc
5309 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
5310 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc
5311 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
5312 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v7
5313 ; GPRIDX-NEXT: ; return to shader part epilog
5315 ; GFX10-LABEL: dyn_insertelement_v7f32_s_v_v:
5316 ; GFX10: ; %bb.0: ; %entry
5317 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5318 ; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
5319 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
5320 ; GFX10-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo
5321 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
5322 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5323 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
5324 ; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5325 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
5326 ; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5327 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
5328 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5329 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
5330 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
5331 ; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
5332 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
5333 ; GFX10-NEXT: ; return to shader part epilog
5335 ; GFX11-LABEL: dyn_insertelement_v7f32_s_v_v:
5336 ; GFX11: ; %bb.0: ; %entry
5337 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5338 ; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
5339 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
5340 ; GFX11-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo
5341 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
5342 ; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5343 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
5344 ; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5345 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
5346 ; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5347 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
5348 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5349 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
5350 ; GFX11-NEXT: v_dual_mov_b32 v1, v7 :: v_dual_cndmask_b32 v6, s8, v0
5351 ; GFX11-NEXT: v_mov_b32_e32 v0, v8
5352 ; GFX11-NEXT: ; return to shader part epilog
5354 %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5355 ret <7 x float> %insert
; Dynamic insertelement into <7 x float> with a non-inreg vector (v0-v6 in the
; checks), a non-inreg value (v7) and an `inreg` index (s2).
; Both check groups expect seven v_cmp_eq_u32_e64 / v_cndmask_b32 pairs that
; update each element register in place; no extra copies are needed because
; the vector already lives in the result registers. The two groups differ only
; in the condition register name (vcc versus vcc_lo).
5358 define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) {
5359 ; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s:
5360 ; GPRIDX: ; %bb.0: ; %entry
5361 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
5362 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
5363 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
5364 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
5365 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
5366 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
5367 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
5368 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
5369 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
5370 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
5371 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
5372 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
5373 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
5374 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
5375 ; GPRIDX-NEXT: ; return to shader part epilog
5377 ; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_s:
5378 ; GFX10PLUS: ; %bb.0: ; %entry
5379 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
5380 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
5381 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
5382 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
5383 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
5384 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
5385 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
5386 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
5387 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
5388 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo
5389 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
5390 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
5391 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
5392 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo
5393 ; GFX10PLUS-NEXT: ; return to shader part epilog
5395 %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5396 ret <7 x float> %insert
; Dynamic insertelement into <7 x float> where none of the arguments is
; `inreg`: the checks show the vector in v0-v6, the value in v7 and the index
; in v8.
; Both check groups expect seven v_cmp_eq_u32_e32 / v_cndmask_b32 pairs that
; compare v8 against 0..6 and update each element register in place. The two
; groups differ only in the condition register name (vcc versus vcc_lo).
5399 define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) {
5400 ; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v:
5401 ; GPRIDX: ; %bb.0: ; %entry
5402 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
5403 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
5404 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
5405 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
5406 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
5407 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
5408 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
5409 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
5410 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
5411 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
5412 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
5413 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
5414 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
5415 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
5416 ; GPRIDX-NEXT: ; return to shader part epilog
5418 ; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_v:
5419 ; GFX10PLUS: ; %bb.0: ; %entry
5420 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
5421 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
5422 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
5423 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
5424 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
5425 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
5426 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
5427 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
5428 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
5429 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo
5430 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
5431 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
5432 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
5433 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo
5434 ; GFX10PLUS-NEXT: ; return to shader part epilog
5436 %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5437 ret <7 x float> %insert
; Dynamic insertelement into <7 x double> with the vector, the value and the
; index all passed `inreg` (s2-s15, s[16:17] and s18 in the checks).
; Unlike the 32-bit element cases, the expected code here is an indexed scalar
; move rather than a compare/select chain: the 14 vector dwords are shifted
; down into s0-s13, the index is written to m0, and a single
; s_movreld_b64 s[0:1], s[16:17] performs the insert.
; The GPRIDX checks expect an s_nop 0 between the m0 write and the
; s_movreld_b64; the GFX10PLUS checks instead have the m0 write scheduled
; early among the copies and no s_nop.
5440 define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
5441 ; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s:
5442 ; GPRIDX: ; %bb.0: ; %entry
5443 ; GPRIDX-NEXT: s_mov_b32 s0, s2
5444 ; GPRIDX-NEXT: s_mov_b32 s1, s3
5445 ; GPRIDX-NEXT: s_mov_b32 s2, s4
5446 ; GPRIDX-NEXT: s_mov_b32 s3, s5
5447 ; GPRIDX-NEXT: s_mov_b32 s4, s6
5448 ; GPRIDX-NEXT: s_mov_b32 s5, s7
5449 ; GPRIDX-NEXT: s_mov_b32 s6, s8
5450 ; GPRIDX-NEXT: s_mov_b32 s7, s9
5451 ; GPRIDX-NEXT: s_mov_b32 s8, s10
5452 ; GPRIDX-NEXT: s_mov_b32 s9, s11
5453 ; GPRIDX-NEXT: s_mov_b32 s10, s12
5454 ; GPRIDX-NEXT: s_mov_b32 s11, s13
5455 ; GPRIDX-NEXT: s_mov_b32 s12, s14
5456 ; GPRIDX-NEXT: s_mov_b32 s13, s15
5457 ; GPRIDX-NEXT: s_mov_b32 m0, s18
5458 ; GPRIDX-NEXT: s_nop 0
5459 ; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[16:17]
5460 ; GPRIDX-NEXT: ; return to shader part epilog
5462 ; GFX10PLUS-LABEL: dyn_insertelement_v7f64_s_s_s:
5463 ; GFX10PLUS: ; %bb.0: ; %entry
5464 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
5465 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
5466 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18
5467 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
5468 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
5469 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
5470 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
5471 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
5472 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
5473 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10
5474 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11
5475 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12
5476 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13
5477 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14
5478 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15
5479 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[16:17]
5480 ; GFX10PLUS-NEXT: ; return to shader part epilog
5482 %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5483 ret <7 x double> %insert
; Dynamic insertelement into <7 x double> with an `inreg` vector (s2-s15 in
; the checks), a non-inreg value (v[0:1]) and an `inreg` index (s16).
; Expected shape in all three check groups:
;   1. shift the vector dwords down into s0-s13;
;   2. copy them into vector registers starting at v2;
;   3. double the index with s_lshl_b32 ..., s16, 1 (each double occupies two
;      32-bit registers) and do two indexed 32-bit moves, one per half of the
;      value, based at v2 and v3;
;   4. read the 14 result dwords back to s0-s13 with v_readfirstlane_b32.
; The GPRIDX checks do the indexed write with s_set_gpr_idx_on/off around
; plain v_mov_b32; the GFX10 and GFX11 checks put the doubled index in m0 and
; use v_movreld_b32. GFX11 additionally pairs the copies into v_dual_mov_b32.
; NOTE(review): the checks also copy s14/s15 into v16/v17, two dwords beyond
; the 14 that hold the vector - presumably the vector is widened to eight
; doubles for the indexed write; confirm against the legalizer.
5486 define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) {
5487 ; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s:
5488 ; GPRIDX: ; %bb.0: ; %entry
5489 ; GPRIDX-NEXT: s_mov_b32 s0, s2
5490 ; GPRIDX-NEXT: s_mov_b32 s1, s3
5491 ; GPRIDX-NEXT: s_mov_b32 s2, s4
5492 ; GPRIDX-NEXT: s_mov_b32 s3, s5
5493 ; GPRIDX-NEXT: s_mov_b32 s4, s6
5494 ; GPRIDX-NEXT: s_mov_b32 s5, s7
5495 ; GPRIDX-NEXT: s_mov_b32 s6, s8
5496 ; GPRIDX-NEXT: s_mov_b32 s7, s9
5497 ; GPRIDX-NEXT: s_mov_b32 s8, s10
5498 ; GPRIDX-NEXT: s_mov_b32 s9, s11
5499 ; GPRIDX-NEXT: s_mov_b32 s10, s12
5500 ; GPRIDX-NEXT: s_mov_b32 s11, s13
5501 ; GPRIDX-NEXT: s_mov_b32 s12, s14
5502 ; GPRIDX-NEXT: s_mov_b32 s13, s15
5503 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
5504 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
5505 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
5506 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
5507 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
5508 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
5509 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
5510 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
5511 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
5512 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
5513 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
5514 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
5515 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
5516 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
5517 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
5518 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
5519 ; GPRIDX-NEXT: s_lshl_b32 s0, s16, 1
5520 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
5521 ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
5522 ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
5523 ; GPRIDX-NEXT: s_set_gpr_idx_off
5524 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
5525 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
5526 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4
5527 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5
5528 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6
5529 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7
5530 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
5531 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9
5532 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10
5533 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11
5534 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12
5535 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13
5536 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14
5537 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15
5538 ; GPRIDX-NEXT: ; return to shader part epilog
5540 ; GFX10-LABEL: dyn_insertelement_v7f64_s_v_s:
5541 ; GFX10: ; %bb.0: ; %entry
5542 ; GFX10-NEXT: s_mov_b32 s0, s2
5543 ; GFX10-NEXT: s_mov_b32 s1, s3
5544 ; GFX10-NEXT: s_mov_b32 s2, s4
5545 ; GFX10-NEXT: s_mov_b32 s3, s5
5546 ; GFX10-NEXT: s_mov_b32 s4, s6
5547 ; GFX10-NEXT: s_mov_b32 s5, s7
5548 ; GFX10-NEXT: s_mov_b32 s6, s8
5549 ; GFX10-NEXT: s_mov_b32 s7, s9
5550 ; GFX10-NEXT: s_mov_b32 s8, s10
5551 ; GFX10-NEXT: s_mov_b32 s9, s11
5552 ; GFX10-NEXT: s_mov_b32 s10, s12
5553 ; GFX10-NEXT: s_mov_b32 s11, s13
5554 ; GFX10-NEXT: s_mov_b32 s12, s14
5555 ; GFX10-NEXT: s_mov_b32 s13, s15
5556 ; GFX10-NEXT: v_mov_b32_e32 v17, s15
5557 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
5558 ; GFX10-NEXT: s_lshl_b32 m0, s16, 1
5559 ; GFX10-NEXT: v_mov_b32_e32 v16, s14
5560 ; GFX10-NEXT: v_mov_b32_e32 v15, s13
5561 ; GFX10-NEXT: v_mov_b32_e32 v14, s12
5562 ; GFX10-NEXT: v_mov_b32_e32 v13, s11
5563 ; GFX10-NEXT: v_mov_b32_e32 v12, s10
5564 ; GFX10-NEXT: v_mov_b32_e32 v11, s9
5565 ; GFX10-NEXT: v_mov_b32_e32 v10, s8
5566 ; GFX10-NEXT: v_mov_b32_e32 v9, s7
5567 ; GFX10-NEXT: v_mov_b32_e32 v8, s6
5568 ; GFX10-NEXT: v_mov_b32_e32 v7, s5
5569 ; GFX10-NEXT: v_mov_b32_e32 v6, s4
5570 ; GFX10-NEXT: v_mov_b32_e32 v5, s3
5571 ; GFX10-NEXT: v_mov_b32_e32 v4, s2
5572 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
5573 ; GFX10-NEXT: v_movreld_b32_e32 v2, v0
5574 ; GFX10-NEXT: v_movreld_b32_e32 v3, v1
5575 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
5576 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
5577 ; GFX10-NEXT: v_readfirstlane_b32 s2, v4
5578 ; GFX10-NEXT: v_readfirstlane_b32 s3, v5
5579 ; GFX10-NEXT: v_readfirstlane_b32 s4, v6
5580 ; GFX10-NEXT: v_readfirstlane_b32 s5, v7
5581 ; GFX10-NEXT: v_readfirstlane_b32 s6, v8
5582 ; GFX10-NEXT: v_readfirstlane_b32 s7, v9
5583 ; GFX10-NEXT: v_readfirstlane_b32 s8, v10
5584 ; GFX10-NEXT: v_readfirstlane_b32 s9, v11
5585 ; GFX10-NEXT: v_readfirstlane_b32 s10, v12
5586 ; GFX10-NEXT: v_readfirstlane_b32 s11, v13
5587 ; GFX10-NEXT: v_readfirstlane_b32 s12, v14
5588 ; GFX10-NEXT: v_readfirstlane_b32 s13, v15
5589 ; GFX10-NEXT: ; return to shader part epilog
5591 ; GFX11-LABEL: dyn_insertelement_v7f64_s_v_s:
5592 ; GFX11: ; %bb.0: ; %entry
5593 ; GFX11-NEXT: s_mov_b32 s0, s2
5594 ; GFX11-NEXT: s_mov_b32 s1, s3
5595 ; GFX11-NEXT: s_mov_b32 s2, s4
5596 ; GFX11-NEXT: s_mov_b32 s3, s5
5597 ; GFX11-NEXT: s_mov_b32 s4, s6
5598 ; GFX11-NEXT: s_mov_b32 s5, s7
5599 ; GFX11-NEXT: s_mov_b32 s6, s8
5600 ; GFX11-NEXT: s_mov_b32 s7, s9
5601 ; GFX11-NEXT: s_mov_b32 s8, s10
5602 ; GFX11-NEXT: s_mov_b32 s9, s11
5603 ; GFX11-NEXT: s_mov_b32 s10, s12
5604 ; GFX11-NEXT: s_mov_b32 s11, s13
5605 ; GFX11-NEXT: s_mov_b32 s12, s14
5606 ; GFX11-NEXT: s_mov_b32 s13, s15
5607 ; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
5608 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
5609 ; GFX11-NEXT: s_lshl_b32 m0, s16, 1
5610 ; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
5611 ; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
5612 ; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
5613 ; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
5614 ; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
5615 ; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
5616 ; GFX11-NEXT: v_movreld_b32_e32 v2, v0
5617 ; GFX11-NEXT: v_movreld_b32_e32 v3, v1
5618 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
5619 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
5620 ; GFX11-NEXT: v_readfirstlane_b32 s2, v4
5621 ; GFX11-NEXT: v_readfirstlane_b32 s3, v5
5622 ; GFX11-NEXT: v_readfirstlane_b32 s4, v6
5623 ; GFX11-NEXT: v_readfirstlane_b32 s5, v7
5624 ; GFX11-NEXT: v_readfirstlane_b32 s6, v8
5625 ; GFX11-NEXT: v_readfirstlane_b32 s7, v9
5626 ; GFX11-NEXT: v_readfirstlane_b32 s8, v10
5627 ; GFX11-NEXT: v_readfirstlane_b32 s9, v11
5628 ; GFX11-NEXT: v_readfirstlane_b32 s10, v12
5629 ; GFX11-NEXT: v_readfirstlane_b32 s11, v13
5630 ; GFX11-NEXT: v_readfirstlane_b32 s12, v14
5631 ; GFX11-NEXT: v_readfirstlane_b32 s13, v15
5632 ; GFX11-NEXT: ; return to shader part epilog
5634 %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5635 ret <7 x double> %insert
5638 define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) {
5639 ; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v:
5640 ; GPRIDX: ; %bb.0: ; %entry
5641 ; GPRIDX-NEXT: s_mov_b32 s0, s2
5642 ; GPRIDX-NEXT: s_mov_b32 s1, s3
5643 ; GPRIDX-NEXT: s_mov_b32 s2, s4
5644 ; GPRIDX-NEXT: s_mov_b32 s3, s5
5645 ; GPRIDX-NEXT: s_mov_b32 s4, s6
5646 ; GPRIDX-NEXT: s_mov_b32 s5, s7
5647 ; GPRIDX-NEXT: s_mov_b32 s6, s8
5648 ; GPRIDX-NEXT: s_mov_b32 s7, s9
5649 ; GPRIDX-NEXT: s_mov_b32 s8, s10
5650 ; GPRIDX-NEXT: s_mov_b32 s9, s11
5651 ; GPRIDX-NEXT: s_mov_b32 s10, s12
5652 ; GPRIDX-NEXT: s_mov_b32 s11, s13
5653 ; GPRIDX-NEXT: s_mov_b32 s12, s14
5654 ; GPRIDX-NEXT: s_mov_b32 s13, s15
5655 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s15
5656 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s14
5657 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s13
5658 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s12
5659 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s11
5660 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s10
5661 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s9
5662 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s8
5663 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s7
5664 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s6
5665 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s5
5666 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
5667 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
5668 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
5669 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
5670 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
5671 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
5672 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2
5673 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2
5674 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2
5675 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2
5676 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2
5677 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v2
5678 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
5679 ; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[10:11]
5680 ; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1]
5681 ; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3]
5682 ; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v0, s[4:5]
5683 ; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v0, s[6:7]
5684 ; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v15, v0, s[8:9]
5685 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
5686 ; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[10:11]
5687 ; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1]
5688 ; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3]
5689 ; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5]
5690 ; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v14, v1, s[6:7]
5691 ; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[8:9]
5692 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3
5693 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4
5694 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
5695 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6
5696 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5
5697 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8
5698 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7
5699 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10
5700 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9
5701 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12
5702 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11
5703 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13
5704 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v0
5705 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v1
5706 ; GPRIDX-NEXT: ; return to shader part epilog
5708 ; GFX10-LABEL: dyn_insertelement_v7f64_s_v_v:
5709 ; GFX10: ; %bb.0: ; %entry
5710 ; GFX10-NEXT: s_mov_b32 s0, s2
5711 ; GFX10-NEXT: s_mov_b32 s1, s3
5712 ; GFX10-NEXT: s_mov_b32 s2, s4
5713 ; GFX10-NEXT: s_mov_b32 s3, s5
5714 ; GFX10-NEXT: s_mov_b32 s4, s6
5715 ; GFX10-NEXT: s_mov_b32 s5, s7
5716 ; GFX10-NEXT: s_mov_b32 s6, s8
5717 ; GFX10-NEXT: s_mov_b32 s7, s9
5718 ; GFX10-NEXT: s_mov_b32 s8, s10
5719 ; GFX10-NEXT: s_mov_b32 s9, s11
5720 ; GFX10-NEXT: s_mov_b32 s10, s12
5721 ; GFX10-NEXT: s_mov_b32 s11, s13
5722 ; GFX10-NEXT: s_mov_b32 s12, s14
5723 ; GFX10-NEXT: s_mov_b32 s13, s15
5724 ; GFX10-NEXT: v_mov_b32_e32 v18, s15
5725 ; GFX10-NEXT: v_mov_b32_e32 v17, s14
5726 ; GFX10-NEXT: v_mov_b32_e32 v16, s13
5727 ; GFX10-NEXT: v_mov_b32_e32 v15, s12
5728 ; GFX10-NEXT: v_mov_b32_e32 v14, s11
5729 ; GFX10-NEXT: v_mov_b32_e32 v13, s10
5730 ; GFX10-NEXT: v_mov_b32_e32 v12, s9
5731 ; GFX10-NEXT: v_mov_b32_e32 v11, s8
5732 ; GFX10-NEXT: v_mov_b32_e32 v10, s7
5733 ; GFX10-NEXT: v_mov_b32_e32 v9, s6
5734 ; GFX10-NEXT: v_mov_b32_e32 v8, s5
5735 ; GFX10-NEXT: v_mov_b32_e32 v7, s4
5736 ; GFX10-NEXT: v_mov_b32_e32 v6, s3
5737 ; GFX10-NEXT: v_mov_b32_e32 v5, s2
5738 ; GFX10-NEXT: v_mov_b32_e32 v4, s1
5739 ; GFX10-NEXT: v_mov_b32_e32 v3, s0
5740 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
5741 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
5742 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 6, v2
5743 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
5744 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo
5745 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
5746 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
5747 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
5748 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
5749 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
5750 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo
5751 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
5752 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
5753 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
5754 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
5755 ; GFX10-NEXT: v_readfirstlane_b32 s2, v5
5756 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
5757 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v1, vcc_lo
5758 ; GFX10-NEXT: v_readfirstlane_b32 s3, v6
5759 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0
5760 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0
5761 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1
5762 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1
5763 ; GFX10-NEXT: v_readfirstlane_b32 s0, v3
5764 ; GFX10-NEXT: v_readfirstlane_b32 s1, v4
5765 ; GFX10-NEXT: v_readfirstlane_b32 s4, v7
5766 ; GFX10-NEXT: v_readfirstlane_b32 s5, v8
5767 ; GFX10-NEXT: v_readfirstlane_b32 s6, v9
5768 ; GFX10-NEXT: v_readfirstlane_b32 s7, v10
5769 ; GFX10-NEXT: v_readfirstlane_b32 s8, v11
5770 ; GFX10-NEXT: v_readfirstlane_b32 s9, v2
5771 ; GFX10-NEXT: v_readfirstlane_b32 s10, v12
5772 ; GFX10-NEXT: v_readfirstlane_b32 s11, v13
5773 ; GFX10-NEXT: v_readfirstlane_b32 s12, v0
5774 ; GFX10-NEXT: v_readfirstlane_b32 s13, v1
5775 ; GFX10-NEXT: ; return to shader part epilog
5777 ; GFX11-LABEL: dyn_insertelement_v7f64_s_v_v:
5778 ; GFX11: ; %bb.0: ; %entry
5779 ; GFX11-NEXT: s_mov_b32 s0, s2
5780 ; GFX11-NEXT: s_mov_b32 s1, s3
5781 ; GFX11-NEXT: s_mov_b32 s2, s4
5782 ; GFX11-NEXT: s_mov_b32 s3, s5
5783 ; GFX11-NEXT: s_mov_b32 s4, s6
5784 ; GFX11-NEXT: s_mov_b32 s5, s7
5785 ; GFX11-NEXT: s_mov_b32 s6, s8
5786 ; GFX11-NEXT: s_mov_b32 s7, s9
5787 ; GFX11-NEXT: s_mov_b32 s8, s10
5788 ; GFX11-NEXT: s_mov_b32 s9, s11
5789 ; GFX11-NEXT: s_mov_b32 s10, s12
5790 ; GFX11-NEXT: s_mov_b32 s11, s13
5791 ; GFX11-NEXT: s_mov_b32 s12, s14
5792 ; GFX11-NEXT: s_mov_b32 s13, s15
5793 ; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
5794 ; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
5795 ; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
5796 ; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
5797 ; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
5798 ; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
5799 ; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
5800 ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
5801 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
5802 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
5803 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v2
5804 ; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
5805 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
5806 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
5807 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
5808 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
5809 ; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
5810 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
5811 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
5812 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
5813 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
5814 ; GFX11-NEXT: v_readfirstlane_b32 s2, v5
5815 ; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v2, v12, v1
5816 ; GFX11-NEXT: v_readfirstlane_b32 s3, v6
5817 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0
5818 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0
5819 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1
5820 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1
5821 ; GFX11-NEXT: v_readfirstlane_b32 s0, v3
5822 ; GFX11-NEXT: v_readfirstlane_b32 s1, v4
5823 ; GFX11-NEXT: v_readfirstlane_b32 s4, v7
5824 ; GFX11-NEXT: v_readfirstlane_b32 s5, v8
5825 ; GFX11-NEXT: v_readfirstlane_b32 s6, v9
5826 ; GFX11-NEXT: v_readfirstlane_b32 s7, v10
5827 ; GFX11-NEXT: v_readfirstlane_b32 s8, v11
5828 ; GFX11-NEXT: v_readfirstlane_b32 s9, v2
5829 ; GFX11-NEXT: v_readfirstlane_b32 s10, v12
5830 ; GFX11-NEXT: v_readfirstlane_b32 s11, v13
5831 ; GFX11-NEXT: v_readfirstlane_b32 s12, v0
5832 ; GFX11-NEXT: v_readfirstlane_b32 s13, v1
5833 ; GFX11-NEXT: ; return to shader part epilog
5835 %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5836 ret <7 x double> %insert
; Test: insertelement into <7 x double> at a dynamic index, where %vec and %val
; are plain arguments and only %idx is marked inreg.
; Expected gfx900 output (GPRIDX prefix) - s2 is shifted left by 1 into s0, and
; v14 plus a copy of v15 (presumably the two 32-bit halves of %val) are written
; with v_mov_b32 between s_set_gpr_idx_on and s_set_gpr_idx_off.
; Expected gfx1010/gfx1100 output (GFX10PLUS prefix) - the shifted index is put
; in m0 and the same two registers are written with v_movreld_b32.
; Both sequences end with v_readfirstlane_b32 copies of v0-v13 into s0-s13.
5839 define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) {
5840 ; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s:
5841 ; GPRIDX: ; %bb.0: ; %entry
5842 ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
5843 ; GPRIDX-NEXT: v_mov_b32_e32 v16, v15
5844 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
5845 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v14
5846 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v16
5847 ; GPRIDX-NEXT: s_set_gpr_idx_off
5848 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0
5849 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1
5850 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
5851 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3
5852 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4
5853 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5
5854 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6
5855 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
5856 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
5857 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
5858 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10
5859 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11
5860 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12
5861 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13
5862 ; GPRIDX-NEXT: ; return to shader part epilog
5864 ; GFX10PLUS-LABEL: dyn_insertelement_v7f64_v_v_s:
5865 ; GFX10PLUS: ; %bb.0: ; %entry
5866 ; GFX10PLUS-NEXT: v_mov_b32_e32 v16, v15
5867 ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
5868 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v14
5869 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v1, v16
5870 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
5871 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v1
5872 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s2, v2
5873 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s3, v3
5874 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s4, v4
5875 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s5, v5
5876 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s6, v6
5877 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s7, v7
5878 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s8, v8
5879 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s9, v9
5880 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s10, v10
5881 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s11, v11
5882 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s12, v12
5883 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s13, v13
5884 ; GFX10PLUS-NEXT: ; return to shader part epilog
5886 %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5887 ret <7 x double> %insert
; Test: insertelement into <7 x double> at a dynamic index with no inreg
; arguments (%vec, %val and %idx are all plain arguments).
; All three check sets expect a compare-and-select expansion: v16 is compared
; against each constant 0..6 with v_cmp_eq_u32, and each pair of result
; registers v0-v13 is conditionally overwritten from v14/v15 with
; v_cndmask_b32. The gfx1100 checks (GFX11 prefix) differ from the gfx1010
; checks (GFX10 prefix) in that the vcc_lo-based select pairs appear as a single
; v_dual_cndmask_b32. Each sequence ends with v_readfirstlane_b32 copies of
; v0-v13 into s0-s13.
5890 define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) {
5891 ; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v:
5892 ; GPRIDX: ; %bb.0: ; %entry
5893 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
5894 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
5895 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
5896 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
5897 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
5898 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v15, vcc
5899 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
5900 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
5901 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc
5902 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
5903 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
5904 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
5905 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
5906 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc
5907 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc
5908 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
5909 ; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
5910 ; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
5911 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
5912 ; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v14, vcc
5913 ; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc
5914 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0
5915 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1
5916 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
5917 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3
5918 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4
5919 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5
5920 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6
5921 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
5922 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
5923 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
5924 ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10
5925 ; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11
5926 ; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12
5927 ; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13
5928 ; GPRIDX-NEXT: ; return to shader part epilog
5930 ; GFX10-LABEL: dyn_insertelement_v7f64_v_v_v:
5931 ; GFX10: ; %bb.0: ; %entry
5932 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16
5933 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16
5934 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 6, v16
5935 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
5936 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
5937 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
5938 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0
5939 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0
5940 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v16
5941 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v14, s1
5942 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo
5943 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc_lo
5944 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
5945 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s0
5946 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s0
5947 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v16
5948 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v15, s1
5949 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc_lo
5950 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc_lo
5951 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
5952 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v14, s0
5953 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v15, s0
5954 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
5955 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2
5956 ; GFX10-NEXT: v_readfirstlane_b32 s3, v3
5957 ; GFX10-NEXT: v_readfirstlane_b32 s4, v4
5958 ; GFX10-NEXT: v_readfirstlane_b32 s5, v5
5959 ; GFX10-NEXT: v_readfirstlane_b32 s6, v6
5960 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7
5961 ; GFX10-NEXT: v_readfirstlane_b32 s8, v8
5962 ; GFX10-NEXT: v_readfirstlane_b32 s9, v9
5963 ; GFX10-NEXT: v_readfirstlane_b32 s10, v10
5964 ; GFX10-NEXT: v_readfirstlane_b32 s11, v11
5965 ; GFX10-NEXT: v_readfirstlane_b32 s12, v12
5966 ; GFX10-NEXT: v_readfirstlane_b32 s13, v13
5967 ; GFX10-NEXT: ; return to shader part epilog
5969 ; GFX11-LABEL: dyn_insertelement_v7f64_v_v_v:
5970 ; GFX11: ; %bb.0: ; %entry
5971 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16
5972 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v16
5973 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v16
5974 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
5975 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
5976 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0
5977 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0
5978 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v16
5979 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v14, s1
5980 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v14 :: v_dual_cndmask_b32 v5, v5, v15
5981 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
5982 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s0
5983 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s0
5984 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16
5985 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v15, s1
5986 ; GFX11-NEXT: v_dual_cndmask_b32 v8, v8, v14 :: v_dual_cndmask_b32 v9, v9, v15
5987 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
5988 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v14, s0
5989 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v15, s0
5990 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
5991 ; GFX11-NEXT: v_readfirstlane_b32 s2, v2
5992 ; GFX11-NEXT: v_readfirstlane_b32 s3, v3
5993 ; GFX11-NEXT: v_readfirstlane_b32 s4, v4
5994 ; GFX11-NEXT: v_readfirstlane_b32 s5, v5
5995 ; GFX11-NEXT: v_readfirstlane_b32 s6, v6
5996 ; GFX11-NEXT: v_readfirstlane_b32 s7, v7
5997 ; GFX11-NEXT: v_readfirstlane_b32 s8, v8
5998 ; GFX11-NEXT: v_readfirstlane_b32 s9, v9
5999 ; GFX11-NEXT: v_readfirstlane_b32 s10, v10
6000 ; GFX11-NEXT: v_readfirstlane_b32 s11, v11
6001 ; GFX11-NEXT: v_readfirstlane_b32 s12, v12
6002 ; GFX11-NEXT: v_readfirstlane_b32 s13, v13
6003 ; GFX11-NEXT: ; return to shader part epilog
6005 %insert = insertelement <7 x double> %vec, double %val, i32 %idx
6006 ret <7 x double> %insert
; Test: insertelement into <5 x double> at a dynamic index with %vec, %val and
; %idx all marked inreg.
; Both check sets (GPRIDX and GFX10PLUS prefixes) expect the same purely scalar
; sequence: s14 is compared against each constant 0..4 with s_cmp_eq_u32, and
; after each compare an s_cselect_b64 picks either s[12:13] or the next input
; register pair (s[2:3] .. s[10:11]) into the result pairs s[0:1] .. s[8:9].
6009 define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
6010 ; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s:
6011 ; GPRIDX: ; %bb.0: ; %entry
6012 ; GPRIDX-NEXT: s_cmp_eq_u32 s14, 0
6013 ; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3]
6014 ; GPRIDX-NEXT: s_cmp_eq_u32 s14, 1
6015 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5]
6016 ; GPRIDX-NEXT: s_cmp_eq_u32 s14, 2
6017 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7]
6018 ; GPRIDX-NEXT: s_cmp_eq_u32 s14, 3
6019 ; GPRIDX-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9]
6020 ; GPRIDX-NEXT: s_cmp_eq_u32 s14, 4
6021 ; GPRIDX-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11]
6022 ; GPRIDX-NEXT: ; return to shader part epilog
6024 ; GFX10PLUS-LABEL: dyn_insertelement_v5f64_s_s_s:
6025 ; GFX10PLUS: ; %bb.0: ; %entry
6026 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 0
6027 ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3]
6028 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 1
6029 ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5]
6030 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 2
6031 ; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7]
6032 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 3
6033 ; GFX10PLUS-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9]
6034 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 4
6035 ; GFX10PLUS-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11]
6036 ; GFX10PLUS-NEXT: ; return to shader part epilog
6038 %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6039 ret <5 x double> %insert
; Test: insertelement into <5 x double> at a dynamic index, with %vec and %idx
; marked inreg and %val a plain argument.
; The expected code in every check set moves s2-s11 down to s0-s9, copies those
; into VGPRs v2-v11, compares s12 against each constant 0..4 with
; v_cmp_eq_u32_e64, selects v0/v1 into the matching register pair with
; v_cndmask_b32, and reads the ten result dwords back into s0-s9 with
; v_readfirstlane_b32. The gfx1100 checks (GFX11 prefix) show the VGPR copies
; and the vcc_lo-based selects paired into v_dual_* instructions.
6042 define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) {
6043 ; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s:
6044 ; GPRIDX: ; %bb.0: ; %entry
6045 ; GPRIDX-NEXT: s_mov_b32 s1, s3
6046 ; GPRIDX-NEXT: s_mov_b32 s3, s5
6047 ; GPRIDX-NEXT: s_mov_b32 s5, s7
6048 ; GPRIDX-NEXT: s_mov_b32 s7, s9
6049 ; GPRIDX-NEXT: s_mov_b32 s9, s11
6050 ; GPRIDX-NEXT: s_mov_b32 s0, s2
6051 ; GPRIDX-NEXT: s_mov_b32 s2, s4
6052 ; GPRIDX-NEXT: s_mov_b32 s4, s6
6053 ; GPRIDX-NEXT: s_mov_b32 s6, s8
6054 ; GPRIDX-NEXT: s_mov_b32 s8, s10
6055 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
6056 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
6057 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
6058 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 0
6059 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
6060 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
6061 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
6062 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
6063 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 1
6064 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
6065 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
6066 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
6067 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
6068 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 2
6069 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
6070 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
6071 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
6072 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
6073 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 3
6074 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
6075 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
6076 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
6077 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 4
6078 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
6079 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
6080 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
6081 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
6082 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4
6083 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5
6084 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6
6085 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7
6086 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
6087 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9
6088 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0
6089 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1
6090 ; GPRIDX-NEXT: ; return to shader part epilog
6092 ; GFX10-LABEL: dyn_insertelement_v5f64_s_v_s:
6093 ; GFX10: ; %bb.0: ; %entry
6094 ; GFX10-NEXT: s_mov_b32 s1, s3
6095 ; GFX10-NEXT: s_mov_b32 s3, s5
6096 ; GFX10-NEXT: s_mov_b32 s5, s7
6097 ; GFX10-NEXT: s_mov_b32 s7, s9
6098 ; GFX10-NEXT: s_mov_b32 s9, s11
6099 ; GFX10-NEXT: s_mov_b32 s0, s2
6100 ; GFX10-NEXT: s_mov_b32 s2, s4
6101 ; GFX10-NEXT: s_mov_b32 s4, s6
6102 ; GFX10-NEXT: s_mov_b32 s6, s8
6103 ; GFX10-NEXT: s_mov_b32 s8, s10
6104 ; GFX10-NEXT: v_mov_b32_e32 v11, s9
6105 ; GFX10-NEXT: v_mov_b32_e32 v10, s8
6106 ; GFX10-NEXT: v_mov_b32_e32 v9, s7
6107 ; GFX10-NEXT: v_mov_b32_e32 v8, s6
6108 ; GFX10-NEXT: v_mov_b32_e32 v7, s5
6109 ; GFX10-NEXT: v_mov_b32_e32 v6, s4
6110 ; GFX10-NEXT: v_mov_b32_e32 v5, s3
6111 ; GFX10-NEXT: v_mov_b32_e32 v4, s2
6112 ; GFX10-NEXT: v_mov_b32_e32 v3, s1
6113 ; GFX10-NEXT: v_mov_b32_e32 v2, s0
6114 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0
6115 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s12, 1
6116 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s12, 4
6117 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
6118 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
6119 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v0, s0
6120 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2
6121 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v1, s0
6122 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s12, 3
6123 ; GFX10-NEXT: v_readfirstlane_b32 s2, v4
6124 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo
6125 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc_lo
6126 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v0, s0
6127 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v1, s0
6128 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v10, v0, s1
6129 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v11, v1, s1
6130 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
6131 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3
6132 ; GFX10-NEXT: v_readfirstlane_b32 s3, v5
6133 ; GFX10-NEXT: v_readfirstlane_b32 s4, v6
6134 ; GFX10-NEXT: v_readfirstlane_b32 s5, v7
6135 ; GFX10-NEXT: v_readfirstlane_b32 s6, v8
6136 ; GFX10-NEXT: v_readfirstlane_b32 s7, v9
6137 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0
6138 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1
6139 ; GFX10-NEXT: ; return to shader part epilog
6141 ; GFX11-LABEL: dyn_insertelement_v5f64_s_v_s:
6142 ; GFX11: ; %bb.0: ; %entry
6143 ; GFX11-NEXT: s_mov_b32 s1, s3
6144 ; GFX11-NEXT: s_mov_b32 s3, s5
6145 ; GFX11-NEXT: s_mov_b32 s5, s7
6146 ; GFX11-NEXT: s_mov_b32 s7, s9
6147 ; GFX11-NEXT: s_mov_b32 s9, s11
6148 ; GFX11-NEXT: s_mov_b32 s0, s2
6149 ; GFX11-NEXT: s_mov_b32 s2, s4
6150 ; GFX11-NEXT: s_mov_b32 s4, s6
6151 ; GFX11-NEXT: s_mov_b32 s6, s8
6152 ; GFX11-NEXT: s_mov_b32 s8, s10
6153 ; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
6154 ; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
6155 ; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
6156 ; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
6157 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
6158 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0
6159 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 1
6160 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s12, 4
6161 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v0 :: v_dual_cndmask_b32 v3, v3, v1
6162 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v0, s0
6163 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2
6164 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v1, s0
6165 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 3
6166 ; GFX11-NEXT: v_readfirstlane_b32 s2, v4
6167 ; GFX11-NEXT: v_dual_cndmask_b32 v6, v6, v0 :: v_dual_cndmask_b32 v7, v7, v1
6168 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v0, s0
6169 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v1, s0
6170 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, v0, s1
6171 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v11, v1, s1
6172 ; GFX11-NEXT: v_readfirstlane_b32 s0, v2
6173 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3
6174 ; GFX11-NEXT: v_readfirstlane_b32 s3, v5
6175 ; GFX11-NEXT: v_readfirstlane_b32 s4, v6
6176 ; GFX11-NEXT: v_readfirstlane_b32 s5, v7
6177 ; GFX11-NEXT: v_readfirstlane_b32 s6, v8
6178 ; GFX11-NEXT: v_readfirstlane_b32 s7, v9
6179 ; GFX11-NEXT: v_readfirstlane_b32 s8, v0
6180 ; GFX11-NEXT: v_readfirstlane_b32 s9, v1
6181 ; GFX11-NEXT: ; return to shader part epilog
6183 %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6184 ret <5 x double> %insert
; Test: insertelement into <5 x double> at a dynamic index, with only %vec
; marked inreg; %val and %idx are plain arguments.
; The expected code in every check set moves s2-s11 down to s0-s9, copies those
; into VGPRs v3-v12, compares v2 against each constant 0..4 with v_cmp_eq_u32,
; selects v0/v1 into the matching register pair with v_cndmask_b32, and reads
; the ten result dwords back into s0-s9 with v_readfirstlane_b32. Note that v2
; is reused as a select destination once the last compare on it has been
; issued. The gfx1100 checks (GFX11 prefix) show the VGPR copies and the
; vcc_lo-based selects paired into v_dual_* instructions.
6187 define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) {
6188 ; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v:
6189 ; GPRIDX: ; %bb.0: ; %entry
6190 ; GPRIDX-NEXT: s_mov_b32 s1, s3
6191 ; GPRIDX-NEXT: s_mov_b32 s3, s5
6192 ; GPRIDX-NEXT: s_mov_b32 s5, s7
6193 ; GPRIDX-NEXT: s_mov_b32 s7, s9
6194 ; GPRIDX-NEXT: s_mov_b32 s9, s11
6195 ; GPRIDX-NEXT: s_mov_b32 s0, s2
6196 ; GPRIDX-NEXT: s_mov_b32 s2, s4
6197 ; GPRIDX-NEXT: s_mov_b32 s4, s6
6198 ; GPRIDX-NEXT: s_mov_b32 s6, s8
6199 ; GPRIDX-NEXT: s_mov_b32 s8, s10
6200 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s9
6201 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
6202 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
6203 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
6204 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
6205 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
6206 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
6207 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
6208 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
6209 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s5
6210 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
6211 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc
6212 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
6213 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2
6214 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s8
6215 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s7
6216 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s6
6217 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
6218 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc
6219 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2
6220 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 4, v2
6221 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc
6222 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v1, vcc
6223 ; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v11, v0, s[0:1]
6224 ; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[0:1]
6225 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3
6226 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4
6227 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v5
6228 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6
6229 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v7
6230 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8
6231 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9
6232 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v2
6233 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0
6234 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1
6235 ; GPRIDX-NEXT: ; return to shader part epilog
6237 ; GFX10-LABEL: dyn_insertelement_v5f64_s_v_v:
6238 ; GFX10: ; %bb.0: ; %entry
6239 ; GFX10-NEXT: s_mov_b32 s1, s3
6240 ; GFX10-NEXT: s_mov_b32 s3, s5
6241 ; GFX10-NEXT: s_mov_b32 s5, s7
6242 ; GFX10-NEXT: s_mov_b32 s7, s9
6243 ; GFX10-NEXT: s_mov_b32 s9, s11
6244 ; GFX10-NEXT: s_mov_b32 s0, s2
6245 ; GFX10-NEXT: s_mov_b32 s2, s4
6246 ; GFX10-NEXT: s_mov_b32 s4, s6
6247 ; GFX10-NEXT: s_mov_b32 s6, s8
6248 ; GFX10-NEXT: s_mov_b32 s8, s10
6249 ; GFX10-NEXT: v_mov_b32_e32 v12, s9
6250 ; GFX10-NEXT: v_mov_b32_e32 v11, s8
6251 ; GFX10-NEXT: v_mov_b32_e32 v10, s7
6252 ; GFX10-NEXT: v_mov_b32_e32 v9, s6
6253 ; GFX10-NEXT: v_mov_b32_e32 v8, s5
6254 ; GFX10-NEXT: v_mov_b32_e32 v7, s4
6255 ; GFX10-NEXT: v_mov_b32_e32 v6, s3
6256 ; GFX10-NEXT: v_mov_b32_e32 v5, s2
6257 ; GFX10-NEXT: v_mov_b32_e32 v4, s1
6258 ; GFX10-NEXT: v_mov_b32_e32 v3, s0
6259 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
6260 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
6261 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 4, v2
6262 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
6263 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo
6264 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
6265 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
6266 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
6267 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
6268 ; GFX10-NEXT: v_readfirstlane_b32 s2, v5
6269 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
6270 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc_lo
6271 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0
6272 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0
6273 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1
6274 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1
6275 ; GFX10-NEXT: v_readfirstlane_b32 s0, v3
6276 ; GFX10-NEXT: v_readfirstlane_b32 s1, v4
6277 ; GFX10-NEXT: v_readfirstlane_b32 s3, v6
6278 ; GFX10-NEXT: v_readfirstlane_b32 s4, v7
6279 ; GFX10-NEXT: v_readfirstlane_b32 s5, v2
6280 ; GFX10-NEXT: v_readfirstlane_b32 s6, v8
6281 ; GFX10-NEXT: v_readfirstlane_b32 s7, v9
6282 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0
6283 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1
6284 ; GFX10-NEXT: ; return to shader part epilog
6286 ; GFX11-LABEL: dyn_insertelement_v5f64_s_v_v:
6287 ; GFX11: ; %bb.0: ; %entry
6288 ; GFX11-NEXT: s_mov_b32 s1, s3
6289 ; GFX11-NEXT: s_mov_b32 s3, s5
6290 ; GFX11-NEXT: s_mov_b32 s5, s7
6291 ; GFX11-NEXT: s_mov_b32 s7, s9
6292 ; GFX11-NEXT: s_mov_b32 s9, s11
6293 ; GFX11-NEXT: s_mov_b32 s0, s2
6294 ; GFX11-NEXT: s_mov_b32 s2, s4
6295 ; GFX11-NEXT: s_mov_b32 s4, s6
6296 ; GFX11-NEXT: s_mov_b32 s6, s8
6297 ; GFX11-NEXT: s_mov_b32 s8, s10
6298 ; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
6299 ; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
6300 ; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
6301 ; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
6302 ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
6303 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
6304 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
6305 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v2
6306 ; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
6307 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
6308 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
6309 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
6310 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
6311 ; GFX11-NEXT: v_readfirstlane_b32 s2, v5
6312 ; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v2, v8, v1
6313 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0
6314 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0
6315 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1
6316 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1
6317 ; GFX11-NEXT: v_readfirstlane_b32 s0, v3
6318 ; GFX11-NEXT: v_readfirstlane_b32 s1, v4
6319 ; GFX11-NEXT: v_readfirstlane_b32 s3, v6
6320 ; GFX11-NEXT: v_readfirstlane_b32 s4, v7
6321 ; GFX11-NEXT: v_readfirstlane_b32 s5, v2
6322 ; GFX11-NEXT: v_readfirstlane_b32 s6, v8
6323 ; GFX11-NEXT: v_readfirstlane_b32 s7, v9
6324 ; GFX11-NEXT: v_readfirstlane_b32 s8, v0
6325 ; GFX11-NEXT: v_readfirstlane_b32 s9, v1
6326 ; GFX11-NEXT: ; return to shader part epilog
6328 %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6329 ret <5 x double> %insert
; Test: insertelement into <5 x double> at a dynamic index, where %vec and %val
; are plain arguments and only %idx is marked inreg.
; All three check sets expect a compare-and-select expansion: s2 is compared
; against each constant 0..4 with v_cmp_eq_u32_e64, each pair of result
; registers v0-v9 is conditionally overwritten from v10/v11 with v_cndmask_b32,
; and the ten result dwords are copied into s0-s9 with v_readfirstlane_b32.
; The gfx1100 checks (GFX11 prefix) show the vcc_lo-based select pairs as a
; single v_dual_cndmask_b32.
6332 define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) {
6333 ; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s:
6334 ; GPRIDX: ; %bb.0: ; %entry
6335 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
6336 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6337 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6338 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
6339 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
6340 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
6341 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
6342 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
6343 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
6344 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
6345 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
6346 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
6347 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
6348 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
6349 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
6350 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0
6351 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1
6352 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
6353 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3
6354 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4
6355 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5
6356 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6
6357 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
6358 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
6359 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
6360 ; GPRIDX-NEXT: ; return to shader part epilog
6362 ; GFX10-LABEL: dyn_insertelement_v5f64_v_v_s:
6363 ; GFX10: ; %bb.0: ; %entry
6364 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
6365 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 1
6366 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s2, 4
6367 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
6368 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
6369 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0
6370 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
6371 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
6372 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 3
6373 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
6374 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
6375 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
6376 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
6377 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0
6378 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0
6379 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
6380 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
6381 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2
6382 ; GFX10-NEXT: v_readfirstlane_b32 s3, v3
6383 ; GFX10-NEXT: v_readfirstlane_b32 s4, v4
6384 ; GFX10-NEXT: v_readfirstlane_b32 s5, v5
6385 ; GFX10-NEXT: v_readfirstlane_b32 s6, v6
6386 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7
6387 ; GFX10-NEXT: v_readfirstlane_b32 s8, v8
6388 ; GFX10-NEXT: v_readfirstlane_b32 s9, v9
6389 ; GFX10-NEXT: ; return to shader part epilog
6391 ; GFX11-LABEL: dyn_insertelement_v5f64_v_v_s:
6392 ; GFX11: ; %bb.0: ; %entry
6393 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
6394 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 1
6395 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s2, 4
6396 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
6397 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0
6398 ; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
6399 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
6400 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 3
6401 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
6402 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
6403 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11
6404 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0
6405 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0
6406 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
6407 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
6408 ; GFX11-NEXT: v_readfirstlane_b32 s2, v2
6409 ; GFX11-NEXT: v_readfirstlane_b32 s3, v3
6410 ; GFX11-NEXT: v_readfirstlane_b32 s4, v4
6411 ; GFX11-NEXT: v_readfirstlane_b32 s5, v5
6412 ; GFX11-NEXT: v_readfirstlane_b32 s6, v6
6413 ; GFX11-NEXT: v_readfirstlane_b32 s7, v7
6414 ; GFX11-NEXT: v_readfirstlane_b32 s8, v8
6415 ; GFX11-NEXT: v_readfirstlane_b32 s9, v9
6416 ; GFX11-NEXT: ; return to shader part epilog
6418 %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6419 ret <5 x double> %insert
; Dynamic insertelement of a double into <5 x double> where the vector, the
; inserted value and the index are all passed without `inreg`. The expected
; code reads them from v0-v9, v10-v11 and v12 respectively.
;
; Expected shape on every checked target: one v_cmp_eq_u32 of the index (v12)
; against each element number 0..4, two v_cndmask_b32 per element (one per
; 32-bit half of the double) choosing between the old element and v10/v11,
; then v_readfirstlane_b32 of v0-v9 into s0-s9 before the shader part epilog.
; GFX11 expects each pair of vcc_lo-based selects as one v_dual_cndmask_b32.
;
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6422 define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) {
6423 ; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v:
6424 ; GPRIDX: ; %bb.0: ; %entry
6425 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
6426 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6427 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6428 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
6429 ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
6430 ; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
6431 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
6432 ; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
6433 ; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
6434 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
6435 ; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
6436 ; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
6437 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
6438 ; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
6439 ; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
6440 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0
6441 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1
6442 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
6443 ; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3
6444 ; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4
6445 ; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5
6446 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6
6447 ; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
6448 ; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
6449 ; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
6450 ; GPRIDX-NEXT: ; return to shader part epilog
6452 ; GFX10-LABEL: dyn_insertelement_v5f64_v_v_v:
6453 ; GFX10: ; %bb.0: ; %entry
6454 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
6455 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v12
6456 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 4, v12
6457 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
6458 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
6459 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0
6460 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
6461 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
6462 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v12
6463 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
6464 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
6465 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo
6466 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
6467 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0
6468 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0
6469 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
6470 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
6471 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2
6472 ; GFX10-NEXT: v_readfirstlane_b32 s3, v3
6473 ; GFX10-NEXT: v_readfirstlane_b32 s4, v4
6474 ; GFX10-NEXT: v_readfirstlane_b32 s5, v5
6475 ; GFX10-NEXT: v_readfirstlane_b32 s6, v6
6476 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7
6477 ; GFX10-NEXT: v_readfirstlane_b32 s8, v8
6478 ; GFX10-NEXT: v_readfirstlane_b32 s9, v9
6479 ; GFX10-NEXT: ; return to shader part epilog
6481 ; GFX11-LABEL: dyn_insertelement_v5f64_v_v_v:
6482 ; GFX11: ; %bb.0: ; %entry
6483 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12
6484 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v12
6485 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v12
6486 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
6487 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0
6488 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
6489 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
6490 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v12
6491 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1
6492 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1
6493 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11
6494 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0
6495 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0
6496 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
6497 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
6498 ; GFX11-NEXT: v_readfirstlane_b32 s2, v2
6499 ; GFX11-NEXT: v_readfirstlane_b32 s3, v3
6500 ; GFX11-NEXT: v_readfirstlane_b32 s4, v4
6501 ; GFX11-NEXT: v_readfirstlane_b32 s5, v5
6502 ; GFX11-NEXT: v_readfirstlane_b32 s6, v6
6503 ; GFX11-NEXT: v_readfirstlane_b32 s7, v7
6504 ; GFX11-NEXT: v_readfirstlane_b32 s8, v8
6505 ; GFX11-NEXT: v_readfirstlane_b32 s9, v9
6506 ; GFX11-NEXT: ; return to shader part epilog
6508 %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6509 ret <5 x double> %insert