; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-linux-generic -mattr=avx < %s | FileCheck %s

; The SplitVecRes_MSTORE method should split an extended value type
; according to the halving of the enveloping type to avoid all sorts
; of inconsistencies downstream. For example, for an extended value type
; with VL=14 and an enveloping type with VL=16 that is split 8/8, the extended
; type should be split 8/6 and not 7/7. This also accounts for hi masked
; stores that get zero storage size (and are unused).
; A <9 x i1> mask: the hi half covers a single live lane, so it is built from
; one scalar (movzbl + vmovd + vpslld) before its masked store.
define void @mstore_split9(<9 x float> %value, <9 x float>* %addr, <9 x i1> %mask) {
; CHECK-LABEL: mstore_split9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrw $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrw $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> %value, <9 x float>* %addr, i32 4, <9 x i1>%mask)
  ret void
}
; A <13 x i1> mask: the extended type must split 8/5, not 7/6, so the hi-half
; mask is assembled from five stack-passed i1 args before its masked store.
define void @mstore_split13(<13 x float> %value, <13 x float>* %addr, <13 x i1> %mask) {
; CHECK-LABEL: mstore_split13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrw $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrw $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> %value, <13 x float>* %addr, i32 4, <13 x i1>%mask)
  ret void
}
; The motivating case from the header comment: VL=14 in a VL=16 envelope split
; 8/8 must yield an 8/6 split of the extended type (not 7/7).
define void @mstore_split14(<14 x float> %value, <14 x float>* %addr, <14 x i1> %mask) {
; CHECK-LABEL: mstore_split14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrw $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrw $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrw $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> %value, <14 x float>* %addr, i32 4, <14 x i1>%mask)
  ret void
}
; A <17 x i1> mask requires two split levels (16/1, then 8/8): the single
; element beyond the second split gets its own masked store at 64(%rdi).
define void @mstore_split17(<17 x float> %value, <17 x float>* %addr, <17 x i1> %mask) {
; CHECK-LABEL: mstore_split17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm3
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 64(%rdi)
; CHECK-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v17f32.p0v17f32(<17 x float> %value, <17 x float>* %addr, i32 4, <17 x i1>%mask)
  ret void
}
; A <23 x i1> mask splits 16/7 at the first level; the trailing 7 lanes use a
; partially-populated mask register (vpinsrb up to lane 12 only).
define void @mstore_split23(<23 x float> %value, <23 x float>* %addr, <23 x i1> %mask) {
; CHECK-LABEL: mstore_split23:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 32(%rdi)
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 64(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v23f32.p0v23f32(<23 x float> %value, <23 x float>* %addr, i32 4, <23 x i1>%mask)
  ret void
}
; Declarations of the masked-store intrinsics for the odd-width vector types
; exercised by the tests above (alignment is the i32 operand at each call site).
declare void @llvm.masked.store.v9f32.p0v9f32(<9 x float>, <9 x float>*, i32, <9 x i1>)
declare void @llvm.masked.store.v13f32.p0v13f32(<13 x float>, <13 x float>*, i32, <13 x i1>)
declare void @llvm.masked.store.v14f32.p0v14f32(<14 x float>, <14 x float>*, i32, <14 x i1>)
declare void @llvm.masked.store.v17f32.p0v17f32(<17 x float>, <17 x float>*, i32, <17 x i1>)
declare void @llvm.masked.store.v23f32.p0v23f32(<23 x float>, <23 x float>*, i32, <23 x i1>)