; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s
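
; This file checks plain and masked vector loads and stores under AVX512VL:
; aligned accesses (align 32/16) should select vmovaps/vmovapd/vmovdqa*,
; unaligned accesses (align 1) vmovups/vmovupd/vmovdqu*, and the unmasked
; 128/256-bit forms should compress from EVEX to the shorter VEX encoding.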
define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32> %res
}

define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32> %res
}

define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64> %res
}

define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64> %res
}

define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double> %res
}

define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float> %res
}

define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double> %res
}

define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float> %res
}

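; Masked 256-bit integer loads: the icmp-ne-zero mask becomes a vptestmd or
; vptestmq into %k1, and the select folds into the load as {%k1} merge
; masking (blending with %old) or {%k1} {z} zero masking.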
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

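; Floating-point masks: fcmp one against zero lowers to vcmpneq_oqps with a
; zeroed register, and the select folds into a masked vmovaps/vmovups load.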
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

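; 128-bit (xmm) variants of the same aligned/unaligned load and store tests.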
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32> %res
}

define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32> %res
}

define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64> %res
}

define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64> %res
}

define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double> %res
}

define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float> %res
}

define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double> %res
}

define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float> %res
}

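; Masked 128-bit loads mirror the 256-bit cases, using the xmm forms of the
; {%k1} and {%k1} {z} masked vmovdqa32/64, vmovdqu32/64, and vmovap*/vmovup*
; instructions.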
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}