1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
; Scatter test for <16 x half>: every element of %val is stored to the address
; obtained by indexing %base with the matching i32 lane of %index. The mask
; handed to the intrinsic is a constant all-true vector, so all 16 lanes are
; written.
;
; The expected output below contains no scatter instruction. It broadcasts the
; base pointer, sign-extends the i32 indices to 64 bits, adds each index to the
; base twice (base + 2*index, matching the 2-byte size of half), and then
; performs 16 separate vmovsh stores, one per lane.
4 define void @test_mscatter_v16f16(half* %base, <16 x i32> %index, <16 x half> %val)
5 ; CHECK-LABEL: test_mscatter_v16f16:
; Address vectors: zmm2 = base + 2*index for the upper eight index lanes,
; zmm0 = base + 2*index for the lower eight index lanes.
7 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm3
8 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
9 ; CHECK-NEXT: vpmovsxdq %ymm2, %zmm2
10 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm4
11 ; CHECK-NEXT: vpaddq %zmm4, %zmm2, %zmm2
12 ; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0
13 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm3
14 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; First eight stores: each half is moved to element 0 of xmm3 from the low
; 128 bits of the value register (xmm1); the pointers come from zmm0.
15 ; CHECK-NEXT: vmovq %xmm0, %rax
16 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
17 ; CHECK-NEXT: vpsrld $16, %xmm1, %xmm3
18 ; CHECK-NEXT: vpextrq $1, %xmm0, %rax
19 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
20 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
21 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4
22 ; CHECK-NEXT: vmovq %xmm4, %rax
23 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
24 ; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
25 ; CHECK-NEXT: vpextrq $1, %xmm4, %rax
26 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
27 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
28 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm4
29 ; CHECK-NEXT: vmovq %xmm4, %rax
30 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
31 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
32 ; CHECK-NEXT: vpextrq $1, %xmm4, %rax
33 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
34 ; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,3,3,3]
35 ; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm0
36 ; CHECK-NEXT: vmovq %xmm0, %rax
37 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
38 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
39 ; CHECK-NEXT: vpextrq $1, %xmm0, %rax
40 ; CHECK-NEXT: vmovsh %xmm3, (%rax)
; Last eight stores: the upper 128 bits of ymm1 are extracted into xmm0 and
; the same per-element shuffle sequence is repeated, with pointers from zmm2.
41 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm0
42 ; CHECK-NEXT: vmovq %xmm2, %rax
43 ; CHECK-NEXT: vmovsh %xmm0, (%rax)
44 ; CHECK-NEXT: vpsrld $16, %xmm0, %xmm1
45 ; CHECK-NEXT: vpextrq $1, %xmm2, %rax
46 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
47 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
48 ; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
49 ; CHECK-NEXT: vmovq %xmm3, %rax
50 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
51 ; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm1
52 ; CHECK-NEXT: vpextrq $1, %xmm3, %rax
53 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
54 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
55 ; CHECK-NEXT: vextracti32x4 $2, %zmm2, %xmm3
56 ; CHECK-NEXT: vmovq %xmm3, %rax
57 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
58 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
59 ; CHECK-NEXT: vpextrq $1, %xmm3, %rax
60 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
61 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
62 ; CHECK-NEXT: vextracti32x4 $3, %zmm2, %xmm2
63 ; CHECK-NEXT: vmovq %xmm2, %rax
64 ; CHECK-NEXT: vmovsh %xmm1, (%rax)
65 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
66 ; CHECK-NEXT: vpextrq $1, %xmm2, %rax
67 ; CHECK-NEXT: vmovsh %xmm0, (%rax)
68 ; CHECK-NEXT: vzeroupper
; IR under test: a vector getelementptr yields 16 half pointers, which are
; passed with %val to the masked scatter intrinsic together with an i32 4
; operand (the alignment argument per the LangRef) and the all-true mask.
71 %gep = getelementptr half, half* %base, <16 x i32> %index
72 call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> %val, <16 x half*> %gep, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; Masked scatter intrinsic for 16 half elements. Operands in order: the value
; vector, the vector of destination pointers, an i32 (the alignment per the
; LangRef), and the <16 x i1> per-lane mask.
75 declare void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> , <16 x half*> , i32 , <16 x i1>)