; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 | FileCheck %s --check-prefix=X64
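
; All three functions perform the same <8 x i32> gather through
; @llvm.masked.gather with an all-true mask. With -mcpu=znver3 the checks
; below expect the gather to be scalarized: the pointer vector is built with
; vpmovsxdq/vpsllq/vpaddq and each element is loaded with vmovd/vpinsrd, with
; no vpgatherdd emitted. The optsize and minsize variants expect the same
; sequence as the default one.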
define <8 x i32> @simple(ptr %base, <8 x i32> %offsets) {
; X64-LABEL: simple:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

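; Same IR as @simple, but with the optsize attribute; the expected code is
; identical.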
define <8 x i32> @optsize(ptr %base, <8 x i32> %offsets) optsize {
; X64-LABEL: optsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

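; Same IR again, now with the minsize attribute; the expected code is still
; identical to @simple.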
define <8 x i32> @minsize(ptr %base, <8 x i32> %offsets) minsize {
; X64-LABEL: minsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>)