; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux -mcpu=skylake-avx512 < %s | FileCheck %s
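
; Test how masked scatter/gather lowering handles pointer strides that do or
; do not fit the x86 SIB addressing-mode scale (1, 2, 4, or 8 bytes).

; Stride 512 = 2^9 exceeds the maximum SIB scale of 8, so it is folded into
; the index vector with a shift by 9.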
define void @scatter_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
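
; Stride 16 = 2^4 also exceeds the maximum scale; a shift by 4 is used instead.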
define void @scatter_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
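
; Stride 8 is a legal SIB scale and is encoded directly in the address.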
define void @scatter_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,8) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
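
; Stride 4 is likewise encoded directly as a SIB scale.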
define void @scatter_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
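
; Stride 3 is not a power of two; the multiply is expanded as idx*2 + idx
; with two vpaddq instructions.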
define void @scatter_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
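
; Stride 1 needs no index transform at all.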
define void @scatter_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
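
; The gather variants exercise the same scale lowering; the zeroed passthru
; doubles as the result register. Stride 512 again becomes a shift by 9.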
define <4 x double> @gather_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
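
; Stride 16: shift by 4, as in the scatter case.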
define <4 x double> @gather_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
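
; Stride 8 uses the SIB scale directly; %ymm0 still holds the index, so the
; gather lands in %ymm1 and is then moved to the return register.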
define <4 x double> @gather_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,8), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
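
; Stride 4 also maps onto a SIB scale.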
define <4 x double> @gather_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
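
; Stride 3 expands to idx*2 + idx, as in the scatter case.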
define <4 x double> @gather_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
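
; Stride 1: no index transform needed.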
define <4 x double> @gather_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)