; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE

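; VRANGEPS/VRANGEPD/VRANGESS/VRANGESD write their destination without reading
; it, but the stale destination register can still stall the pipeline (a false
; dependency). Each test below clobbers most XMM registers with inline asm so
; that the undef passthrough operand gets assigned a recently written, stale
; register. With +false-deps-range (ENABLE) llc is expected to break the
; dependency by zeroing the destination first; with -false-deps-range
; (DISABLE) the VRANGE* instruction is emitted as-is.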
define <4 x float> @rangeps_128(<4 x float> %a0, <4 x float> %a1) {
; ENABLE-LABEL: rangeps_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

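; Memory-operand variant: the load of the second source is folded into
; vrangeps, and ENABLE still zeroes the undef destination first.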
define <4 x float> @rangeps_mem_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x float>, ptr %p1, align 64
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}

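; Broadcast variant: the splatted scalar load is folded as an embedded
; broadcast, (%rdi){1to4}.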
define <4 x float> @rangeps_broadcast_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; ENABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; DISABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <4 x float> undef, float %v1, i64 0
  %a1 = shufflevector <4 x float> %t0, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}

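; Zero-masking variant: the mask is loaded into %k1 with kmovb. ENABLE breaks
; the dependency on the undef destination even under {z} masking.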
define <4 x float> @rangeps_maskz_128(<4 x float> %a0, <4 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 %mask)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) nounwind readnone

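; 256-bit versions of the same four patterns. Spills and reloads use 32-byte
; ymm moves, but the ENABLE xor only needs the xmm form: a VEX/EVEX write to
; an xmm register zeroes the rest of the ymm/zmm register.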
define <8 x float> @rangeps_256(<8 x float> %a0, <8 x float> %a1) {
; ENABLE-LABEL: rangeps_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}

define <8 x float> @rangeps_mem_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <8 x float>, ptr %p1, align 64
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}

define <8 x float> @rangeps_broadcast_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; ENABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; DISABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <8 x float> undef, float %v1, i64 0
  %a1 = shufflevector <8 x float> %t0, <8 x float> undef, <8 x i32> zeroinitializer
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}

define <8 x float> @rangeps_maskz_256(<8 x float> %a0, <8 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 44, <8 x float> undef, i8 %mask)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) nounwind readnone

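; 512-bit versions: the 16-bit mask is loaded with kmovw, the intrinsics take
; a trailing i32 4 rounding/SAE argument (the default, no static rounding),
; and ENABLE breaks the dependency with vpxor on the xmm register.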
define <16 x float> @rangeps_512(<16 x float> %a0, <16 x float> %a1) {
; ENABLE-LABEL: rangeps_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1
; ENABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1
; DISABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}

define <16 x float> @rangeps_mem_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %zmm1, %zmm0
; ENABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %zmm1, %zmm0
; DISABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <16 x float>, ptr %p1, align 64
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}

define <16 x float> @rangeps_broadcast_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; ENABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; DISABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <16 x float> undef, float %v1, i64 0
  %a1 = shufflevector <16 x float> %t0, <16 x float> undef, <16 x i32> zeroinitializer
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}

define <16 x float> @rangeps_maskz_512(<16 x float> %a0, <16 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovw (%rdi), %k1
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovw (%rdi), %k1
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i16, ptr %pmask
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 %mask, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) nounwind readnone

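; The same matrix of tests for the double-precision vrangepd intrinsics.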
define <2 x double> @rangepd_128(<2 x double> %a0, <2 x double> %a1) {
; ENABLE-LABEL: rangepd_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

define <2 x double> @rangepd_mem_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_mem_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <2 x double>, ptr %p1, align 64
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}

define <2 x double> @rangepd_broadcast_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_broadcast_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <2 x double> undef, double %v1, i64 0
  %a1 = shufflevector <2 x double> %t0, <2 x double> undef, <2 x i32> zeroinitializer
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}

define <2 x double> @rangepd_maskz_128(<2 x double> %a0, <2 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_maskz_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 %mask)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) nounwind readnone

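; 256-bit vrangepd tests: reloads use vmovupd and the adds use vaddpd, but
; the ENABLE xor pattern is unchanged.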
define <4 x double> @rangepd_256(<4 x double> %a0, <4 x double> %a1) {
; ENABLE-LABEL: rangepd_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %3 = fadd <4 x double> %a0, %a1
  %res = fadd <4 x double> %2, %3
  ret <4 x double> %res
}

define <4 x double> @rangepd_mem_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_mem_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x double>, ptr %p1, align 64
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}

define <4 x double> @rangepd_broadcast_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; ENABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_broadcast_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; DISABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <4 x double> undef, double %v1, i64 0
  %a1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}

define <4 x double> @rangepd_maskz_256(<4 x double> %a0, <4 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
; ENABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_maskz_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
; DISABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 %mask)
  %3 = fadd <4 x double> %a0, %a1
  %res = fadd <4 x double> %2, %3
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) nounwind readnone

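; 512-bit vrangepd tests: i8 mask via kmovb, a trailing i32 4 rounding/SAE
; argument, and vpxor as the ENABLE dependency breaker.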
define <8 x double> @rangepd_512(<8 x double> %a0, <8 x double> %a1) {
; ENABLE-LABEL: rangepd_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1
; ENABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1
; DISABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
  %3 = fadd <8 x double> %a0, %a1
  %res = fadd <8 x double> %2, %3
  ret <8 x double> %res
}

define <8 x double> @rangepd_mem_512(<8 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi), %zmm1, %zmm0
; ENABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_mem_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi), %zmm1, %zmm0
; DISABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <8 x double>, ptr %p1, align 64
  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
  %res = fadd <8 x double> %2, %a0
  ret <8 x double> %res
}

define <8 x double> @rangepd_broadcast_512(<8 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0
; ENABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_broadcast_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0
; DISABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <8 x double> undef, double %v1, i64 0
  %a1 = shufflevector <8 x double> %t0, <8 x double> undef, <8 x i32> zeroinitializer
  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
  %res = fadd <8 x double> %2, %a0
  ret <8 x double> %res
}

define <8 x double> @rangepd_maskz_512(<8 x double> %a0, <8 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; ENABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_maskz_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; DISABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 %mask, i32 4)
  %3 = fadd <8 x double> %a0, %a1
  %res = fadd <8 x double> %2, %3
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) nounwind readnone

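; Scalar tests. vrangess/vrangesd write only the low element, but the whole
; destination register is a potential false dependency, so ENABLE still
; zeroes it. The two trailing i32 arguments of the scalar intrinsics carry
; the range immediate and the SAE control (both 4 here).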
define <4 x float> @rangess(<4 x float> %a0, <4 x float> %a1) {
; ENABLE-LABEL: rangess:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangess:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4)
  %3 = fadd <4 x float> %a1, %a0
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

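; In this variant the inline asm leaves xmm0 and xmm1 unclobbered, so no
; spill or reload is needed; ENABLE still zeroes the destination before the
; folded-load vrangess.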
define <4 x float> @rangess_mem(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangess_mem:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangess $4, (%rdi), %xmm0, %xmm1
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangess_mem:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vrangess $4, (%rdi), %xmm0, %xmm1
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x float>, ptr %p1, align 64
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}

define <4 x float> @rangess_maskz(<4 x float> %a0, <4 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangess_maskz:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangess_maskz:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 %mask, i32 4, i32 4)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)

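; vrangesd scalar-double tests mirror the vrangess ones above.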
define <2 x double> @rangesd(<2 x double> %a0, <2 x double> %a1) {
; ENABLE-LABEL: rangesd:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangesd:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

define <2 x double> @rangesd_mem(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangesd_mem:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangesd $4, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangesd_mem:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangesd $4, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <2 x double>, ptr %p1, align 64
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}

define <2 x double> @rangesd_maskz(<2 x double> %a0, <2 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangesd_maskz:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangesd_maskz:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 %mask, i32 4, i32 4)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)