1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
4 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
6 ; Test regcall when receiving arguments of v64i1 type
; The callee takes thirteen <64 x i1> masks, reinterprets each one as an i64
; and adds them together. The expected code reads every mask straight from
; general-purpose registers or stack slots: 12 registers plus 1 stack slot on
; Win64, 11 registers plus 2 stack slots on Linux x86-64, and 2 register pairs
; plus 11 stack pairs (addl/adcl) in the 32-bit run.
7 define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
8 ; X32-LABEL: test_argv64i1:
10 ; X32-NEXT: addl %edx, %eax
11 ; X32-NEXT: adcl %edi, %ecx
12 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
13 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
14 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
15 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
16 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
17 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
18 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
19 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
20 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
21 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
22 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
23 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
24 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
25 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
26 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
27 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
28 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
29 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
30 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
31 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
32 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
33 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
36 ; WIN64-LABEL: test_argv64i1:
38 ; WIN64-NEXT: addq %rcx, %rax
39 ; WIN64-NEXT: addq %rdx, %rax
40 ; WIN64-NEXT: addq %rdi, %rax
41 ; WIN64-NEXT: leaq (%rsi,%r8), %rcx
42 ; WIN64-NEXT: addq %r9, %rcx
43 ; WIN64-NEXT: addq %rcx, %rax
44 ; WIN64-NEXT: leaq (%r10,%r11), %rcx
45 ; WIN64-NEXT: addq %r12, %rcx
46 ; WIN64-NEXT: addq %r14, %rcx
47 ; WIN64-NEXT: addq %rcx, %rax
48 ; WIN64-NEXT: addq %r15, %rax
49 ; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
52 ; LINUXOSX64-LABEL: test_argv64i1:
53 ; LINUXOSX64: # %bb.0:
54 ; LINUXOSX64-NEXT: addq %rcx, %rax
55 ; LINUXOSX64-NEXT: addq %rdx, %rax
56 ; LINUXOSX64-NEXT: addq %rdi, %rax
57 ; LINUXOSX64-NEXT: leaq (%rsi,%r8), %rcx
58 ; LINUXOSX64-NEXT: addq %r9, %rcx
59 ; LINUXOSX64-NEXT: addq %rcx, %rax
60 ; LINUXOSX64-NEXT: leaq (%r12,%r13), %rcx
61 ; LINUXOSX64-NEXT: addq %r14, %rcx
62 ; LINUXOSX64-NEXT: addq %r15, %rcx
63 ; LINUXOSX64-NEXT: addq %rcx, %rax
64 ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
65 ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
66 ; LINUXOSX64-NEXT: retq
; Reinterpret each 64-lane mask as a 64-bit integer.
67 %y0 = bitcast <64 x i1> %x0 to i64
68 %y1 = bitcast <64 x i1> %x1 to i64
69 %y2 = bitcast <64 x i1> %x2 to i64
70 %y3 = bitcast <64 x i1> %x3 to i64
71 %y4 = bitcast <64 x i1> %x4 to i64
72 %y5 = bitcast <64 x i1> %x5 to i64
73 %y6 = bitcast <64 x i1> %x6 to i64
74 %y7 = bitcast <64 x i1> %x7 to i64
75 %y8 = bitcast <64 x i1> %x8 to i64
76 %y9 = bitcast <64 x i1> %x9 to i64
77 %y10 = bitcast <64 x i1> %x10 to i64
78 %y11 = bitcast <64 x i1> %x11 to i64
79 %y12 = bitcast <64 x i1> %x12 to i64
; Chain the thirteen values into one running sum so every argument is used.
80 %add1 = add i64 %y0, %y1
81 %add2 = add i64 %add1, %y2
82 %add3 = add i64 %add2, %y3
83 %add4 = add i64 %add3, %y4
84 %add5 = add i64 %add4, %y5
85 %add6 = add i64 %add5, %y6
86 %add7 = add i64 %add6, %y7
87 %add8 = add i64 %add7, %y8
88 %add9 = add i64 %add8, %y9
89 %add10 = add i64 %add9, %y10
90 %add11 = add i64 %add10, %y11
91 %add12 = add i64 %add11, %y12
95 ; Test regcall when passing arguments of v64i1 type
; Every argument of the call is the same mask, built by bitcasting the i64
; constant 4294967298 (0x100000002). The expected code materialises that
; constant in each argument register and writes the remaining arguments to the
; stack; the 32-bit run splits it into a low half of 2 and a high half of 1.
96 define dso_local i64 @caller_argv64i1() #0 {
97 ; X32-LABEL: caller_argv64i1:
98 ; X32: # %bb.0: # %entry
99 ; X32-NEXT: pushl %edi
100 ; X32-NEXT: subl $88, %esp
101 ; X32-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
102 ; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
103 ; X32-NEXT: vmovups %zmm0, (%esp)
104 ; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
105 ; X32-NEXT: movl $2, {{[0-9]+}}(%esp)
106 ; X32-NEXT: movl $2, %eax
107 ; X32-NEXT: movl $1, %ecx
108 ; X32-NEXT: movl $2, %edx
109 ; X32-NEXT: movl $1, %edi
110 ; X32-NEXT: vzeroupper
111 ; X32-NEXT: calll _test_argv64i1
112 ; X32-NEXT: movl %ecx, %edx
113 ; X32-NEXT: addl $88, %esp
114 ; X32-NEXT: popl %edi
117 ; WIN64-LABEL: caller_argv64i1:
118 ; WIN64: # %bb.0: # %entry
119 ; WIN64-NEXT: pushq %r15
120 ; WIN64-NEXT: .seh_pushreg %r15
121 ; WIN64-NEXT: pushq %r14
122 ; WIN64-NEXT: .seh_pushreg %r14
123 ; WIN64-NEXT: pushq %r12
124 ; WIN64-NEXT: .seh_pushreg %r12
125 ; WIN64-NEXT: pushq %rsi
126 ; WIN64-NEXT: .seh_pushreg %rsi
127 ; WIN64-NEXT: pushq %rdi
128 ; WIN64-NEXT: .seh_pushreg %rdi
129 ; WIN64-NEXT: subq $48, %rsp
130 ; WIN64-NEXT: .seh_stackalloc 48
131 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
132 ; WIN64-NEXT: .seh_savexmm %xmm7, 32
133 ; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
134 ; WIN64-NEXT: .seh_savexmm %xmm6, 16
135 ; WIN64-NEXT: .seh_endprologue
136 ; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
137 ; WIN64-NEXT: movq %rax, (%rsp)
138 ; WIN64-NEXT: movq %rax, %rcx
139 ; WIN64-NEXT: movq %rax, %rdx
140 ; WIN64-NEXT: movq %rax, %rdi
141 ; WIN64-NEXT: movq %rax, %r8
142 ; WIN64-NEXT: movq %rax, %r9
143 ; WIN64-NEXT: movq %rax, %r10
144 ; WIN64-NEXT: movq %rax, %r11
145 ; WIN64-NEXT: movq %rax, %r12
146 ; WIN64-NEXT: movq %rax, %r14
147 ; WIN64-NEXT: movq %rax, %r15
148 ; WIN64-NEXT: movq %rax, %rsi
149 ; WIN64-NEXT: callq test_argv64i1
150 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
151 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
152 ; WIN64-NEXT: addq $48, %rsp
153 ; WIN64-NEXT: popq %rdi
154 ; WIN64-NEXT: popq %rsi
155 ; WIN64-NEXT: popq %r12
156 ; WIN64-NEXT: popq %r14
157 ; WIN64-NEXT: popq %r15
159 ; WIN64-NEXT: .seh_endproc
161 ; LINUXOSX64-LABEL: caller_argv64i1:
162 ; LINUXOSX64: # %bb.0: # %entry
163 ; LINUXOSX64-NEXT: pushq %r15
164 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
165 ; LINUXOSX64-NEXT: pushq %r14
166 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
167 ; LINUXOSX64-NEXT: pushq %r13
168 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
169 ; LINUXOSX64-NEXT: pushq %r12
170 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 40
171 ; LINUXOSX64-NEXT: pushq %rax
172 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
173 ; LINUXOSX64-NEXT: .cfi_offset %r12, -40
174 ; LINUXOSX64-NEXT: .cfi_offset %r13, -32
175 ; LINUXOSX64-NEXT: .cfi_offset %r14, -24
176 ; LINUXOSX64-NEXT: .cfi_offset %r15, -16
177 ; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
178 ; LINUXOSX64-NEXT: movq %rax, %rcx
179 ; LINUXOSX64-NEXT: movq %rax, %rdx
180 ; LINUXOSX64-NEXT: movq %rax, %rdi
181 ; LINUXOSX64-NEXT: movq %rax, %r8
182 ; LINUXOSX64-NEXT: movq %rax, %r9
183 ; LINUXOSX64-NEXT: movq %rax, %r12
184 ; LINUXOSX64-NEXT: movq %rax, %r13
185 ; LINUXOSX64-NEXT: movq %rax, %r14
186 ; LINUXOSX64-NEXT: movq %rax, %r15
187 ; LINUXOSX64-NEXT: movq %rax, %rsi
188 ; LINUXOSX64-NEXT: pushq %rax
189 ; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
190 ; LINUXOSX64-NEXT: pushq %rax
191 ; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
192 ; LINUXOSX64-NEXT: callq test_argv64i1
193 ; LINUXOSX64-NEXT: addq $24, %rsp
194 ; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -24
195 ; LINUXOSX64-NEXT: popq %r12
196 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
197 ; LINUXOSX64-NEXT: popq %r13
198 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
199 ; LINUXOSX64-NEXT: popq %r14
200 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
201 ; LINUXOSX64-NEXT: popq %r15
202 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
203 ; LINUXOSX64-NEXT: retq
; One mask value, reused for every argument of the regcall call below.
205 %v0 = bitcast i64 4294967298 to <64 x i1>
206 %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
207 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
208 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
209 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
214 ; Test regcall when returning v64i1 type
; The body bitcasts the i64 constant 4294967298 (0x100000002) to <64 x i1>.
; Both 64-bit runs share one check prefix here and expect the constant in
; %rax; the 32-bit run expects the low half (2) in %eax and the high half (1)
; in %ecx.
215 define dso_local x86_regcallcc <64 x i1> @test_retv64i1() {
216 ; X32-LABEL: test_retv64i1:
218 ; X32-NEXT: movl $2, %eax
219 ; X32-NEXT: movl $1, %ecx
222 ; CHECK64-LABEL: test_retv64i1:
224 ; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
226 %a = bitcast i64 4294967298 to <64 x i1>
230 ; Test regcall when processing result of v64i1 type
; Calls test_retv64i1 through the regcall convention. The expected code
; rebuilds the mask from the general-purpose register(s) written by the
; callee: kmovq from %rax on the 64-bit runs, or kmovd from %eax and %ecx
; joined with kunpckdq in the 32-bit run, then expands it into %zmm0 with
; vpmovm2b.
231 define dso_local <64 x i1> @caller_retv64i1() #0 {
232 ; X32-LABEL: caller_retv64i1:
233 ; X32: # %bb.0: # %entry
234 ; X32-NEXT: calll _test_retv64i1
235 ; X32-NEXT: kmovd %eax, %k0
236 ; X32-NEXT: kmovd %ecx, %k1
237 ; X32-NEXT: kunpckdq %k0, %k1, %k0
238 ; X32-NEXT: vpmovm2b %k0, %zmm0
241 ; WIN64-LABEL: caller_retv64i1:
242 ; WIN64: # %bb.0: # %entry
243 ; WIN64-NEXT: pushq %rsi
244 ; WIN64-NEXT: .seh_pushreg %rsi
245 ; WIN64-NEXT: pushq %rdi
246 ; WIN64-NEXT: .seh_pushreg %rdi
247 ; WIN64-NEXT: subq $40, %rsp
248 ; WIN64-NEXT: .seh_stackalloc 40
249 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
250 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
251 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
252 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
253 ; WIN64-NEXT: .seh_endprologue
254 ; WIN64-NEXT: callq test_retv64i1
255 ; WIN64-NEXT: kmovq %rax, %k0
256 ; WIN64-NEXT: vpmovm2b %k0, %zmm0
257 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
258 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
259 ; WIN64-NEXT: addq $40, %rsp
260 ; WIN64-NEXT: popq %rdi
261 ; WIN64-NEXT: popq %rsi
263 ; WIN64-NEXT: .seh_endproc
265 ; LINUXOSX64-LABEL: caller_retv64i1:
266 ; LINUXOSX64: # %bb.0: # %entry
267 ; LINUXOSX64-NEXT: pushq %rax
268 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
269 ; LINUXOSX64-NEXT: callq test_retv64i1
270 ; LINUXOSX64-NEXT: kmovq %rax, %k0
271 ; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0
272 ; LINUXOSX64-NEXT: popq %rax
273 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
274 ; LINUXOSX64-NEXT: retq
276 %call = call x86_regcallcc <64 x i1> @test_retv64i1()
280 ; Test regcall when receiving arguments of v32i1 type
; The regcall function forwards its three <32 x i1> arguments to
; test_argv32i1helper, which is declared without the regcall convention. The
; expected code moves the masks out of %eax, %ecx and %edx into k-registers
; and expands each with vpmovm2b. Linux x86-64 and the 32-bit run hand them
; to the helper in %ymm0-%ymm2; Win64 stores them to stack slots and loads
; stack addresses into %rcx, %rdx and %r8.
281 declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
282 define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
283 ; X32-LABEL: test_argv32i1:
284 ; X32: # %bb.0: # %entry
285 ; X32-NEXT: subl $76, %esp
286 ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
287 ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
288 ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
289 ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
290 ; X32-NEXT: kmovd %edx, %k0
291 ; X32-NEXT: kmovd %ecx, %k1
292 ; X32-NEXT: kmovd %eax, %k2
293 ; X32-NEXT: vpmovm2b %k2, %zmm0
294 ; X32-NEXT: vpmovm2b %k1, %zmm1
295 ; X32-NEXT: vpmovm2b %k0, %zmm2
296 ; X32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
297 ; X32-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
298 ; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
299 ; X32-NEXT: calll _test_argv32i1helper
300 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
301 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
302 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
303 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
304 ; X32-NEXT: addl $76, %esp
305 ; X32-NEXT: vzeroupper
308 ; WIN64-LABEL: test_argv32i1:
309 ; WIN64: # %bb.0: # %entry
310 ; WIN64-NEXT: pushq %rbp
311 ; WIN64-NEXT: .seh_pushreg %rbp
312 ; WIN64-NEXT: pushq %r11
313 ; WIN64-NEXT: .seh_pushreg %r11
314 ; WIN64-NEXT: pushq %r10
315 ; WIN64-NEXT: .seh_pushreg %r10
316 ; WIN64-NEXT: subq $128, %rsp
317 ; WIN64-NEXT: .seh_stackalloc 128
318 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
319 ; WIN64-NEXT: .seh_setframe %rbp, 128
320 ; WIN64-NEXT: .seh_endprologue
321 ; WIN64-NEXT: andq $-32, %rsp
322 ; WIN64-NEXT: kmovd %edx, %k0
323 ; WIN64-NEXT: kmovd %eax, %k1
324 ; WIN64-NEXT: kmovd %ecx, %k2
325 ; WIN64-NEXT: vpmovm2b %k2, %zmm0
326 ; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
327 ; WIN64-NEXT: vpmovm2b %k1, %zmm0
328 ; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
329 ; WIN64-NEXT: vpmovm2b %k0, %zmm0
330 ; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
331 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
332 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
333 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
334 ; WIN64-NEXT: vzeroupper
335 ; WIN64-NEXT: callq test_argv32i1helper
337 ; WIN64-NEXT: movq %rbp, %rsp
338 ; WIN64-NEXT: popq %r10
339 ; WIN64-NEXT: popq %r11
340 ; WIN64-NEXT: popq %rbp
342 ; WIN64-NEXT: .seh_endproc
344 ; LINUXOSX64-LABEL: test_argv32i1:
345 ; LINUXOSX64: # %bb.0: # %entry
346 ; LINUXOSX64-NEXT: subq $136, %rsp
347 ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
348 ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
349 ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
350 ; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
351 ; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
352 ; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
353 ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
354 ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
355 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
356 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
357 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
358 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
359 ; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
360 ; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
361 ; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
362 ; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
363 ; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
364 ; LINUXOSX64-NEXT: kmovd %edx, %k0
365 ; LINUXOSX64-NEXT: kmovd %ecx, %k1
366 ; LINUXOSX64-NEXT: kmovd %eax, %k2
367 ; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
368 ; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
369 ; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
370 ; LINUXOSX64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
371 ; LINUXOSX64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
372 ; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
373 ; LINUXOSX64-NEXT: callq test_argv32i1helper@PLT
374 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
375 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
376 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
377 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
378 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
379 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
380 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
381 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
382 ; LINUXOSX64-NEXT: addq $136, %rsp
383 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
384 ; LINUXOSX64-NEXT: vzeroupper
385 ; LINUXOSX64-NEXT: retq
; Plain (non-regcall) call, so the masks must be converted for the helper.
387 %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
391 ; Test regcall when passing arguments of v32i1 type
; The mask obtained by bitcasting i32 1 is passed as all three arguments.
; The expected code loads the immediate 1 into %eax, %ecx and %edx before the
; call in every run.
392 define dso_local i32 @caller_argv32i1() #0 {
393 ; X32-LABEL: caller_argv32i1:
394 ; X32: # %bb.0: # %entry
395 ; X32-NEXT: movl $1, %eax
396 ; X32-NEXT: movl $1, %ecx
397 ; X32-NEXT: movl $1, %edx
398 ; X32-NEXT: calll _test_argv32i1
401 ; WIN64-LABEL: caller_argv32i1:
402 ; WIN64: # %bb.0: # %entry
403 ; WIN64-NEXT: pushq %rsi
404 ; WIN64-NEXT: .seh_pushreg %rsi
405 ; WIN64-NEXT: pushq %rdi
406 ; WIN64-NEXT: .seh_pushreg %rdi
407 ; WIN64-NEXT: subq $40, %rsp
408 ; WIN64-NEXT: .seh_stackalloc 40
409 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
410 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
411 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
412 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
413 ; WIN64-NEXT: .seh_endprologue
414 ; WIN64-NEXT: movl $1, %eax
415 ; WIN64-NEXT: movl $1, %ecx
416 ; WIN64-NEXT: movl $1, %edx
417 ; WIN64-NEXT: callq test_argv32i1
418 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
419 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
420 ; WIN64-NEXT: addq $40, %rsp
421 ; WIN64-NEXT: popq %rdi
422 ; WIN64-NEXT: popq %rsi
424 ; WIN64-NEXT: .seh_endproc
426 ; LINUXOSX64-LABEL: caller_argv32i1:
427 ; LINUXOSX64: # %bb.0: # %entry
428 ; LINUXOSX64-NEXT: pushq %rax
429 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
430 ; LINUXOSX64-NEXT: movl $1, %eax
431 ; LINUXOSX64-NEXT: movl $1, %ecx
432 ; LINUXOSX64-NEXT: movl $1, %edx
433 ; LINUXOSX64-NEXT: callq test_argv32i1
434 ; LINUXOSX64-NEXT: popq %rcx
435 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
436 ; LINUXOSX64-NEXT: retq
; One mask value, reused for all three arguments.
438 %v0 = bitcast i32 1 to <32 x i1>
439 %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
443 ; Test regcall when returning v32i1 type
; The body bitcasts i32 1 to <32 x i1>; every run expects the immediate 1 to
; be loaded into %eax (the two 64-bit runs share one check prefix here).
444 define dso_local x86_regcallcc <32 x i1> @test_retv32i1() {
445 ; X32-LABEL: test_retv32i1:
447 ; X32-NEXT: movl $1, %eax
450 ; CHECK64-LABEL: test_retv32i1:
452 ; CHECK64-NEXT: movl $1, %eax
454 %a = bitcast i32 1 to <32 x i1>
458 ; Test regcall when processing result of v32i1 type
; Calls test_retv32i1 and bitcasts the returned mask to i32. The expected
; code uses the value directly from %eax, with no k-register round trip, and
; increments it right after the call.
459 define dso_local i32 @caller_retv32i1() #0 {
460 ; X32-LABEL: caller_retv32i1:
461 ; X32: # %bb.0: # %entry
462 ; X32-NEXT: calll _test_retv32i1
463 ; X32-NEXT: incl %eax
466 ; WIN64-LABEL: caller_retv32i1:
467 ; WIN64: # %bb.0: # %entry
468 ; WIN64-NEXT: pushq %rsi
469 ; WIN64-NEXT: .seh_pushreg %rsi
470 ; WIN64-NEXT: pushq %rdi
471 ; WIN64-NEXT: .seh_pushreg %rdi
472 ; WIN64-NEXT: subq $40, %rsp
473 ; WIN64-NEXT: .seh_stackalloc 40
474 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
475 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
476 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
477 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
478 ; WIN64-NEXT: .seh_endprologue
479 ; WIN64-NEXT: callq test_retv32i1
480 ; WIN64-NEXT: incl %eax
481 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
482 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
483 ; WIN64-NEXT: addq $40, %rsp
484 ; WIN64-NEXT: popq %rdi
485 ; WIN64-NEXT: popq %rsi
487 ; WIN64-NEXT: .seh_endproc
489 ; LINUXOSX64-LABEL: caller_retv32i1:
490 ; LINUXOSX64: # %bb.0: # %entry
491 ; LINUXOSX64-NEXT: pushq %rax
492 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
493 ; LINUXOSX64-NEXT: callq test_retv32i1
494 ; LINUXOSX64-NEXT: incl %eax
495 ; LINUXOSX64-NEXT: popq %rcx
496 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
497 ; LINUXOSX64-NEXT: retq
499 %call = call x86_regcallcc <32 x i1> @test_retv32i1()
; Reinterpret the 32-lane mask as a scalar.
500 %c = bitcast <32 x i1> %call to i32
505 ; Test regcall when receiving arguments of v16i1 type
; The regcall function forwards its three <16 x i1> arguments to
; test_argv16i1helper, which is declared without the regcall convention. The
; expected code moves the masks out of %eax, %ecx and %edx into k-registers
; and expands each with vpmovm2b. Linux x86-64 and the 32-bit run hand them
; to the helper in %xmm0-%xmm2; Win64 stores them to stack slots and loads
; stack addresses into %rcx, %rdx and %r8.
506 declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
507 define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
508 ; X32-LABEL: test_argv16i1:
510 ; X32-NEXT: subl $76, %esp
511 ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
512 ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
513 ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
514 ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
515 ; X32-NEXT: kmovd %edx, %k0
516 ; X32-NEXT: kmovd %ecx, %k1
517 ; X32-NEXT: kmovd %eax, %k2
518 ; X32-NEXT: vpmovm2b %k2, %zmm0
519 ; X32-NEXT: vpmovm2b %k1, %zmm1
520 ; X32-NEXT: vpmovm2b %k0, %zmm2
521 ; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
522 ; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
523 ; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
524 ; X32-NEXT: vzeroupper
525 ; X32-NEXT: calll _test_argv16i1helper
526 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
527 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
528 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
529 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
530 ; X32-NEXT: addl $76, %esp
533 ; WIN64-LABEL: test_argv16i1:
535 ; WIN64-NEXT: pushq %r11
536 ; WIN64-NEXT: .seh_pushreg %r11
537 ; WIN64-NEXT: pushq %r10
538 ; WIN64-NEXT: .seh_pushreg %r10
539 ; WIN64-NEXT: subq $88, %rsp
540 ; WIN64-NEXT: .seh_stackalloc 88
541 ; WIN64-NEXT: .seh_endprologue
542 ; WIN64-NEXT: kmovd %edx, %k0
543 ; WIN64-NEXT: kmovd %eax, %k1
544 ; WIN64-NEXT: kmovd %ecx, %k2
545 ; WIN64-NEXT: vpmovm2b %k2, %zmm0
546 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
547 ; WIN64-NEXT: vpmovm2b %k1, %zmm0
548 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
549 ; WIN64-NEXT: vpmovm2b %k0, %zmm0
550 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
551 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
552 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
553 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
554 ; WIN64-NEXT: vzeroupper
555 ; WIN64-NEXT: callq test_argv16i1helper
557 ; WIN64-NEXT: addq $88, %rsp
558 ; WIN64-NEXT: popq %r10
559 ; WIN64-NEXT: popq %r11
561 ; WIN64-NEXT: .seh_endproc
563 ; LINUXOSX64-LABEL: test_argv16i1:
564 ; LINUXOSX64: # %bb.0:
565 ; LINUXOSX64-NEXT: subq $136, %rsp
566 ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
567 ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
568 ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
569 ; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
570 ; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
571 ; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
572 ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
573 ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
574 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
575 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
576 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
577 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
578 ; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
579 ; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
580 ; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
581 ; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
582 ; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
583 ; LINUXOSX64-NEXT: kmovd %edx, %k0
584 ; LINUXOSX64-NEXT: kmovd %ecx, %k1
585 ; LINUXOSX64-NEXT: kmovd %eax, %k2
586 ; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
587 ; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
588 ; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
589 ; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
590 ; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
591 ; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
592 ; LINUXOSX64-NEXT: vzeroupper
593 ; LINUXOSX64-NEXT: callq test_argv16i1helper@PLT
594 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
595 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
596 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
597 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
598 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
599 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
600 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
601 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
602 ; LINUXOSX64-NEXT: addq $136, %rsp
603 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
604 ; LINUXOSX64-NEXT: retq
; Plain (non-regcall) call, so the masks must be converted for the helper.
605 %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
609 ; Test regcall when passing arguments of v16i1 type
; The mask obtained by bitcasting i16 1 is passed as all three arguments.
; The expected code loads the immediate 1 into the full 32-bit registers
; %eax, %ecx and %edx before the call in every run.
610 define dso_local i16 @caller_argv16i1() #0 {
611 ; X32-LABEL: caller_argv16i1:
612 ; X32: # %bb.0: # %entry
613 ; X32-NEXT: movl $1, %eax
614 ; X32-NEXT: movl $1, %ecx
615 ; X32-NEXT: movl $1, %edx
616 ; X32-NEXT: calll _test_argv16i1
619 ; WIN64-LABEL: caller_argv16i1:
620 ; WIN64: # %bb.0: # %entry
621 ; WIN64-NEXT: pushq %rsi
622 ; WIN64-NEXT: .seh_pushreg %rsi
623 ; WIN64-NEXT: pushq %rdi
624 ; WIN64-NEXT: .seh_pushreg %rdi
625 ; WIN64-NEXT: subq $40, %rsp
626 ; WIN64-NEXT: .seh_stackalloc 40
627 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
628 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
629 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
630 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
631 ; WIN64-NEXT: .seh_endprologue
632 ; WIN64-NEXT: movl $1, %eax
633 ; WIN64-NEXT: movl $1, %ecx
634 ; WIN64-NEXT: movl $1, %edx
635 ; WIN64-NEXT: callq test_argv16i1
636 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
637 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
638 ; WIN64-NEXT: addq $40, %rsp
639 ; WIN64-NEXT: popq %rdi
640 ; WIN64-NEXT: popq %rsi
642 ; WIN64-NEXT: .seh_endproc
644 ; LINUXOSX64-LABEL: caller_argv16i1:
645 ; LINUXOSX64: # %bb.0: # %entry
646 ; LINUXOSX64-NEXT: pushq %rax
647 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
648 ; LINUXOSX64-NEXT: movl $1, %eax
649 ; LINUXOSX64-NEXT: movl $1, %ecx
650 ; LINUXOSX64-NEXT: movl $1, %edx
651 ; LINUXOSX64-NEXT: callq test_argv16i1
652 ; LINUXOSX64-NEXT: popq %rcx
653 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
654 ; LINUXOSX64-NEXT: retq
; One mask value, reused for all three arguments.
656 %v0 = bitcast i16 1 to <16 x i1>
657 %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
661 ; Test regcall when returning v16i1 type
; The body bitcasts i16 1 to <16 x i1>; every run expects the immediate 1 to
; be loaded into the 16-bit register %ax (the two 64-bit runs share one check
; prefix here).
662 define dso_local x86_regcallcc <16 x i1> @test_retv16i1() {
663 ; X32-LABEL: test_retv16i1:
665 ; X32-NEXT: movw $1, %ax
668 ; CHECK64-LABEL: test_retv16i1:
670 ; CHECK64-NEXT: movw $1, %ax
672 %a = bitcast i16 1 to <16 x i1>
676 ; Test regcall when processing result of v16i1 type
; Calls test_retv16i1 and bitcasts the returned mask to i16. The expected
; code takes the value from %ax and increments it through %eax right after
; the call; the "kill" annotations record the %ax/%eax sub-register
; relationship around that increment.
677 define dso_local i16 @caller_retv16i1() #0 {
678 ; X32-LABEL: caller_retv16i1:
679 ; X32: # %bb.0: # %entry
680 ; X32-NEXT: calll _test_retv16i1
681 ; X32-NEXT: # kill: def $ax killed $ax def $eax
682 ; X32-NEXT: incl %eax
683 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
686 ; WIN64-LABEL: caller_retv16i1:
687 ; WIN64: # %bb.0: # %entry
688 ; WIN64-NEXT: pushq %rsi
689 ; WIN64-NEXT: .seh_pushreg %rsi
690 ; WIN64-NEXT: pushq %rdi
691 ; WIN64-NEXT: .seh_pushreg %rdi
692 ; WIN64-NEXT: subq $40, %rsp
693 ; WIN64-NEXT: .seh_stackalloc 40
694 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
695 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
696 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
697 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
698 ; WIN64-NEXT: .seh_endprologue
699 ; WIN64-NEXT: callq test_retv16i1
700 ; WIN64-NEXT: # kill: def $ax killed $ax def $eax
701 ; WIN64-NEXT: incl %eax
702 ; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
703 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
704 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
705 ; WIN64-NEXT: addq $40, %rsp
706 ; WIN64-NEXT: popq %rdi
707 ; WIN64-NEXT: popq %rsi
709 ; WIN64-NEXT: .seh_endproc
711 ; LINUXOSX64-LABEL: caller_retv16i1:
712 ; LINUXOSX64: # %bb.0: # %entry
713 ; LINUXOSX64-NEXT: pushq %rax
714 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
715 ; LINUXOSX64-NEXT: callq test_retv16i1
716 ; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
717 ; LINUXOSX64-NEXT: incl %eax
718 ; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
719 ; LINUXOSX64-NEXT: popq %rcx
720 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
721 ; LINUXOSX64-NEXT: retq
723 %call = call x86_regcallcc <16 x i1> @test_retv16i1()
; Reinterpret the 16-lane mask as a scalar.
724 %c = bitcast <16 x i1> %call to i16
729 ; Test regcall when receiving arguments of v8i1 type
; The regcall function forwards its three <8 x i1> arguments to
; test_argv8i1helper, which is declared without the regcall convention. The
; expected code moves the masks out of %eax, %ecx and %edx into k-registers
; and expands each with vpmovm2w (word elements, unlike the vpmovm2b used by
; the wider mask tests). Linux x86-64 and the 32-bit run hand them to the
; helper in %xmm0-%xmm2; Win64 stores them to stack slots and loads stack
; addresses into %rcx, %rdx and %r8.
730 declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
731 define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
732 ; X32-LABEL: test_argv8i1:
734 ; X32-NEXT: subl $76, %esp
735 ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
736 ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
737 ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
738 ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
739 ; X32-NEXT: kmovd %edx, %k0
740 ; X32-NEXT: kmovd %ecx, %k1
741 ; X32-NEXT: kmovd %eax, %k2
742 ; X32-NEXT: vpmovm2w %k2, %zmm0
743 ; X32-NEXT: vpmovm2w %k1, %zmm1
744 ; X32-NEXT: vpmovm2w %k0, %zmm2
745 ; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
746 ; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
747 ; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
748 ; X32-NEXT: vzeroupper
749 ; X32-NEXT: calll _test_argv8i1helper
750 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
751 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
752 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
753 ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
754 ; X32-NEXT: addl $76, %esp
757 ; WIN64-LABEL: test_argv8i1:
759 ; WIN64-NEXT: pushq %r11
760 ; WIN64-NEXT: .seh_pushreg %r11
761 ; WIN64-NEXT: pushq %r10
762 ; WIN64-NEXT: .seh_pushreg %r10
763 ; WIN64-NEXT: subq $88, %rsp
764 ; WIN64-NEXT: .seh_stackalloc 88
765 ; WIN64-NEXT: .seh_endprologue
766 ; WIN64-NEXT: kmovd %edx, %k0
767 ; WIN64-NEXT: kmovd %eax, %k1
768 ; WIN64-NEXT: kmovd %ecx, %k2
769 ; WIN64-NEXT: vpmovm2w %k2, %zmm0
770 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
771 ; WIN64-NEXT: vpmovm2w %k1, %zmm0
772 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
773 ; WIN64-NEXT: vpmovm2w %k0, %zmm0
774 ; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
775 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
776 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
777 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
778 ; WIN64-NEXT: vzeroupper
779 ; WIN64-NEXT: callq test_argv8i1helper
781 ; WIN64-NEXT: addq $88, %rsp
782 ; WIN64-NEXT: popq %r10
783 ; WIN64-NEXT: popq %r11
785 ; WIN64-NEXT: .seh_endproc
787 ; LINUXOSX64-LABEL: test_argv8i1:
788 ; LINUXOSX64: # %bb.0:
789 ; LINUXOSX64-NEXT: subq $136, %rsp
790 ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
791 ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
792 ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
793 ; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
794 ; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
795 ; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
796 ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
797 ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
798 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
799 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
800 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
801 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
802 ; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
803 ; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
804 ; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
805 ; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
806 ; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
807 ; LINUXOSX64-NEXT: kmovd %edx, %k0
808 ; LINUXOSX64-NEXT: kmovd %ecx, %k1
809 ; LINUXOSX64-NEXT: kmovd %eax, %k2
810 ; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0
811 ; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1
812 ; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2
813 ; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
814 ; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
815 ; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
816 ; LINUXOSX64-NEXT: vzeroupper
817 ; LINUXOSX64-NEXT: callq test_argv8i1helper@PLT
818 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
819 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
820 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
821 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
822 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
823 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
824 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
825 ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
826 ; LINUXOSX64-NEXT: addq $136, %rsp
827 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
828 ; LINUXOSX64-NEXT: retq
; Plain (non-regcall) call, so the masks must be converted for the helper.
829 %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
833 ; Test regcall when passing arguments of v8i1 type
; caller_argv8i1 bitcasts the constant i8 1 to <8 x i1> and passes that same
; value as all three arguments of the x86_regcallcc function @test_argv8i1.
; The expected assembly below shows each argument materialized with
; `movl $1` into eax, ecx and edx on every target before the call.
; In the Windows 64-bit expectations the call is additionally surrounded by
; saves and restores of rsi, rdi, xmm6 and xmm7 plus the matching .seh_*
; directives; in the Linux 64-bit expectations only a pushq/popq pair
; surrounds the call.
834 define dso_local i8 @caller_argv8i1() #0 {
835 ; X32-LABEL: caller_argv8i1:
836 ; X32: # %bb.0: # %entry
837 ; X32-NEXT: movl $1, %eax
838 ; X32-NEXT: movl $1, %ecx
839 ; X32-NEXT: movl $1, %edx
840 ; X32-NEXT: calll _test_argv8i1
843 ; WIN64-LABEL: caller_argv8i1:
844 ; WIN64: # %bb.0: # %entry
845 ; WIN64-NEXT: pushq %rsi
846 ; WIN64-NEXT: .seh_pushreg %rsi
847 ; WIN64-NEXT: pushq %rdi
848 ; WIN64-NEXT: .seh_pushreg %rdi
849 ; WIN64-NEXT: subq $40, %rsp
850 ; WIN64-NEXT: .seh_stackalloc 40
851 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
852 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
853 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
854 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
855 ; WIN64-NEXT: .seh_endprologue
856 ; WIN64-NEXT: movl $1, %eax
857 ; WIN64-NEXT: movl $1, %ecx
858 ; WIN64-NEXT: movl $1, %edx
859 ; WIN64-NEXT: callq test_argv8i1
860 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
861 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
862 ; WIN64-NEXT: addq $40, %rsp
863 ; WIN64-NEXT: popq %rdi
864 ; WIN64-NEXT: popq %rsi
866 ; WIN64-NEXT: .seh_endproc
868 ; LINUXOSX64-LABEL: caller_argv8i1:
869 ; LINUXOSX64: # %bb.0: # %entry
870 ; LINUXOSX64-NEXT: pushq %rax
871 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
872 ; LINUXOSX64-NEXT: movl $1, %eax
873 ; LINUXOSX64-NEXT: movl $1, %ecx
874 ; LINUXOSX64-NEXT: movl $1, %edx
875 ; LINUXOSX64-NEXT: callq test_argv8i1
876 ; LINUXOSX64-NEXT: popq %rcx
877 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
878 ; LINUXOSX64-NEXT: retq
; IR under test: a single <8 x i1> value (%v0) feeds all three regcall
; arguments of the call below.
880 %v0 = bitcast i8 1 to <8 x i1>
881 %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
885 ; Test regcall when returning v8i1 type
; test_retv8i1 is an x86_regcallcc function declared to return <8 x i1>; the
; visible part of its body bitcasts the constant i8 1 to <8 x i1>.
; Both the 32-bit expectations and the expectations shared by the two 64-bit
; runs show that constant being materialized with `movb $1, %al`.
886 define dso_local x86_regcallcc <8 x i1> @test_retv8i1() {
887 ; X32-LABEL: test_retv8i1:
889 ; X32-NEXT: movb $1, %al
892 ; CHECK64-LABEL: test_retv8i1:
894 ; CHECK64-NEXT: movb $1, %al
; NOTE(review): %a is presumably the value handed to the function's ret
; instruction, which is not visible in this excerpt - confirm.
896 %a = bitcast i8 1 to <8 x i1>
900 ; Test regcall when processing result of v8i1 type
901 define dso_local <8 x i1> @caller_retv8i1() #0 {
902 ; X32-LABEL: caller_retv8i1:
903 ; X32: # %bb.0: # %entry
904 ; X32-NEXT: calll _test_retv8i1
905 ; X32-NEXT: # kill: def $al killed $al def $eax
906 ; X32-NEXT: kmovd %eax, %k0
907 ; X32-NEXT: vpmovm2w %k0, %zmm0
908 ; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
909 ; X32-NEXT: vzeroupper
912 ; WIN64-LABEL: caller_retv8i1:
913 ; WIN64: # %bb.0: # %entry
914 ; WIN64-NEXT: pushq %rsi
915 ; WIN64-NEXT: .seh_pushreg %rsi
916 ; WIN64-NEXT: pushq %rdi
917 ; WIN64-NEXT: .seh_pushreg %rdi
918 ; WIN64-NEXT: subq $40, %rsp
919 ; WIN64-NEXT: .seh_stackalloc 40
920 ; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
921 ; WIN64-NEXT: .seh_savexmm %xmm7, 16
922 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
923 ; WIN64-NEXT: .seh_savexmm %xmm6, 0
924 ; WIN64-NEXT: .seh_endprologue
925 ; WIN64-NEXT: callq test_retv8i1
926 ; WIN64-NEXT: # kill: def $al killed $al def $eax
927 ; WIN64-NEXT: kmovd %eax, %k0
928 ; WIN64-NEXT: vpmovm2w %k0, %zmm0
929 ; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
930 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
931 ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
932 ; WIN64-NEXT: addq $40, %rsp
933 ; WIN64-NEXT: popq %rdi
934 ; WIN64-NEXT: popq %rsi
935 ; WIN64-NEXT: vzeroupper
937 ; WIN64-NEXT: .seh_endproc
939 ; LINUXOSX64-LABEL: caller_retv8i1:
940 ; LINUXOSX64: # %bb.0: # %entry
941 ; LINUXOSX64-NEXT: pushq %rax
942 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
943 ; LINUXOSX64-NEXT: callq test_retv8i1
944 ; LINUXOSX64-NEXT: # kill: def $al killed $al def $eax
945 ; LINUXOSX64-NEXT: kmovd %eax, %k0
946 ; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0
947 ; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
948 ; LINUXOSX64-NEXT: popq %rax
949 ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
950 ; LINUXOSX64-NEXT: vzeroupper
951 ; LINUXOSX64-NEXT: retq
953 %call = call x86_regcallcc <8 x i1> @test_retv8i1()